2004. január 12., hétfő
How to search for a string using the Soundex algorithm
Problem/Question/Abstract:
How to search for a string using the Soundex algorithm
Answer:
Solve 1:
unit SndxAlgs;
interface
uses
SysUtils;
function Soundex(in_str: string): string;
function NumericSoundex(in_str: string): Smallint;
function ExtendedSoundex(in_str: string): string;
implementation
{Calculate a normal Soundex encoding.}
function Soundex(in_str: string): string;
var
no_vowels, coded, out_str: string;
ch: Char;
i: Integer;
begin
{Make upper case and remove leading and trailing spaces.}
in_str := Trim(UpperCase(in_str));
{Remove vowels, spaces, H, W, and Y except for the first character.}
no_vowels := in_str[1];
for i := 2 to Length(in_str) do
begin
ch := in_str[i];
case ch of
'A', 'E', 'I', 'O', 'U', ' ', 'H', 'W', 'Y':
; {Do nothing.}
else
no_vowels := no_vowels + ch;
end;
end;
{Encode the characters.}
for i := 1 to Length(no_vowels) do
begin
ch := no_vowels[i];
case ch of
'B', 'F', 'P', 'V': ch := '1';
'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z': ch := '2';
'D', 'T': ch := '3';
'L': ch := '4';
'M', 'N': ch := '5';
'R': ch := '6';
else {Vowels, H, W, and Y as the 1st letter.}
ch := '0';
end;
coded := coded + ch;
end;
{Use the first letter.}
out_str := no_vowels[1];
{Find three non-repeating codes.}
for i := 2 to Length(no_vowels) do
begin
{Look for a non-repeating code.}
if (coded[i] <> coded[i - 1]) then
begin
{This one works.}
out_str := out_str + coded[i];
if (Length(out_str) >= 4) then
Break;
end;
end;
Soundex := out_str;
end;
{Calculate a numeric Soundex encoding.}
function NumericSoundex(in_str: string): Smallint;
var
value: Integer;
begin
{Calculate the normal Soundex encoding.}
in_str := Soundex(in_str);
{Convert this into a numeric value.}
value := (Ord(in_str[1]) - Ord('A')) * 1000;
if (Length(in_str) > 1) then
value := value + StrToInt(Copy(in_str, 2, Length(in_str) - 1));
NumericSoundex := value;
end;
{Calculate an extended Soundex encoding.}
function ExtendedSoundex(in_str: string): string;
{Replace instances of fr_str with to_str in str.}
procedure ReplaceString(var str: string; fr_str, to_str: string);
var
fr_len, i: Integer;
begin
fr_len := Length(fr_str);
i := Pos(fr_str, str);
while (i > 0) do
begin
str := Copy(str, 1, i - 1) + to_str + Copy(str, i + fr_len, Length(str) - i - fr_len + 1);
i := Pos(fr_str, str);
end;
end;
var
no_vowels: string;
ch, last_ch: Char;
i: Integer;
begin
{Make upper case and remove leading and trailing spaces.}
in_str := Trim(UpperCase(in_str));
{Remove internal spaces.}
ReplaceString(in_str, ' ', '');
{Convert CHR to CR.}
ReplaceString(in_str, 'CHR', 'CR');
{Convert PH to F.}
ReplaceString(in_str, 'PH', 'F');
{Convert Z to S.}
ReplaceString(in_str, 'Z', 'S');
{Remove vowels and repeats.}
last_ch := in_str[1]; {The last character used.}
no_vowels := last_ch;
for i := 2 to Length(in_str) do
begin
ch := in_str[i];
case ch of
'A', 'E', 'I', 'O', 'U':
; {Do nothing.}
else
{Skip it if it's a duplicate.}
if (ch <> last_ch) then
begin
no_vowels := no_vowels + ch;
last_ch := ch;
end;
end;
end;
ExtendedSoundex := no_vowels;
end;
end.
Used like this:
unit Sndx;
interface
uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
StdCtrls, ExtCtrls, SndxAlgs;
type
TForm1 = class(TForm)
InputText: TEdit;
Label1: TLabel;
CmdEncode: TButton;
Label2: TLabel;
Label3: TLabel;
Panel1: TPanel;
SoundexLabel: TLabel;
Panel2: TPanel;
NumericLabel: TLabel;
Label4: TLabel;
Panel3: TPanel;
ExtendedLabel: TLabel;
procedure CmdEncodeClick(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
var
Form1: TForm1;
implementation
{$R *.DFM}
procedure TForm1.CmdEncodeClick(Sender: TObject);
begin
SoundexLabel.Caption := Soundex(InputText.Text);
NumericLabel.Caption := Format('%d', [NumericSoundex(InputText.Text)]);
ExtendedLabel.Caption := ExtendedSoundex(InputText.Text);
end;
end.
Solve 2:
The code below is designed for use in English language and does not work for special characters like French accents or German Umlauts
function StrSoundEx(const OrgString: string): string;
var
s: string;
PrevCh: Char;
Ch: Char;
i: Integer;
begin
s := UpperCase(Trim(OrgString));
if s <> '' then
begin
PrevCh := #0;
result := s[1];
for i := 2 to Length(s) do
begin
if Length(result) = 4 then
break;
Ch := s[i];
if (Ch <> PrevCh) then
begin
if Ch in ['B', 'P', 'F', 'V'] then
result := result + '1'
else if Ch in ['C', 'S', 'K', 'G', 'J', 'Q', 'X', 'Z'] then
result := result + '2'
else if Ch in ['D', 'T'] then
result := result + '3'
else if Ch in ['L'] then
result := result + '4'
else if Ch in ['M', 'N'] then
result := result + '5'
else if Ch in ['R'] then
result := result + '6';
PrevCh := Ch;
end;
end;
end;
while Length(result) < 4 do
result := result + '0';
end;
Solve 3:
The following differs from the standard Russell Soundex algorithm in that it lets you set the size of the Soundex code to something other than four characters:
{Given a string this fuction returns the Russell Soundex code for that string. Although the Russell Soundex code is limited to four characters this function allows you to get a code up to 16 characters in length. For names a six to eight character code reduces the number of false matches significantly.
Parameters:
TheWord: The string to be encoded.
SoundexSize: The number of characters in the returned code.
Returns: The Soundex code.}
function dgGetSoundexCode(TheWord: string; SoundexSize: Integer): string;
const
MaxSize = 16;
var
I: Integer;
WorkString1, WorkString2: string;
begin
{Raise an exception if the SoundexSize parameter is not in the allowed range}
if not SoundexSize in [1..MaxSize] then
raise Exception.Create('Soundex size must in the range 1 - 16.');
{Convert the word to upper case}
TheWord := UpperCase(TheWord);
{Copy the first letter}
WorkString1 := TheWord[1];
{Copy the rest of the word to WordString1 deleting duplicate letters}
for I := 2 to Length(TheWord) do
if TheWord[I - 1] <> TheWord[I] then
AppendStr(WorkString1, TheWord[I]);
{Move the first letter to WorkString2}
WorkString2 := WorkString1[1];
{Compute the Soundex codes for the remaining letters}
for I := 2 to Length(WorkString1) do
case WorkString1[I] of
'B', 'F', 'P', 'V':
AppendStr(WorkString2, '1');
'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z':
Appendstr(WorkString2, '2');
'D', 'T':
Appendstr(WorkString2, '3');
'L':
Appendstr(WorkString2, '4');
'M', 'N':
Appendstr(WorkString2, '5');
'R':
Appendstr(WorkString2, '6');
end;
{Pad the string with zeros}
WorkString1 := '';
WorkString1 := dgFillString('0', MaxSize);
AppendStr(WorkString2, WorkString1);
Result := Copy(WorkString2, 1, SoundexSize);
end;
Feliratkozás:
Megjegyzések küldése (Atom)
Nincsenek megjegyzések:
Megjegyzés küldése