2008. november 11., kedd
Boyer-Moore string searching
Problem/Question/Abstract:
Boyer-Moore string searching
Answer:
Solve 1:
unit BMSearch;
interface
type
{$IFDEF WINDOWS}
size_t = Word;
{$ELSE}
size_t = LongInt;
{$ENDIF}
type
TTranslationTable = array[char] of char; { translation table }
TSearchBM = class(TObject)
private
FTranslate: TTranslationTable; { translation table }
FJumpTable: array[char] of Byte; { Jumping table }
FShift_1: integer;
FPattern: pchar;
FPatternLen: size_t;
public
procedure Prepare(Pattern: pchar; PatternLen: size_t; IgnoreCase: Boolean);
procedure PrepareStr(const Pattern: string; IgnoreCase: Boolean);
function Search(Text: pchar; TextLen: size_t): pchar;
function Pos(const S: string): integer;
end;
implementation
uses
SysUtils;
{Ignore Case Table Translation}
procedure CreateTranslationTable(var T: TTranslationTable; IgnoreCase: Boolean);
var
c: char;
begin
for c := #0 to #255 do
T[c] := c;
if not IgnoreCase then
exit;
for c := 'a' to 'z' do
T[c] := UpCase(c);
{ Mapping all accented characters to their uppercase equivalent }
T['�'] := 'A';
T['�'] := 'A';
T['�'] := 'A';
T['�'] := 'A';
T['�'] := 'A';
T['�'] := 'A';
T['�'] := 'A';
T['�'] := 'A';
T['�'] := 'E';
T['�'] := 'E';
T['�'] := 'E';
T['�'] := 'E';
T['�'] := 'E';
T['�'] := 'E';
T['�'] := 'E';
T['�'] := 'E';
T['�'] := 'I';
T['�'] := 'I';
T['�'] := 'I';
T['�'] := 'I';
T['�'] := 'I';
T['�'] := 'I';
T['�'] := 'I';
T['�'] := 'I';
T['�'] := 'O';
T['�'] := 'O';
T['�'] := 'O';
T['�'] := 'O';
T['�'] := 'O';
T['�'] := 'O';
T['�'] := 'O';
T['�'] := 'O';
T['�'] := 'U';
T['�'] := 'U';
T['�'] := 'U';
T['�'] := 'U';
T['�'] := 'U';
T['�'] := 'U';
T['�'] := 'U';
T['�'] := 'U';
T['�'] := '�';
end;
{Preparation of the jumping table}
procedure TSearchBM.Prepare(Pattern: pchar; PatternLen: size_t; IgnoreCase: Boolean);
var
i: integer;
c, lastc: char;
begin
FPattern := Pattern;
FPatternLen := PatternLen;
if FPatternLen < 1 then
FPatternLen := strlen(FPattern);
{This algorythm is based on a character set of 256}
if FPatternLen > 256 then
exit;
{1. Preparing translating table}
CreateTranslationTable(FTranslate, IgnoreCase);
{2. Preparing jumping table}
for c := #0 to #255 do
FJumpTable[c] := FPatternLen;
for i := FPatternLen - 1 downto 0 do
begin
c := FTranslate[FPattern[i]];
if FJumpTable[c] >= FPatternLen - 1 then
FJumpTable[c] := FPatternLen - 1 - i;
end;
FShift_1 := FPatternLen - 1;
lastc := FTranslate[Pattern[FPatternLen - 1]];
for i := FPatternLen - 2 downto 0 do
if FTranslate[FPattern[i]] = lastc then
begin
FShift_1 := FPatternLen - 1 - i;
break;
end;
if FShift_1 = 0 then
FShift_1 := 1;
end;
procedure TSearchBM.PrepareStr(const Pattern: string; IgnoreCase: Boolean);
var
str: pchar;
begin
if Pattern <> '' then
begin
{$IFDEF Windows}
str := @Pattern[1];
{$ELSE}
str := pchar(Pattern);
{$ENDIF}
Prepare(str, Length(Pattern), IgnoreCase);
end;
end;
{Searching Last char & scanning right to left}
function TSearchBM.Search(Text: pchar; TextLen: size_t): pchar;
var
shift, m1, j: integer;
jumps: size_t;
begin
result := nil;
if FPatternLen > 256 then
exit;
if TextLen < 1 then
TextLen := strlen(Text);
m1 := FPatternLen - 1;
shift := 0;
jumps := 0;
{Searching the last character}
while jumps <= TextLen do
begin
Inc(Text, shift);
shift := FJumpTable[FTranslate[Text^]];
while shift <> 0 do
begin
Inc(jumps, shift);
if jumps > TextLen then
exit;
Inc(Text, shift);
shift := FJumpTable[FTranslate[Text^]];
end;
{ Compare right to left FPatternLen - 1 characters }
if jumps >= m1 then
begin
j := 0;
while FTranslate[FPattern[m1 - j]] = FTranslate[(Text - j)^] do
begin
Inc(j);
if j = FPatternLen then
begin
result := Text - m1;
exit;
end;
end;
end;
shift := FShift_1;
Inc(jumps, shift);
end;
end;
function TSearchBM.Pos(const S: string): integer;
var
str, p: pchar;
begin
result := 0;
if S <> '' then
begin
{$IFDEF Windows}
str := @S[1];
{$ELSE}
str := pchar(S);
{$ENDIF}
p := Search(str, Length(S));
if p <> nil then
result := 1 + p - str;
end;
end;
end.
Solve 2:
Here's a demo program of the Boyer-Moore search algorithm. The basic idea is to first create a Boyer-Moore index table for the string you want to search for, and then call the BMsearch routine. Remember to turn-off Range Checking {$R-} in your finished program, otherwise the BMSearch will take 3-4 times longer than it should.
{Public-domain demo of Boyer-Moore search algorithm.
Guy McLoughlin - May 1, 1993.}
program DemoBMSearch;
{Boyer-Moore index table data definition}
type
BMTable = array[0..127] of byte;
{Create a Boyer-Moore index table to search with.}
procedure Create_BMTable(Pattern: string; var BMT: BMTable);
var
Index: byte;
begin
fillchar(BMT, sizeof(BMT), length(Pattern));
for Index := 1 to length(Pattern) do
BMT[ord(Pattern[Index])] := (length(Pattern) - Index)
end;
{Boyer-Moore Search function. Returns 0 if string is not found. Returns 65,535 if
BufferSize is too large, ie: greater than 65,520 bytes.}
function BMsearch(var Buffer; BuffSize: word; var BMT: BMTable; Pattern: string): word;
var
Buffer2: array[1..65520] of char absolute Buffer;
Index1, Index2, PatSize: word;
begin
if (BuffSize > 65520) then
begin
BMsearch := $FFFF;
exit
end;
PatSize := length(Pattern);
Index1 := PatSize;
Index2 := PatSize;
repeat
if (Buffer2[Index1] = Pattern[Index2]) then
begin
dec(Index1);
dec(Index2)
end
else
begin
if (succ(PatSize - Index2) > (BMT[ord(Buffer2[Index1])])) then
inc(Index1, succ(PatSize - Index2))
else
inc(Index1, BMT[ord(Buffer2[Index1])]);
Index2 := PatSize
end;
until
(Index2 < 1) or (Index1 > BuffSize);
if (Index1 > BuffSize) then
BMsearch := 0
else
BMsearch := succ(Index1)
end;
type
arby_64K = array[1..65520] of byte;
var
Index: word;
st_Temp: string[10];
Buffer: ^arby_64K;
BMT: BMTable;
begin
new(Buffer);
fillchar(Buffer^, sizeof(Buffer^), 0);
st_Temp := 'Gumby';
move(st_Temp[1], Buffer^[65516], length(st_Temp));
Create_BMTable(st_Temp, BMT);
Index := BMSearch(Buffer^, sizeof(Buffer^), BMT, st_Temp);
writeln(st_Temp, ' found at offset ', Index)
end.
Feliratkozás:
Megjegyzések küldése (Atom)
Nincsenek megjegyzések:
Megjegyzés küldése