2004. november 23., kedd
How to find a string in a file
Problem/Question/Abstract:
I have an array of char called FBuffer1. Let's say: StrCopy(FBuffer1,'Test'). I also have a file, let's say File1.exe. I would like to find a very quick way to be able to localize the string "test" in the file1.exe
Answer:
Solve 1:
One way is to remove the file access problems. Load the whole file into a TMemoryStream, then search the stream. Example:
{ ... }
var
tmem: TMemoryStream;
buf: array[1..4] of Char;
begin
zeromemory(@buf, 4);
tmem := TMemoryStream.Create;
tmem.loadfromfile('test1.exe');
tmem.position := 0;
while tmem.position <> tmem.size do
begin
buf[1] := buf[2];
buf[2] := buf[3];
buf[3] := buf[4];
tmem.read(buf[4], 1);
if compare(buf, 'hello') then
Memo1.Lines.Add('match found at position ' + Inttostr(tmem.position));
end;
tmem.destroy;
end;
Solve 2:
I was working on just that some time ago. Here is my project file with some alternative functions and a time test. Just paste the following listing into a text file, rename the file to Project1.dpr, open the file in Delphi and run it.
{$APPTYPE CONSOLE}
program Project1;
uses
Windows, SysUtils;
function ScanString(SourceStart, SourceEnd, Search: PChar; CaseSensitive: Boolean): PChar;
var
SourcePtr: PChar;
SourceChr: Char;
SearchPos: DWord;
SearchPtr: PChar;
begin
Result := nil;
if SourceStart > SourceEnd then
Exit;
if not CaseSensitive then
CharUpperBuff(Search, Length(Search));
SourcePtr := SourceStart;
SearchPos := 0;
SearchPtr := Search;
while SourcePtr <= SourceEnd do
begin
SourceChr := SourcePtr^;
if not CaseSensitive then
CharUpperBuff(@SourceChr, 1);
if SourceChr = SearchPtr^ then
begin
Inc(SearchPtr);
if SearchPtr^ = #0 then
begin
Result := SourcePtr - SearchPos;
Break;
end;
Inc(SearchPos);
end
else if SearchPos > 0 then
begin
SearchPos := 0;
SearchPtr := Search;
end;
Inc(SourcePtr);
end;
end;
function ScanStringNew(SourceStart, SourceEnd, SearchStr: PChar;
CaseSensitive: Boolean): PChar;
var
SourcePtr: PChar;
ScanLen: DWord;
ScanPos: DWord;
ScanStr: PChar;
ScanPtr: PChar;
ScanUppStr: PChar;
ScanUppPtr: PChar;
ScanLowStr: PChar;
ScanLowPtr: PChar;
begin
Result := nil;
if SourceStart > SourceEnd then
Exit;
ScanLen := Length(SearchStr);
if not CaseSensitive then
begin
GetMem(ScanUppStr, ScanLen);
CopyMemory(ScanUppStr, SearchStr, ScanLen);
CharUpperBuff(ScanUppStr, ScanLen);
GetMem(ScanLowStr, ScanLen);
CopyMemory(ScanLowStr, SearchStr, ScanLen);
CharLowerBuff(ScanLowStr, ScanLen);
end
else
begin
ScanUppStr := SearchStr;
ScanLowStr := SearchStr;
end;
ScanPos := 0;
ScanUppPtr := ScanUppStr;
ScanLowPtr := ScanLowStr;
SourcePtr := SourceStart;
ScanPtr := ScanStr;
while SourcePtr <= SourceEnd do
begin
if (SourcePtr^ = ScanUppPtr^) or (SourcePtr^ = ScanLowPtr^) then
begin
Inc(ScanPos);
if ScanPos = ScanLen then
begin
Result := SourcePtr - ScanPos + 1;
Break;
end;
Inc(ScanUppPtr);
Inc(ScanLowPtr);
end
else if ScanPos > 0 then
begin
ScanPos := 0;
ScanUppPtr := ScanUppStr;
ScanLowPtr := ScanLowStr;
end;
Inc(SourcePtr);
end;
if not CaseSensitive then
begin
FreeMem(ScanUppStr, ScanLen);
FreeMem(ScanLowStr, ScanLen);
end;
end;
function ScanStringAsm(SourceStart, SourceEnd, SearchStr: PChar;
CaseSensitive: Boolean): PChar;
var
ScanLen: DWord;
ScanPos: DWord;
ScanStr: PChar;
ScanPtr: PChar;
ScanUppStr: PChar;
ScanUppPtr: PChar;
ScanLowStr: PChar;
ScanLowPtr: PChar;
begin
if SourceStart > SourceEnd then
Exit;
ScanLen := Length(SearchStr);
if not CaseSensitive then
begin
GetMem(ScanUppStr, ScanLen);
CopyMemory(ScanUppStr, SearchStr, ScanLen);
CharUpperBuff(ScanUppStr, ScanLen);
GetMem(ScanLowStr, ScanLen);
CopyMemory(ScanLowStr, SearchStr, ScanLen);
CharLowerBuff(ScanLowStr, ScanLen);
end
else
begin
ScanUppStr := SearchStr;
ScanLowStr := SearchStr;
end;
GetMem(ScanStr, ScanLen * 2 + 2);
ScanPos := ScanLen;
ScanPtr := ScanStr;
ScanUppPtr := ScanUppStr;
ScanLowPtr := ScanLowStr;
while ScanPos > 0 do
begin
ScanPtr^ := ScanUppPtr^;
Inc(ScanPtr);
Inc(ScanUppPtr);
ScanPtr^ := ScanLowPtr^;
Inc(ScanPtr);
Inc(ScanLowPtr);
Dec(ScanPos);
end;
ScanPtr^ := #0;
asm
{Register use:
EDI - pointer to source char
ESI - pointer to par of scan chars
AL - current source char
EBX - match length counter
ECX - source length counter
DX - current par of scan chars}
end;
if not CaseSensitive then
begin
FreeMem(ScanUppStr, ScanLen);
FreeMem(ScanLowStr, ScanLen);
end;
FreeMem(ScanStr, ScanLen * 2 + 2);
end;
end;
if not CaseSensitive then
begin
FreeMem(ScanUppStr, ScanLen);
FreeMem(ScanLowStr, ScanLen);
end;
FreeMem(ScanStr, ScanLen * 2 + 2);
end;
{Preserve registers:}
PUSH EBX {Preserve registers EBX, EDI, ESI:}
PUSH EDI
PUSH ESI
{Initialize registers:}
MOV EDI, SourceStart {Move addr SourceStart to EDI}
MOV ECX, SourceEnd {Calculate source length in ECX:}
SUB ECX, EDI
INC ECX
MOV ESI, ScanStr {Move addr ScanStr to ESI}
MOV DX, WORD[ESI] {Move first par of scan chars to DX}
xor EBX, EBX {Set EBX (match counter) to 0}
@01: {Main test loop:}
MOV AL, BYTE[EDI] {Move current source char to AL}
INC EDI {Inc EDI to point to next source char}
CMP AL, DL {Compare AL with scan char in DL (uppcase)}
JE@10 {Jump to @10 if equal (match)}
CMP AL, DH {Compare AL with scan char in DH (lowcase)}
JE@10 {Jump to @10 if equal (match)}
TEST EBX, EBX {Test EBX (match counter)}
JZ@02 {Jump to @02 if zero (i.e. first scan char)}
SUB ESI, EBX {Move ESI back to start of scan string:}
SUB ESI, EBX
MOV DX, WORD[ESI] {Move first par of scan chars to DX}
xor EBX, EBX {Set EBX to 0}
@02: {Next loop:}
DEC ECX {Dec ECX (source length counter)}
JNZ@01 {Jump back to @01 if not zero}
MOV Result, 0 {Move nil to Result (match not found)}
JMP@99 {Jump to @99}
@10: {Char match found:}
INC EBX {Inc EBX (match length counter):}
ADD ESI, 2 {Move ESI to next par of scan chars:}
MOV DX, WORD[ESI] {Move this par of scan chars to DX}
CMP DL, 0 {Compare char in DL with #0 (end of string)}
JNE@02 {Jump to @02 if not equal (test next char)}
{Match found:}
SUB EDI, EBX {Move EDI back to first char in match}
MOV Result, EDI {Move addr of match to Result}
@99: {Restore registers:}
POP ESI
POP EDI
POP EBX
end;
if not CaseSensitive then
begin
FreeMem(ScanUppStr, ScanLen);
FreeMem(ScanLowStr, ScanLen);
end;
FreeMem(ScanStr, ScanLen * 2 + 2);
end;
procedure TimeTest2;
var
Time1: DWord;
Time2: DWord;
Search: string;
TestName: string;
TestFile: file;
TestSize: DWord;
TestBuff: PChar;
TestScan: PChar;
TestPtr: PChar;
TestPos: Integer;
HitCount: Integer;
n, i, j: Integer;
c: Char;
Show: Boolean;
begin
n := 20;
Show := false;
Search := 'WINDOWS';
{TestBuff := PChar(Search);
TestScan := TestBuff;
c := TestScan^;
Time1 := GetTickCount;
for i := 1 to 10000000 do
begin
if TestBuff^ = c then
begin
end;
end;
Time2 := GetTickCount;
WriteLn('Tickcount : ', Time2 - Time1);
Exit;}
TestName := 'c:\windows\help\getstart.chm';
AssignFile(TestFile, TestName);
Reset(TestFile, 1);
TestSize := FileSize(TestFile);
GetMem(TestBuff, TestSize);
BlockRead(TestFile, TestBuff^, TestSize);
CloseFile(TestFile);
WriteLn;
WriteLn('Scaning for "', Search, '" ', n, ' times');
WriteLn('in file: ', TestName, ' size: ', TestSize, ' bytes');
HitCount := 0;
Time1 := GetTickCount;
for i := 1 to n do
begin
TestScan := TestBuff;
repeat
if TestScan <> TestBuff then
Inc(TestScan, Length(Search));
TestScan := ScanString(TestScan, TestBuff + TestSize - 1, PChar(Search), false);
if TestScan <> nil then
begin
Inc(HitCount);
if Show then
begin
Write(HitCount, ' ');
TestPtr := TestScan;
for TestPos := 1 to Length(Search) do
begin
Write(TestPtr^);
Inc(TestPtr);
end;
WriteLn;
ReadLn;
end;
end;
until TestScan = nil;
end;
Time2 := GetTickCount;
WriteLn(' Tickcount ScanString : ', Time2 - Time1: 5, 'ms', ' hitcount:', HitCount);
HitCount := 0;
Time1 := GetTickCount;
for i := 1 to n do
begin
TestScan := TestBuff;
repeat
if TestScan <> TestBuff then
Inc(TestScan, Length(Search));
TestScan := ScanStringNew(TestScan, TestBuff + TestSize - 1, PChar(Search), false);
if TestScan <> nil then
begin
Inc(HitCount);
if Show then
begin
Write(HitCount, ' ');
TestPtr := TestScan;
for TestPos := 1 to Length(Search) do
begin
Write(TestPtr^);
Inc(TestPtr);
end;
WriteLn;
ReadLn;
end;
end;
until TestScan = nil;
end;
Time2 := GetTickCount;
WriteLn(' Tickcount ScanStringNew: ', Time2 - Time1: 5, 'ms', ' hitcount:', HitCount);
HitCount := 0;
Time1 := GetTickCount;
for i := 1 to n do
begin
TestScan := TestBuff;
repeat
if TestScan <> TestBuff then
Inc(TestScan, Length(Search));
TestScan := ScanStringAsm(TestScan, TestBuff + TestSize - 1, PChar(Search), false);
if TestScan <> nil then
begin
Inc(HitCount);
if Show then
begin
Write(HitCount, ' ');
TestPtr := TestScan;
for TestPos := 1 to Length(Search) do
begin
Write(TestPtr^);
Inc(TestPtr);
end;
WriteLn;
ReadLn;
end;
end;
until TestScan = nil;
end;
Time2 := GetTickCount;
WriteLn(' Tickcount ScanStringAsm: ', Time2 - Time1: 5, 'ms', ' hitcount:', HitCount);
FreeMem(TestBuff, TestSize);
end;
begin
TimeTest2;
WriteLn;
WriteLn('** press enter to close **');
ReadLn;
end.
Solve 3:
function ScanFile(const filename: string; const forString: string; caseSensitive: Boolean): LongInt;
{ returns position of string in file or -1, if not found }
const
BufferSize = $8001; { 32K + 1 bytes }
var
pBuf, pEnd, pScan, pPos: Pchar;
filesize: LongInt;
bytesRemaining: LongInt;
bytesToRead: Integer;
F: file;
SearchFor: Pchar;
oldMode: Word;
begin
Result := -1; { assume failure }
if (Length(forString) = 0) or (Length(filename) = 0) then
Exit;
SearchFor := nil;
pBuf := nil;
{ open file as binary, 1 byte recordsize }
AssignFile(F, filename);
oldMode := FileMode;
FileMode := 0; { read-only access }
Reset(F, 1);
FileMode := oldMode;
try { allocate memory for buffer and pchar search string }
SearchFor := StrAlloc(Length(forString) + 1);
StrPCopy(SearchFor, forString);
if not caseSensitive then { convert to upper case }
AnsiUpper(SearchFor);
GetMem(pBuf, BufferSize);
filesize := System.Filesize(F);
bytesRemaining := filesize;
pPos := nil;
while bytesRemaining > 0 do
begin
{ calc how many bytes to read this round }
if bytesRemaining >= BufferSize then
bytesToRead := Pred(BufferSize)
else
bytesToRead := bytesRemaining;
{ read a buffer full and zero-terminate the buffer }
BlockRead(F, pBuf^, bytesToRead, bytesToRead);
pEnd := @pBuf[bytesToRead];
pEnd^ := #0;
{ scan the buffer. Problem: buffer may contain #0 chars! So we treat it as
a concatenation of zero-terminated strings. }
pScan := pBuf;
while pScan < pEnd do
begin
if not caseSensitive then { convert to upper case }
AnsiUpper(pScan);
pPos := StrPos(pScan, SearchFor); { search for substring }
if pPos <> nil then
begin { Found it! }
Result := FileSize - bytesRemaining + LongInt(pPos) - LongInt(pBuf);
Break;
end;
pScan := StrEnd(pScan);
Inc(pScan);
end;
if pPos <> nil then
Break;
bytesRemaining := bytesRemaining - bytesToRead;
if bytesRemaining > 0 then
begin
{ no luck in this buffers load. We need to handle the case of the search
string spanning two chunks of file now. We simply go back a bit in the file
and read from there, thus inspecting some characters twice }
Seek(F, FilePos(F) - Length(forString));
bytesRemaining := bytesRemaining + Length(forString);
end;
end;
finally
CloseFile(F);
if SearchFor <> nil then
StrDispose(SearchFor);
if pBuf <> nil then
FreeMem(pBuf, BufferSize);
end;
end;
Solve 4:
One option is to just read the entire file into a single string. The old-fashioned way is to use BlockRead. You could also use a file stream. Once you have it in a single string you can use any normal string operations, even if there are embedded null bytes or CR/LF's.
procedure TForm1.Button1Click(Sender: TObject);
var
s: string;
f: file;
p: integer;
begin
AssignFile(f, 'c:\winnt\system32\mspaint.exe');
FileMode := 0;
Reset(f, 1);
SetLength(s, FileSize(f));
BlockRead(f, s[1], FileSize(f));
CloseFile(f);
p := pos('This program cannot be run in DOS mode', s);
Label1.Caption := 'Found at : ' + IntToStr(p);
end;
Feliratkozás:
Megjegyzések küldése (Atom)
Nincsenek megjegyzések:
Megjegyzés küldése