2004. február 5., csütörtök
How to determine whether a string contains binary data or normal readable text
Problem/Question/Abstract:
How to determine whether a string contains binary data or normal readable text
Answer:
Solution 1:
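Strictly speaking, there is no real difference between binary data and text. There is only a common convention that every character below ASCII code 32 is treated as a "binary" character. Note that this convention also classifies tab, line feed and carriage return as binary.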
{ Reports whether st is free of characters whose code is below 32.
  Returns True for an empty string, and False as soon as one such
  character is found. Tab, line feed and carriage return are below 32
  and therefore make the result False as well. }
function IsReadable(st: string): Boolean;
var
  CharIdx: Integer;
begin
  // Assume readable until a character below code 32 proves otherwise.
  Result := True;
  for CharIdx := 1 to Length(st) do
  begin
    if Ord(st[CharIdx]) < 32 then
    begin
      Result := False;
      Break;
    end;
  end;
end;
Solution 2:
{ Makes a rough guess at the kind of content stored in the file at Path
  by sampling the bytes at its start (at most SizeOf(ReadBuffer) bytes).

  Path              - file to inspect.
  VeryBasicFileType - receives one of the VERY_BASIC_FILETYPE_* values:
    UNKNOWN      the file does not exist, or no bytes could be read;
    EMPTY        the file size is zero;
    TEXT_UNICODE many zero bytes and the sample starts with FF FE or FE FF;
    BINARY       many zero bytes without such a mark, or many
                 control/high bytes with few line breaks and spaces;
    TEXT_UTF8    sample starts with EF BB BF and contains enough line breaks;
    TEXT         everything else.

  Any exception raised while opening or reading the file propagates to
  the caller; the stream is always released. }
procedure GetVeryBasicFileType(const Path: string;
  var VeryBasicFileType: LongWord); overload;
var
  Fs: TFileStream;
  // Integer rather than Word: the counters must not wrap if the detect
  // buffer is larger than 65535 bytes, and "DidRead - 1" must stay a
  // valid loop bound.
  ExtCharCount, NothingCount, SpaceCount, LfCount, i, DidRead: Integer;
  LastB, B: Byte;
  ReadBuffer: array[0..MAX_FILE_DETECT_BUFFER] of Byte;
  HasUtf16Bom, HasUtf8Bom: Boolean;
begin
  VeryBasicFileType := VERY_BASIC_FILETYPE_UNKNOWN;
  if not FileExists(Path) then
    Exit;

  Fs := TFileStream.Create(Path, fmOpenRead);
  try
    if Fs.Size < 1 then
    begin
      VeryBasicFileType := VERY_BASIC_FILETYPE_EMPTY;
      Exit;
    end;

    Fs.Seek(0, soFromBeginning);
    DidRead := Fs.Read(ReadBuffer, SizeOf(ReadBuffer));
    // Nothing was read: the buffer is uninitialised, so do not classify it.
    if DidRead <= 0 then
      Exit;

    LfCount := 0;
    ExtCharCount := 0;
    SpaceCount := 0;
    NothingCount := 0;
    LastB := 0;

    // Inspect every byte that was read, including the last one.
    for i := 0 to DidRead - 1 do
    begin
      B := ReadBuffer[i];
      if B < 1 then
        Inc(NothingCount)                 // zero byte
      else if ((B < 32) or (B > 127)) and not (B in [$0A, $0D]) then
        Inc(ExtCharCount)                 // control or high byte, not CR/LF
      else if (B = $0A) or (B = $0D) then
        Inc(LfCount)                      // line feed or carriage return
      else if (B = $20) and (LastB <> $20) then
        Inc(SpaceCount);                  // first space of a run of spaces
      LastB := B;
    end;

    // Only look at bytes that were really read; the DidRead guard applies
    // to both byte orders of the two-byte mark.
    HasUtf16Bom := (DidRead >= 4) and
      (((ReadBuffer[0] = $FF) and (ReadBuffer[1] = $FE)) or
       ((ReadBuffer[0] = $FE) and (ReadBuffer[1] = $FF)));
    HasUtf8Bom := (DidRead >= 3) and
      (ReadBuffer[0] = $EF) and (ReadBuffer[1] = $BB) and
      (ReadBuffer[2] = $BF);

    // More than 1/30 zero bytes: either 16-bit text or binary data.
    if NothingCount > (DidRead div 30) then
    begin
      if HasUtf16Bom then
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT_UNICODE
      else
        VeryBasicFileType := VERY_BASIC_FILETYPE_BINARY;
      Exit;
    end;

    if ExtCharCount > (DidRead div 3) then
    begin
      // Many control/high bytes: still accept it as text when there are
      // enough line breaks or spaces in the sample.
      if (LfCount >= (DidRead div 60)) or (SpaceCount > (DidRead div 6)) then
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT
      else
        VeryBasicFileType := VERY_BASIC_FILETYPE_BINARY;
    end
    else
    begin
      if HasUtf8Bom and (LfCount > (DidRead div 60)) then
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT_UTF8
      else
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT;
    end;
  finally
    Fs.Free;
  end;
end;
Feliratkozás:
Megjegyzések küldése (Atom)
Nincsenek megjegyzések:
Megjegyzés küldése