2004. február 5., csütörtök

How to determine whether a string contains binary data or normal readable text


Problem/Question/Abstract:

How to determine whether a string contains binary data or normal readable text

Answer:

Solution 1:

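Strictly speaking, there is no real difference between binary data and text: both are just sequences of bytes. There is only a common convention that every character below ASCII code 32 (the control characters) is treated as "binary". Note that this range also includes tab (9), line feed (10) and carriage return (13), so the function below reports multi-line or tab-separated text as not readable.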

{ Returns True when every character of st has a code of 32 or above,
  i.e. the string contains no ASCII control characters.
  An empty string is considered readable.
  Tab, line feed and carriage return are below 32 and therefore make the
  string count as not readable. }
function IsReadable(st: string): Boolean;
var
  CharIdx: Integer;
begin
  Result := True;
  CharIdx := 1;
  // Stop at the first control character; Result then stays False.
  while Result and (CharIdx <= Length(st)) do
  begin
    Result := st[CharIdx] >= #32;
    Inc(CharIdx);
  end;
end;


Solution 2:

{ Makes a rough guess at the kind of content stored in the file at Path by
  sampling its leading bytes (at most SizeOf(ReadBuffer) of them).

  Path              - file to inspect.
  VeryBasicFileType - receives one of the VERY_BASIC_FILETYPE_* constants:
    UNKNOWN      the file does not exist or no bytes could be read;
    EMPTY        the file has a size of zero;
    TEXT_UNICODE many NUL bytes and the sample starts with a UTF-16 BOM
                 (FF FE or FE FF);
    BINARY       many NUL bytes without such a BOM, or many non-ASCII/control
                 bytes with few line breaks and few spaces;
    TEXT_UTF8    the sample starts with the UTF-8 BOM (EF BB BF) and contains
                 enough line breaks;
    TEXT         everything else.

  Exceptions raised by TFileStream (e.g. the file cannot be opened) are not
  caught here; the stream is always released. }
procedure GetVeryBasicFileType(const Path: string;
  var VeryBasicFileType: LongWord); overload;
var
  Fs: TFileStream;
  // Integer rather than Word so that a sample buffer of 64 KB or more
  // cannot silently wrap the counters.
  ExtCharCount, NothingCount, SpaceCount, LfCount, i, DidRead: Integer;
  LastB, B: Byte;
  ReadBuffer: array[0..MAX_FILE_DETECT_BUFFER] of Byte;
  HasUtf16Bom, HasUtf8Bom: Boolean;
begin
  VeryBasicFileType := VERY_BASIC_FILETYPE_UNKNOWN;
  if not FileExists(Path) then
    Exit;

  // Share-deny-write instead of the implicit exclusive mode, so the probe
  // does not fail just because another reader has the file open.
  Fs := TFileStream.Create(Path, fmOpenRead or fmShareDenyWrite);
  try
    if Fs.Size < 1 then
    begin
      VeryBasicFileType := VERY_BASIC_FILETYPE_EMPTY;
      Exit;
    end;

    // A freshly created stream is already positioned at offset 0.
    DidRead := Fs.Read(ReadBuffer, SizeOf(ReadBuffer));
    if DidRead < 1 then
      Exit; // nothing readable: leave the result as UNKNOWN

    LfCount := 0;
    ExtCharCount := 0;
    SpaceCount := 0;
    NothingCount := 0;
    LastB := 0;

    // Classify every byte that was actually read, including the last one.
    for i := 0 to DidRead - 1 do
    begin
      B := ReadBuffer[i];
      if B = 0 then
        Inc(NothingCount)
      else if (B = $0A) or (B = $0D) then
        Inc(LfCount)
      else if (B < 32) or (B > 127) then
        Inc(ExtCharCount)
      else if (B = $20) and (LastB <> $20) then
        Inc(SpaceCount); // $20 is the space character; runs count once
      LastB := B;
    end;

    // Only look at BOM bytes that were really read; the rest of ReadBuffer
    // is uninitialised.
    HasUtf16Bom := (DidRead >= 4) and
      (((ReadBuffer[0] = $FF) and (ReadBuffer[1] = $FE)) or
       ((ReadBuffer[0] = $FE) and (ReadBuffer[1] = $FF)));
    HasUtf8Bom := (DidRead >= 3) and (ReadBuffer[0] = $EF) and
      (ReadBuffer[1] = $BB) and (ReadBuffer[2] = $BF);

    // More than 1 NUL byte per 30 sampled bytes: UTF-16 text or binary.
    if NothingCount > (DidRead div 30) then
    begin
      if HasUtf16Bom then
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT_UNICODE
      else
        VeryBasicFileType := VERY_BASIC_FILETYPE_BINARY;
      Exit;
    end;

    if ExtCharCount > (DidRead div 3) then
    begin
      // Over a third of the sample is non-ASCII or control bytes; still
      // accept it as text when line breaks or spaces are frequent enough.
      if (LfCount >= (DidRead div 60)) or (SpaceCount > (DidRead div 6)) then
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT
      else
        VeryBasicFileType := VERY_BASIC_FILETYPE_BINARY;
    end
    else if HasUtf8Bom and (LfCount > (DidRead div 60)) then
      VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT_UTF8
    else
      VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT;
  finally
    Fs.Free;
  end;
end;

Nincsenek megjegyzések:

Megjegyzés küldése