2009. október 16., péntek

Extract string property values from DFM files


Problem/Question/Abstract:

Does anybody know the name of the routine used in the IDE that enables control characters embedded into a string to be recognised. For example the characters 'Line 1'#13#10'Line 2' are recognised by the compiler as a single string literal. I'd like to build a similar facility into an application. Is a single routine used for this or is it embedded somewhere in the parser of the compiler?

Answer:

I'm pretty sure that the compiler uses an internal routine for this which is not accessible to us mere mortals. Perhaps you can extract something useful from the unit below. I wrote it to extract string property values from DFM files. It is a work in progress, so if you want to use it for the same purpose be aware that you may have DFMs it will not be able to digest without modifications.

unit DFMParser;

interface

uses
  classes, sysutils;

type
  TBaseParser = class
  private
    FText: string;
    FCurrent, FAnchor: Integer;
    FToken: string;
  protected
    procedure Error(const S: string); overload;
    procedure Error(const fmt: string; const A: array of const); overload;
    procedure DropAnchor;
    procedure NextToken;
    procedure NextChar;
    procedure SkipWhitespace;
    procedure SkipToEol;
    procedure SkipTo(ch: Char);
    procedure SkipToString(const S: string);
    function EndOfText: Boolean;
    function IsTokenChar: Boolean;
    function IsWhiteSpace: Boolean;
    function CurrentChar: Char;
    function LastWord: string;
    function ParseEncodedChar: Char;
    function ParseQuotedString: string;
    function ParseStringValue: string;
  public
    procedure Parse; virtual; abstract;
    constructor Create(const S: string); virtual;
    property Token: string read FToken;
  end;

  TParsePropertyEvent = procedure(const aComponentName, aPropertyName,
    aPropertyValue: string) of object;

  TDFMParser = class(TBaseParser)
  private
    FParsePropertyEvent: TParsePropertyEvent;
  protected
    procedure ParseComponent;
    procedure ParseProperty(const componentName: string);
    procedure ParsePropertyString(const componentName, propertyName: string);
    function TokenIsObject: Boolean;
    function IsEndToken: Boolean;
    procedure DoPropertyEvent(const componentName, propertyname, propvalue: string);
  public
    procedure Parse; override;
    property OnParseProperty: TParsePropertyEvent read FParsePropertyEvent
      write FParsePropertyEvent;
  end;

  EDFMParserError = class(Exception);

  TTranslationItemEvent = procedure(const name, value: string) of object;

  TTranslationParser = class(TBaseParser)
  private
    FTranslationItemEvent: TTranslationItemEvent;
    procedure ParseStringConstant;
    procedure SkipWhitespaceAndComments;
    procedure DoTranslationItem(const name, value: string);
  public
    constructor Create(const S: string); override;
    procedure Parse; override;
    property OnTranslationItem: TTranslationItemEvent read FTranslationItemEvent
      write FTranslationItemEvent;
  end;

implementation

uses
  charsets;

const
  quote = '''';

constructor TBaseParser.Create(const S: string);
begin
  FText := S;
  FCurrent := 1;
end;

function TBaseParser.CurrentChar: Char;
begin
  result := FText[FCurrent];
end;

procedure TBaseParser.DropAnchor;
begin
  FAnchor := FCurrent;
end;

function TBaseParser.EndOfText: Boolean;
begin
  result := FCurrent > Length(FText);
end;

procedure TBaseParser.Error(const S: string);
begin
  raise EPArserError.Create(S);
end;

procedure TBaseParser.Error(const fmt: string; const A: array of const);
begin
  Error(Format(fmt, A));
end;

function TBaseParser.IsTokenChar: Boolean;
begin
  result := (Currentchar in Charsets.IdentifierChars) or (CurrentChar = '.');
end;

function TBaseParser.IsWhiteSpace: Boolean;
begin
  result := Currentchar in [#1..#32];
end;

function TBaseParser.LastWord: string;
begin
  Assert(FAnchor <= FCurrent);
  result := Copy(FText, FAnchor, FCurrent - FAnchor);
end;

procedure TBaseParser.NextChar;
begin
  Inc(FCurrent);
  if EndOfText then
    Error('Unexpected end of text');
end;

procedure TBaseParser.NextToken;
begin
  SkipWhitespace;
  DropAnchor;
  while not EndOfText and IsTokenChar do
    Inc(FCurrent);
  FToken := LastWord;
end;

procedure TBaseParser.SkipTo(ch: Char);
begin
  while not EndOfText and (Currentchar <> ch) do
    NextChar;
  Inc(FCurrent);
end;

procedure TBaseParser.SkipToString(const S: string);
var
  P: PChar;
begin
  p := StrPos(@FText[FCurrent], Pchar(S));
  if Assigned(p) then
    FCurrent := p - PChar(FText) + 1 + Length(S)
  else
    Error('Expected string "%s" not found', [s]);
end;

procedure TBaseParser.SkipToEol;
begin
  while not EndOfText and (FText[FCurrent] <> #10) do
    Inc(FCurrent);
end;

procedure TBaseParser.SkipWhitespace;
begin
  while not EndOfText and IsWhiteSpace do
    Inc(FCurrent);
end;

function TBaseParser.ParseQuotedString: string;
begin
  Assert(CurrentChar = quote);
  Result := '';
  repeat
    NextChar; {skip leading quote}
    DropAnchor;
    while CurrentChar <> quote do
      NextChar;
    Result := Result + LastWord;
    NextChar;
    if CurrentChar = quote then
      Result := Result + quote; {literal quote}
  until
    CurrentChar <> quote;
  SkipWhitespace;
end;

function TBaseParser.ParseEncodedChar: Char;
var
  allowed: Charsets.TCharset;
  n: Integer;
begin
  Assert(CurrentChar = '#');
  NextChar;
  DropAnchor;
  if CurrentChar = '$' then
  begin
    allowed := CHarsets.HexNumerals;
    NextChar;
  end
  else
    allowed := Charsets.IntegerChars;
  while CurrentChar in allowed do
    NextChar;
  n := StrToInt(LastWord);
  if n > High(Byte) then
    Error('Encountered UNICODE character in string, cannot handle that.');
  Result := Char(n);
end;

function TBaseParser.ParseStringValue: string;
begin
  Result := '';
  while True do
    case CurrentChar of
      quote:
        Result := Result + ParseQuotedString;
      '#':
        Result := Result + ParseEncodedChar;
      '+':
        begin
          NextChar;
          SkipWhitespace;
        end;
    else
      Break;
    end;
end;

{ TDFMParser }

procedure TDFMParser.DoPropertyEvent(const componentName, propertyname, propvalue:
  string);
begin
  if Assigned(FParsePropertyEvent) then
    FParsePropertyEvent(componentName, propertyname, propvalue);
end;

function TDFMParser.IsEndToken: Boolean;
begin
  result := Token = 'end';
end;

procedure TDFMParser.Parse;
begin
  while not EndOfText do
  begin
    ParseComponent;
    SkipWhitespace;
  end;
end;

procedure TDFMParser.ParseComponent;
var
  componentName: string;
begin
  if FToken = '' then
    NextToken;
  if not TokenIsObject then
    Error('Expected: inherited or object, found : %s', [Token]);
  NextToken;
  componentName := Token;
  SkipToEol;
  repeat
    NextToken;
    if TokenIsObject then
      ParseComponent
    else if not IsEndToken then
      ParseProperty(componentName);
  until
    IsEndToken or EndOfText;
  if IsEndToken then
    FToken := '';
end;

procedure TDFMParser.ParseProperty(const componentName: string);
var
  propname: string;
begin
  propname := Token;
  SkipWhitespace;
  if CurrentChar <> '=' then
    Error('Expected: =, found %s', [Currentchar]);
  NextChar;
  SkipWhitespace;
  case CurrentChar of
    '{':
      SkipTo('}');
    '(':
      SkipTo(')');
    '[':
      SkipTo(']');
    quote, '#':
      ParsePropertyString(componentName, propname);
  else
    SkipToEol
  end;
end;

procedure TDFMParser.ParsePropertyString(const componentName, propertyName: string);
var
  propvalue: string;
begin
  propvalue := ParseStringValue;
  if propvalue <> '' then
    DoPropertyEvent(componentName, propertyname, propvalue);
end;

function TDFMParser.TokenIsObject: Boolean;
begin
  Result := (Token = 'inherited') or (Token = 'object')
end;

{ TTranslationParser }

constructor TTranslationParser.Create(const S: string);
const
  resStr = 'resourcestring';
var
  lS: string;
  resourceStringPos: Integer;
  n1, n2: Integer;
begin
  {Isolate the resourcestring section. We expect only one}
  lS := LowerCase(S);
  resourceStringPos := Pos(resStr, lS);
  if resourceStringPos = 0 then
    inherited Create('')
  else
  begin
    {look for an $ifdef german}
    n1 := Pos('{$ifdef german', lS);
    if n1 > 0 then
    begin
      {look for the following $else}
      Delete(lS, 1, n1 - 1);
      n2 := Pos('{$else}', lS);
      if n2 = 0 then
        Error('Malformed $IFDEF...$ELSE encountered, $ELSE not found');
      Delete(lS, 1, n2 - 1);
      Inc(n1, n2 - 1);
      {look for the $ENDIF}
      n2 := Pos('{$endif}', lS);
      if n2 = 0 then
        Error('Malformed $IFDEF...$ENDIF encountered, $ENDIF not found');
      inherited Create(Copy(S, n1, n2 - 1));
    end
    else
    begin
      {look for an $ifndef german}
      n1 := Pos('{$ifndef german', lS);
      if n1 = 0 then
        inherited Create('')
      else
      begin
        {in the $ifndef german construct the resourcestring keyword often comes after the $ifndef.}
        if n1 < resourceStringPos then
          n1 := resourceStringPos + Length(resstr);
        Delete(lS, 1, n1 - 1);
        {look for the $ENDIF}
        n2 := Pos('{$endif}', lS);
        if n2 = 0 then
          Error('Malformed $IFDEF...$ENDIF encountered, $ENDIF not found');
        inherited Create(Copy(S, n1, n2 - 1));
      end;
    end;
  end;
end;

procedure TTranslationParser.DoTranslationItem(const name, value: string);
begin
  if Assigned(FTranslationItemEvent) then
    FTranslationItemEvent(name, value);
end;

procedure TTranslationParser.Parse;
begin
  while not EndOfText do
  begin
    ParseStringConstant;
    SkipWhitespace;
  end;
end;

procedure TTranslationParser.ParseStringConstant;
var
  name, value: string;
begin
  SkipWhitespaceAndComments;
  if EndOfText then
    Exit;
  NextToken;
  name := Token;
  SkipWhitespaceAndComments;
  if EndOfText then
    Exit;
  if CurrentChar <> '=' then
    Error('Expected: =, found "%s"', [CurrentChar]);
  NextChar;
  SkipWhitespaceAndComments;
  if EndOfText then
    Exit;
  value := ParseStringValue;
  SkipWhiteSpace;
  if not EndOfText and (CurrentChar = ';') then
    NextChar;
  DoTranslationItem(name, value);
end;

procedure TTranslationParser.SkipWhitespaceAndComments;
begin
  while True do
  begin
    SkipWhitespace;
    if not EndOfText then
    begin
      case CurrentChar of
        '/':
          SkipToEol; { single line comment }
        '{':
          SkipTo('}'); { comment }
        '(':
          begin
            NextChar;
            if CurrentChar = '*' then
              SkipToString('*)')
            else
              Error('Expected: comment or indentifier, found: "(%s"', [CurrentChar]);
          end;
      else
        Break
      end;
    end
    else
      Break;
  end;
end;

end.

unit Charsets;

interface

type
  TCharSet = set of AnsiChar;
const
  Signs: TCharset = ['-', '+'];
  Numerals: TCharset = ['0'..'9'];
  HexNumerals: TCharset = ['A'..'F', 'a'..'f', '0'..'9'];
  IntegerChars: TCharset = ['0'..'9', '-', '+'];
  IdentifierChars: TCharset = ['a'..'z', 'A'..'Z', '0'..'9', '_'];
var
  Digits, Letters, LowerCaseLetters, UpperCaseLetters: TCharSet;
  FloatChars, SciFloatChars: TCharset;
  AlphaNum, NonAlphaNum: TCharset;

  { Need to call this again when locale changes.  }
procedure SetupCharsets;

implementation

uses
  Windows, Sysutils;

var
  locale: DWORD = 0;

procedure SetupCharsets;
var
  ch: AnsiChar;
begin
  if locale = GetThreadLocale then
    Exit
  else
    Locale := GetThreadLocale;
  LowerCaseLetters := [];
  UpperCaseLetters := [];
  AlphaNum := [];
  NonAlphaNum := [];
  Digits := Numerals;
  for ch := Low(ch) to High(ch) do
  begin
    if IsCharAlpha(ch) then
      if IsCharUpper(ch) then
        Include(UpperCaseLetters, ch)
      else
        Include(LowerCaseLetters, ch);
    if IsCharAlphanumeric(ch) then
      Include(AlphaNum, ch)
    else
      Include(NonAlphaNum, ch);
  end;
  Letters := LowerCaseLetters + UpperCaseLetters;
  FloatChars := IntegerChars;
  Include(FloatChars, DecimalSeparator);
  SciFloatChars := FloatChars + ['e', 'E'];
end;

initialization
  SetupCharsets;
end.

Nincsenek megjegyzések:

Megjegyzés küldése