2009. október 16., péntek
Extract string property values from DFM files
Problem/Question/Abstract:
Does anybody know the name of the routine used in the IDE that enables control characters embedded into a string to be recognised. For example the characters 'Line 1'#13#10'Line 2' are recognised by the compiler as a single string literal. I'd like to build a similar facility into an application. Is a single routine used for this or is it embedded somewhere in the parser of the compiler?
Answer:
I'm pretty sure that the compiler uses an internal routine for this which is not accessible to us mere mortals. Perhaps you can extract something useful from the unit below. I wrote it to extract string property values from DFM files. It is a work in progress, so if you want to use it for the same purpose be aware that you may have DFMs it will not be able to digest without modifications.
unit DFMParser;
interface
uses
classes, sysutils;
type
TBaseParser = class
private
FText: string;
FCurrent, FAnchor: Integer;
FToken: string;
protected
procedure Error(const S: string); overload;
procedure Error(const fmt: string; const A: array of const); overload;
procedure DropAnchor;
procedure NextToken;
procedure NextChar;
procedure SkipWhitespace;
procedure SkipToEol;
procedure SkipTo(ch: Char);
procedure SkipToString(const S: string);
function EndOfText: Boolean;
function IsTokenChar: Boolean;
function IsWhiteSpace: Boolean;
function CurrentChar: Char;
function LastWord: string;
function ParseEncodedChar: Char;
function ParseQuotedString: string;
function ParseStringValue: string;
public
procedure Parse; virtual; abstract;
constructor Create(const S: string); virtual;
property Token: string read FToken;
end;
TParsePropertyEvent = procedure(const aComponentName, aPropertyName,
aPropertyValue: string) of object;
TDFMParser = class(TBaseParser)
private
FParsePropertyEvent: TParsePropertyEvent;
protected
procedure ParseComponent;
procedure ParseProperty(const componentName: string);
procedure ParsePropertyString(const componentName, propertyName: string);
function TokenIsObject: Boolean;
function IsEndToken: Boolean;
procedure DoPropertyEvent(const componentName, propertyname, propvalue: string);
public
procedure Parse; override;
property OnParseProperty: TParsePropertyEvent read FParsePropertyEvent
write FParsePropertyEvent;
end;
EDFMParserError = class(Exception);
TTranslationItemEvent = procedure(const name, value: string) of object;
TTranslationParser = class(TBaseParser)
private
FTranslationItemEvent: TTranslationItemEvent;
procedure ParseStringConstant;
procedure SkipWhitespaceAndComments;
procedure DoTranslationItem(const name, value: string);
public
constructor Create(const S: string); override;
procedure Parse; override;
property OnTranslationItem: TTranslationItemEvent read FTranslationItemEvent
write FTranslationItemEvent;
end;
implementation
uses
charsets;
const
quote = '''';
constructor TBaseParser.Create(const S: string);
begin
FText := S;
FCurrent := 1;
end;
function TBaseParser.CurrentChar: Char;
begin
result := FText[FCurrent];
end;
procedure TBaseParser.DropAnchor;
begin
FAnchor := FCurrent;
end;
function TBaseParser.EndOfText: Boolean;
begin
result := FCurrent > Length(FText);
end;
procedure TBaseParser.Error(const S: string);
begin
raise EPArserError.Create(S);
end;
procedure TBaseParser.Error(const fmt: string; const A: array of const);
begin
Error(Format(fmt, A));
end;
function TBaseParser.IsTokenChar: Boolean;
begin
result := (Currentchar in Charsets.IdentifierChars) or (CurrentChar = '.');
end;
function TBaseParser.IsWhiteSpace: Boolean;
begin
result := Currentchar in [#1..#32];
end;
function TBaseParser.LastWord: string;
begin
Assert(FAnchor <= FCurrent);
result := Copy(FText, FAnchor, FCurrent - FAnchor);
end;
procedure TBaseParser.NextChar;
begin
Inc(FCurrent);
if EndOfText then
Error('Unexpected end of text');
end;
procedure TBaseParser.NextToken;
begin
SkipWhitespace;
DropAnchor;
while not EndOfText and IsTokenChar do
Inc(FCurrent);
FToken := LastWord;
end;
procedure TBaseParser.SkipTo(ch: Char);
begin
while not EndOfText and (Currentchar <> ch) do
NextChar;
Inc(FCurrent);
end;
procedure TBaseParser.SkipToString(const S: string);
var
P: PChar;
begin
p := StrPos(@FText[FCurrent], Pchar(S));
if Assigned(p) then
FCurrent := p - PChar(FText) + 1 + Length(S)
else
Error('Expected string "%s" not found', [s]);
end;
procedure TBaseParser.SkipToEol;
begin
while not EndOfText and (FText[FCurrent] <> #10) do
Inc(FCurrent);
end;
procedure TBaseParser.SkipWhitespace;
begin
while not EndOfText and IsWhiteSpace do
Inc(FCurrent);
end;
function TBaseParser.ParseQuotedString: string;
begin
Assert(CurrentChar = quote);
Result := '';
repeat
NextChar; {skip leading quote}
DropAnchor;
while CurrentChar <> quote do
NextChar;
Result := Result + LastWord;
NextChar;
if CurrentChar = quote then
Result := Result + quote; {literal quote}
until
CurrentChar <> quote;
SkipWhitespace;
end;
function TBaseParser.ParseEncodedChar: Char;
var
allowed: Charsets.TCharset;
n: Integer;
begin
Assert(CurrentChar = '#');
NextChar;
DropAnchor;
if CurrentChar = '$' then
begin
allowed := CHarsets.HexNumerals;
NextChar;
end
else
allowed := Charsets.IntegerChars;
while CurrentChar in allowed do
NextChar;
n := StrToInt(LastWord);
if n > High(Byte) then
Error('Encountered UNICODE character in string, cannot handle that.');
Result := Char(n);
end;
function TBaseParser.ParseStringValue: string;
begin
Result := '';
while True do
case CurrentChar of
quote:
Result := Result + ParseQuotedString;
'#':
Result := Result + ParseEncodedChar;
'+':
begin
NextChar;
SkipWhitespace;
end;
else
Break;
end;
end;
{ TDFMParser }
procedure TDFMParser.DoPropertyEvent(const componentName, propertyname, propvalue:
string);
begin
if Assigned(FParsePropertyEvent) then
FParsePropertyEvent(componentName, propertyname, propvalue);
end;
function TDFMParser.IsEndToken: Boolean;
begin
result := Token = 'end';
end;
procedure TDFMParser.Parse;
begin
while not EndOfText do
begin
ParseComponent;
SkipWhitespace;
end;
end;
procedure TDFMParser.ParseComponent;
var
componentName: string;
begin
if FToken = '' then
NextToken;
if not TokenIsObject then
Error('Expected: inherited or object, found : %s', [Token]);
NextToken;
componentName := Token;
SkipToEol;
repeat
NextToken;
if TokenIsObject then
ParseComponent
else if not IsEndToken then
ParseProperty(componentName);
until
IsEndToken or EndOfText;
if IsEndToken then
FToken := '';
end;
procedure TDFMParser.ParseProperty(const componentName: string);
var
propname: string;
begin
propname := Token;
SkipWhitespace;
if CurrentChar <> '=' then
Error('Expected: =, found %s', [Currentchar]);
NextChar;
SkipWhitespace;
case CurrentChar of
'{':
SkipTo('}');
'(':
SkipTo(')');
'[':
SkipTo(']');
quote, '#':
ParsePropertyString(componentName, propname);
else
SkipToEol
end;
end;
procedure TDFMParser.ParsePropertyString(const componentName, propertyName: string);
var
propvalue: string;
begin
propvalue := ParseStringValue;
if propvalue <> '' then
DoPropertyEvent(componentName, propertyname, propvalue);
end;
function TDFMParser.TokenIsObject: Boolean;
begin
Result := (Token = 'inherited') or (Token = 'object')
end;
{ TTranslationParser }
constructor TTranslationParser.Create(const S: string);
const
resStr = 'resourcestring';
var
lS: string;
resourceStringPos: Integer;
n1, n2: Integer;
begin
{Isolate the resourcestring section. We expect only one}
lS := LowerCase(S);
resourceStringPos := Pos(resStr, lS);
if resourceStringPos = 0 then
inherited Create('')
else
begin
{look for an $ifdef german}
n1 := Pos('{$ifdef german', lS);
if n1 > 0 then
begin
{look for the following $else}
Delete(lS, 1, n1 - 1);
n2 := Pos('{$else}', lS);
if n2 = 0 then
Error('Malformed $IFDEF...$ELSE encountered, $ELSE not found');
Delete(lS, 1, n2 - 1);
Inc(n1, n2 - 1);
{look for the $ENDIF}
n2 := Pos('{$endif}', lS);
if n2 = 0 then
Error('Malformed $IFDEF...$ENDIF encountered, $ENDIF not found');
inherited Create(Copy(S, n1, n2 - 1));
end
else
begin
{look for an $ifndef german}
n1 := Pos('{$ifndef german', lS);
if n1 = 0 then
inherited Create('')
else
begin
{in the $ifndef german construct the resourcestring keyword often comes after the $ifndef.}
if n1 < resourceStringPos then
n1 := resourceStringPos + Length(resstr);
Delete(lS, 1, n1 - 1);
{look for the $ENDIF}
n2 := Pos('{$endif}', lS);
if n2 = 0 then
Error('Malformed $IFDEF...$ENDIF encountered, $ENDIF not found');
inherited Create(Copy(S, n1, n2 - 1));
end;
end;
end;
end;
procedure TTranslationParser.DoTranslationItem(const name, value: string);
begin
if Assigned(FTranslationItemEvent) then
FTranslationItemEvent(name, value);
end;
procedure TTranslationParser.Parse;
begin
while not EndOfText do
begin
ParseStringConstant;
SkipWhitespace;
end;
end;
procedure TTranslationParser.ParseStringConstant;
var
name, value: string;
begin
SkipWhitespaceAndComments;
if EndOfText then
Exit;
NextToken;
name := Token;
SkipWhitespaceAndComments;
if EndOfText then
Exit;
if CurrentChar <> '=' then
Error('Expected: =, found "%s"', [CurrentChar]);
NextChar;
SkipWhitespaceAndComments;
if EndOfText then
Exit;
value := ParseStringValue;
SkipWhiteSpace;
if not EndOfText and (CurrentChar = ';') then
NextChar;
DoTranslationItem(name, value);
end;
procedure TTranslationParser.SkipWhitespaceAndComments;
begin
while True do
begin
SkipWhitespace;
if not EndOfText then
begin
case CurrentChar of
'/':
SkipToEol; { single line comment }
'{':
SkipTo('}'); { comment }
'(':
begin
NextChar;
if CurrentChar = '*' then
SkipToString('*)')
else
Error('Expected: comment or indentifier, found: "(%s"', [CurrentChar]);
end;
else
Break
end;
end
else
Break;
end;
end;
end.
unit Charsets;
interface
type
TCharSet = set of AnsiChar;
const
Signs: TCharset = ['-', '+'];
Numerals: TCharset = ['0'..'9'];
HexNumerals: TCharset = ['A'..'F', 'a'..'f', '0'..'9'];
IntegerChars: TCharset = ['0'..'9', '-', '+'];
IdentifierChars: TCharset = ['a'..'z', 'A'..'Z', '0'..'9', '_'];
var
Digits, Letters, LowerCaseLetters, UpperCaseLetters: TCharSet;
FloatChars, SciFloatChars: TCharset;
AlphaNum, NonAlphaNum: TCharset;
{ Need to call this again when locale changes. }
procedure SetupCharsets;
implementation
uses
Windows, Sysutils;
var
locale: DWORD = 0;
procedure SetupCharsets;
var
ch: AnsiChar;
begin
if locale = GetThreadLocale then
Exit
else
Locale := GetThreadLocale;
LowerCaseLetters := [];
UpperCaseLetters := [];
AlphaNum := [];
NonAlphaNum := [];
Digits := Numerals;
for ch := Low(ch) to High(ch) do
begin
if IsCharAlpha(ch) then
if IsCharUpper(ch) then
Include(UpperCaseLetters, ch)
else
Include(LowerCaseLetters, ch);
if IsCharAlphanumeric(ch) then
Include(AlphaNum, ch)
else
Include(NonAlphaNum, ch);
end;
Letters := LowerCaseLetters + UpperCaseLetters;
FloatChars := IntegerChars;
Include(FloatChars, DecimalSeparator);
SciFloatChars := FloatChars + ['e', 'E'];
end;
initialization
SetupCharsets;
end.
Feliratkozás:
Megjegyzések küldése (Atom)
Nincsenek megjegyzések:
Megjegyzés küldése