2009. január 16., péntek
How to split up a formatted source string into substrings and integers
Problem/Question/Abstract:
How to split up a formatted source string into substrings and integers
Answer:
function Unformat(const source, pattern: string; const args: array of const): Integer;
{ The opposite of Format: Unformat splits a formatted source string into
  substrings and Integers. It is an alternative to parsing when the layout of
  the source is known to be fixed.

  Parameters:
    source  - the text to take apart.
    pattern - a combination of plain characters and format specifiers.
    args    - destinations for the extracted values (see below).

  Supported specifiers:
    %s  a string value is extracted and stored
    %d  an integer value is extracted and stored
    %S  a string value is skipped (nothing is stored)
    %D  an integer value is skipped (nothing is stored)

  A string value runs up to (not including) the first occurrence of the
  pattern character that follows the specifier, or to the end of the source
  when the specifier is the last thing in the pattern. An integer value is the
  longest run of characters that are in Digits or equal to '-'.

  Plain pattern characters are NOT compared with the source: each one simply
  consumes one source character. (The mismatch check is deliberately left
  disabled, as in the original code.) A '%' followed by any other character is
  skipped in the pattern without consuming source text.

  For each %s, args must contain a pointer to a short string variable followed
  by an integer giving the maximum number of characters to store. For each %d,
  args must contain a pointer to an Integer variable. %S and %D take no
  arguments.

  Scanning stops as soon as the end of the source (or of the pattern) is
  reached; arguments not yet used are left unmodified, so initialise your
  variables to "default" values before the call.

  Returns the number of specifiers that were processed, including the ignoring
  ones (%S, %D) and a %d for which no digits were found.

  Raises EConvertError when args holds too few entries, when an entry is not of
  the expected kind, or (from StrToInt) when the text collected for %d is not a
  valid integer.

  NOTE(review): Digits is not declared inside this routine; it is presumably a
  set of the characters '0'..'9' declared elsewhere - confirm.

  Examples:
    var
      s1, s2: string[31];
      i: Integer;

    Unformat('[abc]123(def)', '[%s]%d(%s)', [@s1, 31, @i, @s2, 31]);
    (* s1 = 'abc', i = 123, s2 = 'def' *)

    Unformat('Hello, Universe!!!', '%s, %s%d', [@s1, 31, @s2, 31, @i]);
    (* s1 = 'Hello', s2 = 'Universe!!!', i is untouched *)

    Unformat('How much wood could a woodchuck chuck...',
      '%S %S %s could a %S %s...', [@s1, 31, @s2, 31]);
    (* s1 = 'wood', s2 = 'chuck' *)
}

  { Returns the smaller of a and b. Plain Pascal instead of inline assembly so
    that the routine is not tied to 32-bit x86 targets. }
  function Min(a, b: Integer): Integer;
  begin
    if a <= b then
      Result := a
    else
      Result := b;
  end;

var
  i, j, argindex, start, maxlen: Integer;
  c: Char;
begin
  Result := 0;
  argindex := 0;
  i := 1; { current position in pattern }
  j := 1; { current position in source }
  { i < Length(pattern) guarantees that pattern[i + 1] exists whenever a '%'
    is inspected; as a consequence the very last pattern character is never
    examined on its own. }
  while (i < Length(pattern)) and (j <= Length(source)) do
  begin
    if pattern[i] = '%' then
      case pattern[i + 1] of
        'D':
          begin
            Inc(i, 2);
            { skip the integer without storing it }
            while (j <= Length(source)) and
              ((source[j] in Digits) or (source[j] = '-')) do
              Inc(j);
            Inc(Result);
          end;
        'S':
          begin
            Inc(i, 2);
            { count the value before a possible Break, so that a trailing %S
              is counted exactly like one that is followed by a delimiter }
            Inc(Result);
            if i > Length(pattern) then
              Break; { trailing %S: the rest of the source is ignored }
            c := pattern[i]; { delimiter that ends the string value }
            while (j <= Length(source)) and (source[j] <> c) do
              Inc(j);
          end;
        'd':
          begin
            if argindex > High(args) then
              raise EConvertError.Create('Not enough arguments');
            if args[argindex].VType <> vtPointer then
              raise EConvertError.Create('Pointer to integer expected');
            Inc(i, 2);
            start := j;
            while (j <= Length(source)) and
              ((source[j] in Digits) or (source[j] = '-')) do
              Inc(j);
            { leave the target untouched when no digits were found }
            if j > start then
              PInteger(args[argindex].VPointer)^ :=
                StrToInt(Copy(source, start, j - start));
            Inc(argindex);
            Inc(Result);
          end;
        's':
          begin
            { a %s needs two entries: the string pointer and its size }
            if argindex > High(args) - 1 then
              raise EConvertError.Create('Not enough arguments');
            if args[argindex].VType <> vtPointer then
              raise EConvertError.Create('Pointer to string expected');
            if args[argindex + 1].VType <> vtInteger then
              raise EConvertError.Create('No string size specified');
            maxlen := args[argindex + 1].VInteger;
            Inc(i, 2);
            if i > Length(pattern) then
            begin
              { trailing %s: take everything that is left, capped at maxlen }
              PShortString(args[argindex].VPointer)^ :=
                Copy(source, j, Min(Length(source) + 1 - j, maxlen));
              Inc(argindex, 2);
              Inc(Result); { was skipped by the Break in the original }
              Break;
            end;
            c := pattern[i]; { delimiter that ends the string value }
            start := j;
            while (j <= Length(source)) and (source[j] <> c) do
              Inc(j);
            PShortString(args[argindex].VPointer)^ :=
              Copy(source, start, Min(j - start, maxlen));
            Inc(argindex, 2);
            Inc(Result);
          end;
      else
        { unknown specifier: drop the '%' and treat what follows as plain text }
        Inc(i);
      end
    else
    begin
      { Plain character: advance both positions. Mismatch detection
        (raising EConvertError 'Pattern mismatch' when
        pattern[i] <> source[j]) is intentionally not performed. }
      Inc(i);
      Inc(j);
    end;
  end;
end;
Feliratkozás:
Megjegyzések küldése (Atom)
Nincsenek megjegyzések:
Megjegyzés küldése