2009. január 16., péntek

How to split up a formatted source string into substrings and integers


Problem/Question/Abstract:

How to split up a formatted source string into substrings and integers

Answer:

function Unformat(const source, pattern: string; const args: array of const): Integer;

{ The opposite of Format: Unformat splits a formatted source string into
  substrings and Integers. It is an alternative to parsing when the layout of
  the source is known to be fixed. The pattern parameter is a combination of
  plain characters and format specifiers.

  Supported specifiers:

    %s   a string value is stored
    %d   an integer value is stored
    %S   a string value is skipped
    %D   an integer value is skipped

  A string value runs up to (not including) the first occurrence of the
  pattern character that follows the specifier, or to the end of the source
  when the specifier is the last thing in the pattern or the delimiter is not
  found. An integer value is an optional leading '-' followed by decimal
  digits.

  Plain characters in the pattern are NOT verified against the source: for
  each plain pattern character one source character is skipped, whatever it
  is. The final character of the pattern is never visited at all (the loop
  stops one short so that the character after a '%' can always be read).
  A '%' followed by an unsupported character is skipped without consuming any
  source.

  Arguments:
    For each %s, args must contain a pointer to a short string variable
    followed by an Integer giving the maximum number of characters to store.
    The value is written through a PShortString, so the maximum must not
    exceed the declared size of the variable. For each %d, args must contain
    a pointer to an Integer variable. %S and %D consume no arguments.

  When the end of the source is reached the function returns without touching
  the remaining arguments, so initialise your variables to default values
  before the call.

  Returns the number of specifiers processed (%S and %D included). A %d or %D
  is counted even when no digits were found at the current position; in that
  case the %d target is left unchanged.

  Raises EConvertError when args has too few entries, when a %s / %d argument
  is not a pointer, when the size following a %s pointer is not an Integer,
  or (from StrToInt) when the integer text is not a valid Integer.

  Examples:

    var
      s1, s2: string[31];
      i: Integer;

    Unformat('[abc]123(def)', '[%s]%d(%s)', [@s1, 31, @i, @s2, 31]);
      (* s1 = 'abc', i = 123, s2 = 'def' *)

    Unformat('Hello, Universe!!!', '%s, %s%d', [@s1, 31, @s2, 31, @i]);
      (* s1 = 'Hello', s2 = 'Universe!!!', i is untouched *)

    Unformat('How much wood could a woodchuck chuck...',
             '%S %S %s could a %S %s...', [@s1, 31, @s2, 31]);
      (* s1 = 'wood', s2 = 'chuck' *)
}

const
  { Declared locally so the routine does not depend on an outer definition. }
  Digits = ['0'..'9'];

  { Smaller of two integers (plain Pascal, no CPU-specific assembler). }
  function Min(a, b: Integer): Integer;
  begin
    if a <= b then
      Result := a
    else
      Result := b;
  end;

  { Index of the first character at or after "from" that is not part of an
    integer token. A '-' is accepted only as the very first character, so
    '12-34' yields the token '12' rather than the unparsable '12-34'. }
  function ScanInteger(const s: string; from: Integer): Integer;
  begin
    Result := from;
    if (Result <= Length(s)) and (s[Result] = '-') then
      Inc(Result);
    while (Result <= Length(s)) and (s[Result] in Digits) do
      Inc(Result);
  end;

  { Index of the first occurrence of "stop" at or after "from", or
    Length(s) + 1 when it does not occur. }
  function ScanUntil(const s: string; from: Integer; stop: Char): Integer;
  begin
    Result := from;
    while (Result <= Length(s)) and (s[Result] <> stop) do
      Inc(Result);
  end;

var
  i, j, argindex, start, maxlen: Integer;
begin
  Result := 0;
  argindex := 0;
  i := 1; { current position in pattern }
  j := 1; { current position in source }
  { "i < Length" (not "<=") guarantees pattern[i + 1] exists after a '%'. }
  while (i < Length(pattern)) and (j <= Length(source)) do
  begin
    if pattern[i] = '%' then
      case pattern[i + 1] of
        'D':
          begin
            Inc(i, 2);
            j := ScanInteger(source, j);
            Inc(Result);
          end;
        'S':
          begin
            Inc(i, 2);
            { Count first, so a trailing %S is counted like any other. }
            Inc(Result);
            if i > Length(pattern) then
              Break; { no delimiter: the rest of the source is ignored }
            j := ScanUntil(source, j, pattern[i]);
          end;
        'd':
          begin
            if argindex > High(args) then
              raise EConvertError.Create('Not enough arguments');
            if args[argindex].VType <> vtPointer then
              raise EConvertError.Create('Integer pointer expected');
            Inc(i, 2);
            start := j;
            j := ScanInteger(source, j);
            { Leave the target untouched when no integer text is present. }
            if j > start then
              PInteger(args[argindex].VPointer)^ :=
                StrToInt(Copy(source, start, j - start));
            Inc(argindex);
            Inc(Result);
          end;
        's':
          begin
            { Needs two slots: the string pointer and its maximum length. }
            if argindex > High(args) - 1 then
              raise EConvertError.Create('Not enough arguments');
            if args[argindex].VType <> vtPointer then
              raise EConvertError.Create('String pointer expected');
            if args[argindex + 1].VType <> vtInteger then
              raise EConvertError.Create('No string size specified');
            maxlen := args[argindex + 1].VInteger;
            Inc(i, 2);
            start := j;
            if i > Length(pattern) then
              { Last specifier: take everything that is left. The loop then
                terminates on its own because i is past the pattern. }
              j := Length(source) + 1
            else
              j := ScanUntil(source, j, pattern[i]);
            PShortString(args[argindex].VPointer)^ :=
              Copy(source, start, Min(j - start, maxlen));
            Inc(argindex, 2);
            Inc(Result);
          end;
      else
        { Unsupported specifier: skip the '%' only, consume no source. }
        Inc(i);
      end
    else
    begin
      { Plain character: advance both positions without comparing them. }
      Inc(i);
      Inc(j);
    end;
  end;
end;

Nincsenek megjegyzések:

Megjegyzés küldése