2005. június 28., kedd

Parsing quoted strings


Problem/Question/Abstract:

How do I parse a string containing quoted strings and other tokens?

Answer:

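{
  These routines parse a string one token at a time.
  Use GetToken to extract an unquoted token from InTxt.
  Use GetTokenStr to extract a quoted token from InTxt.
  Both remove the extracted token, and any separator characters
  that follow it, from InTxt.

  GetTokenStr raises an exception if the text does not start with
  the quote character or if the closing quote is missing. Use
  try..except blocks to handle these errors.
}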
type
  { A set of characters; used to describe which characters separate tokens. }
  CharSet = set of Char;

const
  { Ready-made separator sets for the SpaceChar parameter. }
  CS_Space: CharSet = [' '];      { blank only }
  CS_CSV: CharSet = [' ', ','];   { blank and comma }
  CS_STab: CharSet = [' ', #9];   { blank and tab }

  { Ready-made quote characters for the QuoteChar parameter. }
  DoubleQuote = '"';
  SingleQuote = '''';

{ Extracts the leading unquoted token from InTxt.

  InTxt     - text to parse; on return the token, the separator that ended
              it and any further leading separators have been removed.
  SpaceChar - set of characters that act as token separators.

  Returns the text in front of the first separator, or the whole of InTxt
  when it contains no separator. The result is empty when InTxt is empty
  or starts with a separator. }
function GetToken(var InTxt: string; SpaceChar: CharSet): string;
var
  Idx, SepPos, SkipLen: Integer;
begin
  { Locate the first separator; stays one past the end if there is none }
  SepPos := Length(InTxt) + 1;
  for Idx := 1 to Length(InTxt) do
    if InTxt[Idx] in SpaceChar then
    begin
      SepPos := Idx;
      Break;
    end;

  { Everything in front of the separator is the token }
  Result := Copy(InTxt, 1, SepPos - 1);

  { Drop the token together with the separator that terminated it }
  Delete(InTxt, 1, SepPos);

  { Count and drop any further separators now leading the remainder }
  SkipLen := 0;
  while (SkipLen < Length(InTxt)) and (InTxt[SkipLen + 1] in SpaceChar) do
    Inc(SkipLen);
  Delete(InTxt, 1, SkipLen);
end;

{ Extracts a quoted token from the front of InTxt.

  InTxt     - text to parse; must start with QuoteChar. On success the
              quoted token and any separators following it are removed.
              On failure InTxt is left exactly as it was passed in.
  SpaceChar - set of characters that act as token separators.
  QuoteChar - character that opens and closes the token. Two consecutive
              quote characters inside the token stand for one literal
              quote character.

  Returns the token text without the surrounding quotes.

  Raises Exception when InTxt does not start with QuoteChar, or when the
  closing quote is missing. }
function GetTokenStr(var InTxt: string; SpaceChar: CharSet; QuoteChar: Char): string;
var
  Closed: Boolean;
  Cur, SegStart, Len: Integer;
  Token: string;
begin
  Len := Length(InTxt);

  { Error checking: does the string start with a quote? }
  if (Len = 0) or (InTxt[1] <> QuoteChar) then
    raise Exception.Create('Expected ' + QuoteChar + ' but ' + Copy(InTxt, 1, 1) +
      ' found.');

  { The token is assembled in a local and InTxt is only scanned by index,
    so nothing the caller can see is changed if an exception is raised. }
  Token := '';
  Cur := 2; { first character after the opening quote }

  { Find the quote character that ends the string }
  repeat
    { Scan to the next quote character }
    SegStart := Cur;
    while (Cur <= Len) and (InTxt[Cur] <> QuoteChar) do
      Inc(Cur);

    { Error checking: unexpected end of string? }
    if Cur > Len then
      raise Exception.Create('Unexpected end of string.');

    { Copy up to (but not including) the quote character }
    Token := Token + Copy(InTxt, SegStart, Cur - SegStart);

    { Step over the quote character }
    Inc(Cur);

    { A second quote right behind it is an escaped quote, not the end }
    Closed := not ((Cur <= Len) and (InTxt[Cur] = QuoteChar));
    if not Closed then
    begin
      { Insert a quote in the result token and step over the second quote }
      Token := Token + QuoteChar;
      Inc(Cur);
    end;
  until Closed;

  { Skip the separators that follow the closing quote }
  while (Cur <= Len) and (InTxt[Cur] in SpaceChar) do
    Inc(Cur);

  { Parsing succeeded: remove everything consumed in a single step }
  Delete(InTxt, 1, Cur - 1);
  Result := Token;
end;

Usage Example:

{ Walks through a comma-separated line that mixes quoted and unquoted
  fields. Every call removes the token it returns, and the separators
  after it, from s, so the calls must be made in field order.
  Memo1 is not declared in this snippet; presumably a TMemo on the
  calling form - confirm against the host project. }
var
  s: string;
begin
  s := '"John Smith", 500, "This is ""quoted""", "", "That was an empty string"';
  { Quoted field: adds John Smith }
  Memo1.Lines.Add(GetTokenStr(s, CS_CSV, DoubleQuote));
  { Unquoted field: adds 500 }
  Memo1.Lines.Add(GetToken(s, CS_CSV));
  { Doubled quotes collapse to single ones: adds This is "quoted" }
  Memo1.Lines.Add(GetTokenStr(s, CS_CSV, DoubleQuote));
  { Two adjacent quotes form an empty token: adds an empty line }
  Memo1.Lines.Add(GetTokenStr(s, CS_CSV, DoubleQuote));
  { Last field: adds That was an empty string and leaves s empty }
  Memo1.Lines.Add(GetTokenStr(s, CS_CSV, DoubleQuote));
end;

Nincsenek megjegyzések:

Megjegyzés küldése