## 2005. június 28., kedd

### Parsing quoted strings

Problem/Question/Abstract:

How do I parse a string containing quoted strings and other tokens?

{
These routines can be used to parse strings.
Use GetToken to extract normal tokens from InTxt.
Use GetTokenStr to extract quoted tokens from InTxt.

GetTokenStr raises an exception if InTxt does not start with the quote
character or if the closing quote is missing. Wrap calls in try..except
blocks to handle these errors.
}
type
  { A set of characters; the parsing routines use it to describe separators. }
  CharSet = set of Char;

const
  { Predefined separator sets. }
  CS_Space: CharSet = [' '];      { blank only }
  CS_CSV: CharSet = [',', ' '];   { comma or blank }
  CS_STab: CharSet = [#9, ' '];   { tab or blank }

  { Quote characters that can be passed as QuoteChar. }
  DoubleQuote = '"';
  SingleQuote = '''';

{ Extracts the next unquoted token from the front of InTxt.

  InTxt     - text to parse; the returned token, the separator that ended it
              and any separators directly following are removed from it.
  SpaceChar - set of characters that act as separators.

  Returns the characters preceding the first separator (the whole text when
  no separator is present; an empty string when InTxt starts with one). }
function GetToken(var InTxt: string; SpaceChar: CharSet): string;
var
  SepPos: Integer;
  SkipCount: Integer;
begin
  { Walk forward to the first separator; ends at Length(InTxt) + 1 if none }
  SepPos := 1;
  while SepPos <= Length(InTxt) do
  begin
    if InTxt[SepPos] in SpaceChar then
      Break;
    Inc(SepPos);
  end;

  { Everything before the separator is the token }
  Result := Copy(InTxt, 1, SepPos - 1);

  { Drop the token together with the separator that terminated it }
  Delete(InTxt, 1, SepPos);

  { Count and drop any further separators now leading InTxt }
  SkipCount := 0;
  while (SkipCount < Length(InTxt)) and (InTxt[SkipCount + 1] in SpaceChar) do
    Inc(SkipCount);
  Delete(InTxt, 1, SkipCount);
end;

{ Extracts a quoted token from the front of InTxt.

  InTxt     - text to parse; must begin with QuoteChar. The quoted token and
              any separators directly following it are removed from it.
  SpaceChar - set of characters that act as separators after the token.
  QuoteChar - character that opens and closes the token; two consecutive
              quote characters inside the token stand for one literal quote.

  Returns the token's content without the enclosing quotes.
  Raises Exception when InTxt does not start with QuoteChar or when no
  closing quote is found. InTxt may already be partially consumed when the
  latter exception is raised. }
function GetTokenStr(var InTxt: string; SpaceChar: CharSet; QuoteChar: Char): string;
var
  QuotePos: Integer;
  SkipCount: Integer;
  Closed: Boolean;
begin
  { The token has to open with the quote character }
  if Copy(InTxt, 1, 1) <> QuoteChar then
    raise Exception.Create('Expected ' + QuoteChar + ' but ' + Copy(InTxt, 1, 1) + ' found.');

  { Discard the opening quote }
  Delete(InTxt, 1, 1);

  Result := '';
  Closed := False;

  while not Closed do
  begin
    { Locate the next quote character; ends past the text if there is none }
    QuotePos := 1;
    while QuotePos <= Length(InTxt) do
    begin
      if InTxt[QuotePos] = QuoteChar then
        Break;
      Inc(QuotePos);
    end;

    { Running off the end means the token was never closed }
    if QuotePos > Length(InTxt) then
      raise Exception.Create('Unexpected end of string.');

    { Append the text before the quote, then consume it and the quote }
    Result := Result + Copy(InTxt, 1, QuotePos - 1);
    Delete(InTxt, 1, QuotePos);

    if Copy(InTxt, 1, 1) = QuoteChar then
    begin
      { A second quote right behind the first is an escaped literal quote }
      Result := Result + QuoteChar;
      Delete(InTxt, 1, 1);
    end
    else
      { A lone quote closes the token }
      Closed := True;
  end;

  { Count and drop the separators now leading InTxt }
  SkipCount := 0;
  while (SkipCount < Length(InTxt)) and (InTxt[SkipCount + 1] in SpaceChar) do
    Inc(SkipCount);
  Delete(InTxt, 1, SkipCount);
end;

Usage Example:

var
s: string;
begin
s := '"John Smith", 500, "This is ""quoted""", "", "That was an empty string"';