2008. május 9., péntek

How to search a string with wildcards


Problem/Question/Abstract:

I have a body of text. I want to allow the user to enter a string that could contain wildcards (well, just the " * ") and search for it.

Answer:

Your first task is to split the paragraph into words (since i take it from your description that the match has to be inside a word). The next is to match each word to the mask. The following implementation is certainly not the fastest possible but it should make the algorithm clear.


procedure SplitTextIntoWords(const S: string; words: TStringlist);
var
  startpos, endpos: Integer;
begin
  Assert(Assigned(words));
  words.clear;
  startpos := 1;
  while startpos <= Length(S) do
  begin
    {skip non-letters }
    while (startpos <= Length(S)) and not IsCharAlpha(S[startpos]) do
      Inc(startpos);
    if startpos <= Length(S) then
    begin
      {find next non-letter}
      endpos := startpos + 1;
      while (endpos <= Length(S)) and IsCharAlpha(S[endpos]) do
        Inc(endpos);
      words.add(Copy(S, startpos, endpos - startpos));
      startpos := endpos + 1;
    end;
  end;
end;

function StringMatchesMask(S, mask: string; case_sensitive: Boolean): Boolean;
var
  sIndex, maskIndex: Integer;
begin
  if not case_sensitive then
  begin
    S := AnsiUpperCase(S);
    mask := AnsiUpperCase(mask);
  end;
  Result := True; {blatant optimism}
  sIndex := 1;
  maskIndex := 1;
  while (sIndex <= Length(S)) and (maskIndex <= Length(mask)) do
  begin
    case mask[maskIndex] of
      '?':
        begin
          {matches any character}
          Inc(sIndex);
          Inc(maskIndex);
        end;
      '*':
        begin
          {matches 0 or more characters, so need to check for next character in mask}
          Inc(maskIndex);
          if maskIndex > Length(mask) then
            { * at end matches rest of string}
            Exit
          else if mask[maskindex] in ['*', '?'] then
            raise Exception.Create('Invalid mask');
          {look for mask character in S}
          while (sIndex <= Length(S)) and (S[sIndex] <> mask[maskIndex]) do
            Inc(sIndex);
          if sIndex > Length(S) then
          begin
            {character not found, no match}
            Result := false;
            Exit;
          end;
        end;
    else
      if S[sIndex] = mask[maskIndex] then
      begin
        Inc(sIndex);
        Inc(maskIndex);
      end
      else
      begin
        {no match}
        Result := False;
        Exit;
      end;
    end;
  end;
  {if we have reached the end of both S and mask we have a complete match,
  otherwise we only have a partial match}
  if (sIndex <= Length(S)) or (maskIndex <= Length(mask)) then
    Result := false;
end;

procedure FindMatchingWords(const S, mask: string; case_sensitive: Boolean;
  matches: TStringlist);
var
  words: TStringlist;
  i: Integer;
begin
  Assert(Assigned(matches));
  words := TStringlist.Create;
  try
    SplitTextIntoWords(S, words);
    matches.clear;
    for i := 0 to words.count - 1 do
    begin
      if StringMatchesMask(words[i], mask, case_sensitive) then
        matches.Add(words[i]);
    end;
  finally
    words.free;
  end;
end;

{Form has one memo for the text to check, one edit for the mask, one checkbox
(check = case sensitive), one listbox for the results, one button }

procedure TForm1.Button1Click(Sender: TObject);
begin
  FindMatchingWords(memo1.text, edit1.text, checkbox1.checked, listbox1.items);
end;

Nincsenek megjegyzések:

Megjegyzés küldése