2006. április 17., hétfő

How to parse a TRichEdit for domain names


Problem/Question/Abstract:

How can I parse a TRichEdit for domains ending in .com, .net, .org?

Answer:

Solve 1:

Not extensively tested:

procedure TForm1.Button1Click(Sender: TObject);
const
  charsAllowedInDomain = ['a'..'z', '0'..'9', '.', '_']; {may be more}
  numExts = 4;
  domainExts: array[1..numExts] of Pchar = ('.com', '.net', '.org', '.gov'); {lower case!}
  lens: array[1..numExts] of Integer = (4, 4, 4, 4);
var
  S: string;
  pStartString, pScan, pStartDomain, pEndDomain: Pchar;
  domain: string;
  i: Integer;
begin
  S := AnsiLowerCase(richedit1.text);
  pStartString := PChar(S);
  pScan := pStartString;
  while pScan^ <> #0 do
  begin
    if pScan^ = '.' then
    begin
      for i := Low(domainExts) to High(domainExts) do
        if StrLComp(pScan, domainExts[i], lens[i]) = 0 then
        begin
          {we have a candidate}
          pStartDomain := pScan;
          pEndDomain := pScan + lens[i];
          if not (pEndDomain^ in charsAllowedInDomain) then
          begin
            while (pStartDomain > pStartString) and (pStartDomain[-1] in charsAllowedInDomain) do
              Dec(pStartDomain);
            SetString(domain, pStartDomain, pEndDomain - pStartDomain);
            listbox1.items.add(domain);
            pScan := pEndDomain - 1;
            break;
          end;
        end;
    end;
    Inc(pScan);
  end;
end;


Solve 2:

{ ... }
type {declared in richedit.pas D3}

  TCharRange = record
    cpMin: Longint;
    cpMax: LongInt;
  end;

  TFindTextExA = record {declared in richedit.pas D3}
    chrg: TCharRange;
    lpstrText: PAnsiChar;
    chrgText: TCharRange;
  end;

procedure REFindDomain(RE: TRichEdit; const Target: string; Strs: TStrings);
const
  {maybe more than these?}
  ValidChars: set of char = ['a'..'z', 'A'..'Z', '0'..'9', '.', '/', ':', '_', '-'];
var
  ftx: TFindTextExA;
  flags: longint;
  charpos: longint;
  s: string;
begin
  if (Target = '') then
    exit; {nothing to look for}
  {searches all of the RichEdit}
  ftx.chrg.cpMin := 0;
  ftx.chrg.cpMax := -1;
  ftx.lpstrText := PChar(Target);
  ftx.chrgText.cpMin := 0;
  ftx.chrgText.cpMax := 0;
  flags := 0;
  // EM_FINDTEXTEX = WM_USER + 79;  {declared in richedit.pas D3}
  while SendMessage(RE.Handle, WM_USER + 79, flags, longint(@ftx)) > -1 do
  begin
    RE.SelStart := ftx.chrgText.cpMin; {found at position}
    RE.SelLength := Length(Target);
    {get the line}
    if ftx.chrgText.cpMax >= 255 then
      s := Copy(RE.Lines.Text, ftx.chrgText.cpMax - 254, 255)
    else
      s := Copy(RE.Lines.Text, 1, ftx.chrgText.cpMax);
    {need to find start of domain name}
    charpos := Length(s);
    while (charpos > 1) and (s[charpos] in ValidChars) do
      Dec(charpos);
    if not (s[charpos] in ValidChars) then
      Inc(charpos);
    Strs.Add(Copy(s, charpos, Length(s)));
    ftx.chrg.cpMin := ftx.chrgText.cpMin + 1; {reset to found at pos}
  end;
end;

{ListBox1 contains 3 lines: '.com'  '.net'  '.org',   ListBox2 receives the results}

procedure TForm1.Button1Click(Sender: TObject);
var
  i: integer;
begin
  if ListBox1.Items.Count > 0 then
  begin
    ListBox2.Clear;
    for i := 0 to ListBox1.Items.Count - 1 do
    begin
      REFindDomain(RichEdit1, ListBox1.Items[i], ListBox2.Items);
    end;
    Label1.Caption := IntToStr(ListBox2.Items.Count);
  end;
end;

Nincsenek megjegyzések:

Megjegyzés küldése