2006. április 17., hétfő
How to parse a TRichEdit for domain names
Problem/Question/Abstract:
How can I parse a TRichEdit for domains ending in .com, .net, .org?
Answer:
Solve 1:
Not extensively tested:
procedure TForm1.Button1Click(Sender: TObject);
const
charsAllowedInDomain = ['a'..'z', '0'..'9', '.', '_']; {may be more}
numExts = 4;
domainExts: array[1..numExts] of Pchar = ('.com', '.net', '.org', '.gov'); {lower case!}
lens: array[1..numExts] of Integer = (4, 4, 4, 4);
var
S: string;
pStartString, pScan, pStartDomain, pEndDomain: Pchar;
domain: string;
i: Integer;
begin
S := AnsiLowerCase(richedit1.text);
pStartString := PChar(S);
pScan := pStartString;
while pScan^ <> #0 do
begin
if pScan^ = '.' then
begin
for i := Low(domainExts) to High(domainExts) do
if StrLComp(pScan, domainExts[i], lens[i]) = 0 then
begin
{we have a candidate}
pStartDomain := pScan;
pEndDomain := pScan + lens[i];
if not (pEndDomain^ in charsAllowedInDomain) then
begin
while (pStartDomain > pStartString) and (pStartDomain[-1] in charsAllowedInDomain) do
Dec(pStartDomain);
SetString(domain, pStartDomain, pEndDomain - pStartDomain);
listbox1.items.add(domain);
pScan := pEndDomain - 1;
break;
end;
end;
end;
Inc(pScan);
end;
end;
Solve 2:
{ ... }
type {declared in richedit.pas D3}
TCharRange = record
cpMin: Longint;
cpMax: LongInt;
end;
TFindTextExA = record {declared in richedit.pas D3}
chrg: TCharRange;
lpstrText: PAnsiChar;
chrgText: TCharRange;
end;
procedure REFindDomain(RE: TRichEdit; const Target: string; Strs: TStrings);
const
{maybe more than these?}
ValidChars: set of char = ['a'..'z', 'A'..'Z', '0'..'9', '.', '/', ':', '_', '-'];
var
ftx: TFindTextExA;
flags: longint;
charpos: longint;
s: string;
begin
if (Target = '') then
exit; {nothing to look for}
{searches all of the RichEdit}
ftx.chrg.cpMin := 0;
ftx.chrg.cpMax := -1;
ftx.lpstrText := PChar(Target);
ftx.chrgText.cpMin := 0;
ftx.chrgText.cpMax := 0;
flags := 0;
// EM_FINDTEXTEX = WM_USER + 79; {declared in richedit.pas D3}
while SendMessage(RE.Handle, WM_USER + 79, flags, longint(@ftx)) > -1 do
begin
RE.SelStart := ftx.chrgText.cpMin; {found at position}
RE.SelLength := Length(Target);
{get the line}
if ftx.chrgText.cpMax >= 255 then
s := Copy(RE.Lines.Text, ftx.chrgText.cpMax - 254, 255)
else
s := Copy(RE.Lines.Text, 1, ftx.chrgText.cpMax);
{need to find start of domain name}
charpos := Length(s);
while (charpos > 1) and (s[charpos] in ValidChars) do
Dec(charpos);
if not (s[charpos] in ValidChars) then
Inc(charpos);
Strs.Add(Copy(s, charpos, Length(s)));
ftx.chrg.cpMin := ftx.chrgText.cpMin + 1; {reset to found at pos}
end;
end;
{ListBox1 contains 3 lines: '.com' '.net' '.org', ListBox2 receives the results}
procedure TForm1.Button1Click(Sender: TObject);
var
i: integer;
begin
if ListBox1.Items.Count > 0 then
begin
ListBox2.Clear;
for i := 0 to ListBox1.Items.Count - 1 do
begin
REFindDomain(RichEdit1, ListBox1.Items[i], ListBox2.Items);
end;
Label1.Caption := IntToStr(ListBox2.Items.Count);
end;
end;
Feliratkozás:
Megjegyzések küldése (Atom)
Nincsenek megjegyzések:
Megjegyzés küldése