2004. október 7., csütörtök
How to convert accented characters to unaccented ones
Problem/Question/Abstract:
Is there a way to convert accented characters to unaccented (meaning ASCII A - Z, a - z)?
Answer:
The classical way is to keep a conversion table and look each character up in it. The drawback is that such a table is specific to one character set (encoding), for example the Windows Western code page (Windows-1252, a superset of ISO Latin-1). You can get around this limitation by building a table for a range of Unicode (WideChar) characters and converting the strings to WideStrings before removing the accents. The routine below works on ANSI characters in the Windows Western (Windows-1252 / Latin-1) encoding.
{ Returns the unaccented ASCII base letter for an accented Latin-1 letter.

  _ch     the character to simplify, interpreted in the Windows Western
          (Latin-1) encoding.
  Result  'A'..'Z' / 'a'..'z' for the accented letters in #192..#255 that
          consist of a base letter plus a diacritic; every other character
          is returned unchanged.

  Letters with no single-letter ASCII base (AE, Eth, O-slash, Thorn,
  sharp s and their lowercase forms) and all symbols are left as they are.
  The range #128..#159 is also left unchanged. }
function SimplifyChar(const _ch: char): char;
const
  Charmap: array[#128..#255] of Char = (
    { 128..135 }
    #128, #129, #130, #131, #132, #133, #134, #135,
    { 136..143 }
    #136, #137, #138, #139, #140, #141, #142, #143,
    { 144..151 }
    #144, #145, #146, #147, #148, #149, #150, #151,
    { 152..159 }
    #152, #153, #154, #155, #156, #157, #158, #159,
    { 160..167: punctuation and currency symbols }
    #160, #161, #162, #163, #164, #165, #166, #167,
    { 168..175 }
    #168, #169, #170, #171, #172, #173, #174, #175,
    { 176..183 }
    #176, #177, #178, #179, #180, #181, #182, #183,
    { 184..191 }
    #184, #185, #186, #187, #188, #189, #190, #191,
    { 192..199: A-grave A-acute A-circumflex A-tilde A-diaeresis A-ring AE C-cedilla }
    'A', 'A', 'A', 'A', 'A', 'A', #198, 'C',
    { 200..207: E-grave E-acute E-circumflex E-diaeresis I-grave I-acute I-circumflex I-diaeresis }
    'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
    { 208..215: Eth N-tilde O-grave O-acute O-circumflex O-tilde O-diaeresis multiplication-sign }
    #208, 'N', 'O', 'O', 'O', 'O', 'O', #215,
    { 216..223: O-slash U-grave U-acute U-circumflex U-diaeresis Y-acute Thorn sharp-s }
    #216, 'U', 'U', 'U', 'U', 'Y', #222, #223,
    { 224..231: a-grave a-acute a-circumflex a-tilde a-diaeresis a-ring ae c-cedilla }
    'a', 'a', 'a', 'a', 'a', 'a', #230, 'c',
    { 232..239: e-grave e-acute e-circumflex e-diaeresis i-grave i-acute i-circumflex i-diaeresis }
    'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
    { 240..247: eth n-tilde o-grave o-acute o-circumflex o-tilde o-diaeresis division-sign }
    #240, 'n', 'o', 'o', 'o', 'o', 'o', #247,
    { 248..255: o-slash u-grave u-acute u-circumflex u-diaeresis y-acute thorn y-diaeresis }
    #248, 'u', 'u', 'u', 'u', 'y', #254, 'y'
  );
begin
  { The upper bound matters when Char is wider than 8 bits: without it a
    character above #255 would index past the end of the table. }
  if (_ch >= Low(Charmap)) and (_ch <= High(Charmap)) then
    Result := Charmap[_ch]
  else
    Result := _ch;
end;
The charmap table was created by this little routine and then edited:
{ Builds the Pascal source text of an identity "Charmap" typed constant
  covering fromchar..tochar and places it on the clipboard, ready to be
  pasted into a unit and edited by hand.

  fromchar  first character of the table (must be <= tochar).
  tochar    last character of the table.

  Each element is written as "#nnn { c }", where c is a readable form of
  the character, and elements are wrapped onto a new line once a line
  would exceed 66 characters. }
procedure CreateCharacterMap(fromchar, tochar: Char);

  { Returns a form of ch that is safe to embed in a brace comment. }
  function DisplayStr(const ch: Char): string;
  begin
    if ch < #32 then
      { Control characters are shown in caret notation: #1 -> ^A. }
      Result := '^' + Chr(Ord('A') - 1 + Ord(ch))
    else if ch = '{' then
      { A literal brace inside the generated comment would open a nested
        comment on compilers that support nesting. }
      Result := 'lbrace'
    else if ch = '}' then
      { A literal closing brace would end the generated comment early and
        leave the generated source uncompilable. }
      Result := 'rbrace'
    else
      Result := ch;
  end;

var
  sl: TStringlist;
  line, element: string;
  ch: char;
begin
  Assert(fromchar <= tochar);
  sl := Tstringlist.Create;
  try
    sl.Add('Const');
    line := Format(' Charmap: array [#%d..#%d] of Char = (', [Ord(fromchar),
      Ord(tochar)]);
    sl.Add(line);
    line := '';
    for ch := fromchar to toChar do
    begin
      element := Format('#%3.3d { %s }', [Ord(ch), DisplayStr(ch)]);
      { Flush the current line before it grows past the wrap width. }
      if (Length(line) + Length(element)) > 66 then
      begin
        sl.Add(' ' + line);
        line := '';
      end;
      line := line + element;
      { No separator after the final element. }
      if ch <> tochar then
        line := line + ', ';
    end;
    sl.Add(' ' + line);
    sl.add(' );');
    Clipboard.AsText := sl.Text;
  finally
    sl.Free
  end;
end;
Feliratkozás:
Megjegyzések küldése (Atom)
Nincsenek megjegyzések:
Megjegyzés küldése