I have a question about some code I use to convert UTF-8 strings to ANSI strings. It works for accented vowels, but not for the letter Ñ: the resulting string is cut off at that character. How can I fix this?
The string I have in UTF8: EDIFICIO PEÑAS BLANCAS
The string I expect in ANSI: EDIFICIO PEÑAS BLANCAS
The string I have in ANSI now: EDIFICIO PE
The code is here:
function TFormMain.convertir_utf8_ansi(const Source: string):string;
{ Decodes UTF-8 data held one byte per character in Source and returns the
  decoded text.

  Source : characters whose ordinal values ($00..$FF) are the UTF-8 bytes.
  Result : one character per decoded code point (appended as WideChar, so the
           compiler converts it to whatever the native string type is).

  Unlike the previous implementation, a malformed, truncated or unsupported
  sequence no longer aborts the loop (which silently dropped the rest of the
  string). Instead a '?' is emitted for the bad sequence and decoding resumes
  at the next character.

  Code points above U+FFFF (4-byte sequences) do not fit in a single WideChar
  and are replaced with '?'.

  NOTE(review): if the caller obtains Source by assigning raw UTF-8 bytes to a
  UnicodeString, the implicit ANSI -> Unicode conversion may already have
  turned bytes such as $91 into code points above $FF. Those cannot be
  recovered here; they are passed through unchanged. Presumably the caller
  should hand the data over as UTF8String/RawByteString instead - confirm
  against the call site. }
const
  REPLACEMENT = '?';
var
  Iterator, SourceLength, FChar, NChar: Integer;
  Extra, Remaining, MinCode: Integer;
  WellFormed: Boolean;
begin
  Result := '';
  SourceLength := Length(Source);
  Iterator := 1;
  while Iterator <= SourceLength do
  begin
    FChar := Ord(Source[Iterator]);
    Inc(Iterator);

    // Plain ASCII, or a value that is not a byte at all: copy as is.
    if (FChar < $80) or (FChar > $FF) then
    begin
      Result := Result + WideChar(FChar);
      Continue;
    end;

    // Classify the lead byte: number of continuation bytes, payload bits,
    // and the smallest code point that legitimately needs this length
    // (anything below it is an overlong encoding).
    case FChar of
      $C2..$DF:
        begin
          Extra := 1;
          MinCode := $80;
          FChar := FChar and $1F;
        end;
      $E0..$EF:
        begin
          Extra := 2;
          MinCode := $800;
          FChar := FChar and $0F;
        end;
      $F0..$F4:
        begin
          Extra := 3;
          MinCode := $10000;
          FChar := FChar and $07;
        end;
    else
      begin
        // $80..$C1 and $F5..$FF can never start a sequence.
        Extra := 0;
        MinCode := 0;
      end;
    end;

    WellFormed := Extra > 0;
    Remaining := Extra;
    while WellFormed and (Remaining > 0) do
    begin
      if Iterator > SourceLength then
        WellFormed := False // input ends in the middle of a sequence
      else
      begin
        NChar := Ord(Source[Iterator]);
        // The > $FF test matters: e.g. $0191 would otherwise pass the
        // "10xxxxxx" mask check although it is not a byte.
        if (NChar > $FF) or ((NChar and $C0) <> $80) then
          WellFormed := False // leave Iterator here so this char is re-read
        else
        begin
          FChar := (FChar shl 6) or (NChar and $3F);
          Inc(Iterator);
          Dec(Remaining);
        end;
      end;
    end;

    if WellFormed and (FChar >= MinCode) and (FChar <= $FFFF)
      and ((FChar < $D800) or (FChar > $DFFF)) then
      Result := Result + WideChar(FChar)
    else
      Result := Result + REPLACEMENT;
  end;
end;
Thanks.
If you are using Delphi 2009 or higher, you should let the RTL do the conversion for you:
type
  // AnsiString tagged with code page 28591 (ISO-8859-1).
  Latin1String = type AnsiString(28591);
var
  Utf8Text: UTF8String;
  Latin1Text: Latin1String;
begin
  Utf8Text := ...; // your source UTF-8 string
  // The cast between the two code-page-tagged string types is where the RTL
  // performs the UTF-8 -> ISO-8859-1 conversion.
  Latin1Text := Latin1String(Utf8Text);
end;
If you are using Delphi 2007 or earlier, you can still do the conversion; just let the OS do it for you:
const
  CP_LATIN1 = 28591; // ISO-8859-1
var
  Utf8Text: UTF8String;
  Latin1Text: AnsiString;
  WideText: WideString;
  WideLen, AnsiLen: Integer;
begin
  Utf8Text := ...; // your source UTF-8 string

  // Step 1: UTF-8 -> UTF-16. The first call passes no output buffer and only
  // asks for the required length; the second call fills the sized buffer.
  WideLen := MultiByteToWideChar(CP_UTF8, 0, PAnsiChar(Utf8Text),
    Length(Utf8Text), nil, 0);
  SetLength(WideText, WideLen);
  MultiByteToWideChar(CP_UTF8, 0, PAnsiChar(Utf8Text), Length(Utf8Text),
    PWideChar(WideText), WideLen);

  // Step 2: UTF-16 -> ISO-8859-1, using the same measure-then-convert pattern.
  AnsiLen := WideCharToMultiByte(CP_LATIN1, 0, PWideChar(WideText),
    Length(WideText), nil, 0, nil, nil);
  SetLength(Latin1Text, AnsiLen);
  WideCharToMultiByte(CP_LATIN1, 0, PWideChar(WideText), Length(WideText),
    PAnsiChar(Latin1Text), AnsiLen, nil, nil);
end;