RAD Studio VCL Reference
ContentsIndex
PreviousUpNext
System.UnicodeToUtf8 Function

Converts a string of Unicode characters into a UTF-8 string.

Pascal
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: Cardinal; Source: PWideChar; SourceChars: Cardinal): Cardinal; overload;
function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: Integer): Integer; overload; deprecated;
function UnicodeToUtf8(Dest: PAnsiChar; Source: PWideChar; MaxBytes: Integer): Integer; overload; deprecated;
function UnicodeToUtf8(Dest: PAnsiChar; MaxDestBytes: Cardinal; Source: PWideChar; SourceChars: Cardinal): Cardinal; overload;
C++
unsigned UnicodeToUtf8(const char * Dest, unsigned MaxDestBytes, PWideChar Source, unsigned SourceChars);
int UnicodeToUtf8(const char * Dest, PWideChar Source, int MaxBytes);
int UnicodeToUtf8(PAnsiChar Dest, PWideChar Source, int MaxBytes);
unsigned UnicodeToUtf8(PAnsiChar Dest, unsigned MaxDestBytes, PWideChar Source, unsigned SourceChars);

Call UnicodeToUtf8 to convert a Unicode string to a null-terminated sequence of UTF-8 characters. Use the syntax that takes the MaxDestBytes and SourceChars parameters. The deprecated syntax (the overloads that take MaxBytes) is included only for backward compatibility.  

Dest receives the resulting null-terminated array of UTF-8 characters.  

MaxDestBytes or MaxBytes indicates the number of bytes available in the buffer specified by Dest, including the byte for the null terminator.  

Source is an array of Unicode characters.  

SourceChars is the number of characters in Source. If SourceChars is not specified, Source must be null-terminated.  

UnicodeToUtf8 returns the number of bytes written to Dest, including the byte for the null terminator.  

Delphi Examples: 

 

{
This example converts various string formats to and from Unicode.
This example requires two text edits and a combobox.
}
// Reads the file named in Edit1, detects its encoding, runs the conversion
// selected in ComboBox1 (0 = AnsiToUtf8, 1 = UCS4StringToWideString,
// 2 = UnicodeToUtf8), shows the converted text and writes it, preceded by
// the matching byte-order mark, to the file named in Edit2.
// Any other ComboBox1.ItemIndex (e.g. -1) only reads the input file.
procedure TForm1.Button1Click(Sender: TObject);
var
  LBuffer: TBytes;
  LFileStream: TFileStream;
  LOffset: Integer;
  LEncoding: TEncoding;
  ComboIndex: Integer;
  ByteString: RawByteString;
  FileText: string;
  ucs4Text: UCS4String;
  wideStringText: WideString;
  utf8Text: UTF8String;
  L: Cardinal;

  // Re-encodes the file contents (minus the detected preamble) as
  // ADestEncoding and returns them decoded into a string.
  function DecodeBufferAs(ADestEncoding: TEncoding): string;
  var
    LConverted: TBytes;
    LSkip: Integer;
  begin
    LConverted := TEncoding.Convert(LEncoding, ADestEncoding, LBuffer,
      LOffset, Length(LBuffer) - LOffset);
    // ADestEncoding is non-nil, so this only measures a leading preamble
    // of that encoding (if any) so it can be skipped.
    LSkip := TEncoding.GetBufferEncoding(LConverted, ADestEncoding);
    Result := ADestEncoding.GetString(LConverted, LSkip,
      Length(LConverted) - LSkip);
  end;

  // Creates (or overwrites) AFileName and writes the preamble of AEncoding
  // followed by AByteCount bytes starting at AData.
  // Zero-length parts are skipped, and WriteBuffer is used so that a short
  // write raises an exception instead of being silently ignored.
  procedure SaveWithPreamble(const AFileName: string; AEncoding: TEncoding;
    AData: Pointer; AByteCount: Integer);
  var
    LOutStream: TFileStream;
    LByteOrderMark: TBytes;
  begin
    LOutStream := TFileStream.Create(AFileName, fmCreate);
    try
      LByteOrderMark := AEncoding.GetPreamble;
      if Length(LByteOrderMark) > 0 then
        LOutStream.WriteBuffer(LByteOrderMark[0], Length(LByteOrderMark));
      if AByteCount > 0 then
        LOutStream.WriteBuffer(AData^, AByteCount);
    finally
      LOutStream.Free;
    end;
  end;

begin
  // nil asks GetBufferEncoding to detect the encoding from the preamble.
  LEncoding := nil;
  ComboIndex := ComboBox1.ItemIndex;
  LFileStream := TFileStream.Create(Edit1.Text, fmOpenRead);
  try
    // Read file into buffer
    SetLength(LBuffer, LFileStream.Size);
    LFileStream.ReadBuffer(Pointer(LBuffer)^, Length(LBuffer));
    // Get data encoding of data read; LOffset is the preamble length
    LOffset := TEncoding.GetBufferEncoding(LBuffer, LEncoding);
  finally
    LFileStream.Free;
  end;

  // Process various types
  case ComboIndex of

    // AnsiToUtf8
    0:
    begin
      // NOTE(review): the buffer is converted through TEncoding.ASCII, which
      // is presumably lossy for characters outside 7-bit ASCII;
      // TEncoding.Default may be what "Ansi" intended - confirm.
      FileText := DecodeBufferAs(TEncoding.ASCII);
      ByteString := AnsiToUtf8(FileText);
      ShowMessage(UTF8ToString(ByteString));

      // Write to file as UTF-8. The PAnsiChar cast is safe for an empty
      // string, unlike indexing ByteString[1].
      SaveWithPreamble(Edit2.Text, TEncoding.UTF8,
        PAnsiChar(ByteString), Length(ByteString));
    end;

    // UCS4StringToWideString
    1:
    begin
      // Decode as UTF-16, then round-trip through a UCS4 string
      FileText := DecodeBufferAs(TEncoding.Unicode);
      ucs4Text := UnicodeStringToUCS4String(FileText);
      wideStringText := UCS4StringToWideString(ucs4Text);
      ShowMessage(wideStringText);

      // Write to file as UTF-16 (payload is WideChar-sized code units)
      SaveWithPreamble(Edit2.Text, TEncoding.Unicode,
        PWideChar(wideStringText), Length(wideStringText) * SizeOf(WideChar));
    end;

    // UnicodeToUtf8
    2:
    begin
      // Decode as UTF-16
      FileText := DecodeBufferAs(TEncoding.Unicode);
      L := Length(FileText);
      // One UTF-16 code unit expands to at most 3 UTF-8 bytes (a surrogate
      // pair is 2 units -> 4 bytes), plus 1 byte for the null terminator.
      SetLength(utf8Text, L * 3 + 1);
      L := UnicodeToUtf8(PAnsiChar(utf8Text), Length(utf8Text),
        PWideChar(FileText), L);
      // The returned byte count covers the null terminator that was written
      // to the buffer; drop it so it is neither displayed nor saved.
      if (L > 0) and (utf8Text[L] = #0) then
        Dec(L);
      // Shrink the string to the bytes actually produced.
      SetLength(utf8Text, L);
      ShowMessage(UTF8ToString(utf8Text));

      // Write to file as UTF-8
      SaveWithPreamble(Edit2.Text, TEncoding.UTF8,
        PAnsiChar(utf8Text), Length(utf8Text));
    end;
  end;
end;

// Sets the default input/output file names and fills ComboBox1 with the
// names of the three conversions, selecting the first one.
procedure TForm1.FormCreate(Sender: TObject);
const
  ConversionNames: array[0..2] of string =
    ('AnsiToUtf8', 'UCS4StringToWideString', 'UnicodeToUtf8');
var
  Choices: TStrings;
  I: Integer;
begin
  Edit1.Text := 'Sample.txt';
  Edit2.Text := 'Dest.txt';

  // Build the list in a temporary object, then copy it into the combobox.
  Choices := TStringList.Create;
  try
    for I := Low(ConversionNames) to High(ConversionNames) do
      Choices.Add(ConversionNames[I]);

    ComboBox1.Items.Assign(Choices);
    ComboBox1.ItemIndex := 0;
  finally
    Choices.Free;
  end;
end;

 

Copyright(C) 2009 Embarcadero Technologies, Inc. All Rights Reserved.
What do you think about this topic? Send feedback!