支持AnsiString/UnicodeString/WideString
[Quote title="MECSUtils.pas"]{ *********************************************************************** }
{ }
{ MECS Utilities Unit ver1.31 }
{ }
{ Copyright (c) 2008,2009 Hideaki Tominaga (DEKO) }
{ }
{ *********************************************************************** }
unit MECSUtils;
interface
uses
SysUtils, Graphics, Windows, Dialogs;
type
TLeadBytes = set of AnsiChar;
UTF8Char = type AnsiChar;
PUTF8Char = type PAnsiChar;
{$IFDEF UNICODE}
UnicodeChar = type Char;
PUnicodeChar = type PChar;
{$ENDIF}
TElementType = (etSingle, etLead, etTrail);
TEastAsianWidth = (eawNeutral, eawFullwidth, eawHalfwidth, eawWide, eawNarrow, eawAmbiguous);
TCombiningType = (ctBase, ctCombining);
TIndexCount =
record
StartIndex: Integer;
EndIndex: Integer;
Count: Integer;
end;
// Normalization forms
TNORM_FORM =
(
NormalizationOther = $00,
NormalizationC = $01, // Normalization Form Canonical Composition(NFC)
NormalizationD = $02, // Normalization Form Canonical Decomposition(NFD)
NormalizationKC = $05, // Normalization Form Compatibility Composition(NFKC)
NormalizationKD = $06 // Normalization Form Compatibility Decomposition(NFKD)
);
// for Codepage
// -----------------------------------------------------------------------------
function CodePageToFontCharset(Codepage: DWORD): TFontCharset;
function FontCharsetToCodePage(FontCharset: TFontCharset): DWORD;
function IsFarEastCharSet(CharSet: TFontCharSet): Boolean;
function IsFarEastLCID(Locale: LCID): Boolean;
// for AnsiString
// -----------------------------------------------------------------------------
{$IFDEF UNICODE}
function MecsGetCodePage(S: RawByteString): DWORD;
{$ELSE}
function MecsGetCodePage(S: AnsiString): DWORD;
{$ENDIF}
// MecsGetLeadBytes
function MecsGetLeadBytes: TLeadBytes; overload;
function MecsGetLeadBytes(Codepage: DWORD): TLeadBytes; overload;
// for UnicodeString/WideString
// -----------------------------------------------------------------------------
function MecsEastAsianWidth(U: UCS4Char): TEastAsianWidth;
function MecsIsNormalized(const AText: WideString; NormForm: TNORM_FORM): Boolean;
// MecsMappingFixJA
{$IFDEF UNICODE}
function MecsMappingFixJA(const S: UnicodeString): UnicodeString; overload;
{$ENDIF}
function MecsMappingFixJA(const S: WideString): WideString; overload;
function MecsNormalize(const Src: WideString; var Dst: WideString; NormForm: TNORM_FORM): Boolean;
{$IFNDEF UNICODE}
function StrLen(const Str: PWideChar): Cardinal;
{$ENDIF}
// for AnsiString/UnicodeString/WideString (Common)
// -----------------------------------------------------------------------------
// MecsCharToElementIndex
{$IFDEF UNICODE}
function MecsCharToElementIndex(const S: RawByteString; Index: Integer): Integer; overload;
function MecsCharToElementIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer; overload;
function MecsCharToElementIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ELSE}
function MecsCharToElementIndex(const S: AnsiString; Index: Integer): Integer; overload;
function MecsCharToElementIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsCharToElementIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsCharToElementIndexCount
{$IFDEF UNICODE}
function MecsCharToElementIndexCount(const S: RawByteString; Index, Count: Integer): TIndexCount; overload;
function MecsCharToElementIndexCount(const S: RawByteString; Index, Count: Integer; Codepage: DWORD): TIndexCount; overload;
function MecsCharToElementIndexCount(const S: UnicodeString; Index, Count: Integer): TIndexCount; overload;
{$ELSE}
function MecsCharToElementIndexCount(const S: AnsiString; Index, Count: Integer): TIndexCount; overload;
function MecsCharToElementIndexCount(const S: AnsiString; Index, Count: Integer; Codepage: DWORD): TIndexCount; overload;
{$ENDIF}
function MecsCharToElementIndexCount(const S: WideString; Index, Count: Integer): TIndexCount; overload;
// MecsCharToElementLen
{$IFDEF UNICODE}
function MecsCharToElementLen(const S: RawByteString; MaxLen: Integer): Integer; overload;
function MecsCharToElementLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer; overload;
function MecsCharToElementLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
{$ELSE}
function MecsCharToElementLen(const S: AnsiString; MaxLen: Integer): Integer; overload;
function MecsCharToElementLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsCharToElementLen(const S: WideString; MaxLen: Integer): Integer; overload;
// MecsCharLength
{$IFDEF UNICODE}
function MecsCharLength(const S: RawByteString; Index: Integer): Integer; overload;
function MecsCharLength(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer; overload;
function MecsCharLength(const S: UnicodeString; Index: Integer): Integer; overload;
{$ELSE}
function MecsCharLength(const S: AnsiString; Index: Integer): Integer; overload;
function MecsCharLength(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsCharLength(const S: WideString; Index: Integer): Integer; overload;
// MecsCopy
{$IFDEF UNICODE}
function MecsCopy(const S: RawByteString; Index, Count: Integer): RawByteString; overload;
function MecsCopy(const S: RawByteString; Index, Count: Integer; Codepage: DWORD): RawByteString; overload;
function MecsCopy(const S: UnicodeString; Index, Count: Integer): UnicodeString; overload;
{$ELSE}
function MecsCopy(const S: AnsiString; Index, Count: Integer): AnsiString; overload;
function MecsCopy(const S: AnsiString; Index, Count: Integer; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsCopy(const S: WideString; Index, Count: Integer): WideString; overload;
// MecsDelete
{$IFDEF UNICODE}
procedure MecsDelete(var S: RawByteString; Index, Count: Integer); overload;
procedure MecsDelete(var S: RawByteString; Index, Count: Integer; Codepage: DWORD); overload;
procedure MecsDelete(var S: UnicodeString; Index, Count: Integer); overload;
{$ELSE}
procedure MecsDelete(var S: AnsiString; Index, Count: Integer); overload;
procedure MecsDelete(var S: AnsiString; Index, Count: Integer; Codepage: DWORD); overload;
{$ENDIF}
procedure MecsDelete(var S: WideString; Index, Count: Integer); overload;
// MecsElementToCharIndex
{$IFDEF UNICODE}
function MecsElementToCharIndex(const S: RawByteString; Index: Integer): Integer; overload;
function MecsElementToCharIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer; overload;
function MecsElementToCharIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ELSE}
function MecsElementToCharIndex(const S: AnsiString; Index: Integer): Integer; overload;
function MecsElementToCharIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsElementToCharIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsElementToCharLen
{$IFDEF UNICODE}
function MecsElementToCharLen(const S: RawByteString; MaxLen: Integer): Integer; overload;
function MecsElementToCharLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer; overload;
function MecsElementToCharLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
{$ELSE}
function MecsElementToCharLen(const S: AnsiString; MaxLen: Integer): Integer; overload;
function MecsElementToCharLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsElementToCharLen(const S: WideString; MaxLen: Integer): Integer; overload;
// MecsElementType
{$IFDEF UNICODE}
function MecsElementType(const S: RawByteString; Index: Integer): TElementType; overload;
function MecsElementType(const S: RawByteString; Index: Integer; Codepage: DWORD): TElementType; overload;
function MecsElementType(const S: UnicodeString; Index: Integer): TElementType; overload;
{$ELSE}
function MecsElementType(const S: AnsiString; Index: Integer): TElementType; overload;
function MecsElementType(const S: AnsiString; Index: Integer; Codepage: DWORD): TElementType; overload;
{$ENDIF}
function MecsElementType(const S: WideString; Index: Integer): TElementType; overload;
// MecsInsert
{$IFDEF UNICODE}
procedure MecsInsert(const Source: RawByteString; var S: RawByteString; Index: Integer); overload;
procedure MecsInsert(const Source: RawByteString; var S: RawByteString; Index: Integer; Codepage: DWORD); overload;
procedure MecsInsert(const Source: UnicodeString; var S: UnicodeString; Index: Integer); overload;
{$ELSE}
procedure MecsInsert(const Source: AnsiString; var S: AnsiString; Index: Integer); overload;
procedure MecsInsert(const Source: AnsiString; var S: AnsiString; Index: Integer; Codepage: DWORD); overload;
{$ENDIF}
procedure MecsInsert(const Source: WideString; var S: WideString; Index: Integer); overload;
// MecsIsFullWidth
{$IFDEF UNICODE}
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer): Boolean; overload;
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer; FarEast: Boolean): Boolean; overload;
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer; FarEast: Boolean; CodePage: DWORD): Boolean; overload;
function MecsIsFullWidth(const S: UnicodeString; CharIndex: Integer): Boolean; overload;
function MecsIsFullWidth(const S: UnicodeString; CharIndex: Integer; FarEast: Boolean): Boolean; overload;
{$ELSE}
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer): Boolean; overload;
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer; FarEast: Boolean): Boolean; overload;
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer; FarEast: Boolean; CodePage: DWORD): Boolean; overload;
{$ENDIF}
function MecsIsFullWidth(const S: WideString; CharIndex: Integer): Boolean; overload;
function MecsIsFullWidth(const S: WideString; CharIndex: Integer; FarEast: Boolean): Boolean; overload;
// MecsIsLeadElement
function MecsIsLeadElement(TestChar: AnsiChar): Boolean; overload;
function MecsIsLeadElement(TestChar: AnsiChar; Codepage: DWORD): Boolean; overload;
{$IFDEF UNICODE}
function MecsIsLeadElement(TestChar: UnicodeChar): Boolean; overload;
{$ELSE}
function MecsIsLeadElement(TestChar: WideChar): Boolean; overload;
{$ENDIF}
// MecsIsMECElement
function MecsIsMECElement(TestChar: AnsiChar): Boolean; overload;
function MecsIsMECElement(TestChar: AnsiChar; Codepage: DWORD): Boolean; overload;
{$IFDEF UNICODE}
function MecsIsMECElement(TestChar: UnicodeChar): Boolean; overload;
{$ELSE}
function MecsIsMECElement(TestChar: WideChar): Boolean; overload;
{$ENDIF}
// MecsIsTrailElement
function MecsIsTrailElement(TestChar: AnsiChar): Boolean; overload;
function MecsIsTrailElement(TestChar: AnsiChar; Codepage: DWORD): Boolean; overload;
{$IFDEF UNICODE}
function MecsIsTrailElement(TestChar: UnicodeChar): Boolean; overload;
{$ELSE}
function MecsIsTrailElement(TestChar: WideChar): Boolean; overload;
{$ENDIF}
// MecsLeftStr
{$IFDEF UNICODE}
function MecsLeftStr(const AText: RawByteString; const ACount: Integer): RawByteString; overload;
function MecsLeftStr(const AText: RawByteString; const ACount: Integer; Codepage: DWORD): RawByteString; overload;
function MecsLeftStr(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
{$ELSE}
function MecsLeftStr(const AText: AnsiString; const ACount: Integer): AnsiString; overload;
function MecsLeftStr(const AText: AnsiString; const ACount: Integer; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsLeftStr(const AText: WideString; const ACount: Integer): WideString; overload;
// MecsLength
{$IFDEF UNICODE}
function MecsLength(const S: RawByteString): Integer; overload;
function MecsLength(const S: RawByteString; Codepage: DWORD): Integer; overload;
function MecsLength(const S: UnicodeString): Integer; overload;
{$ELSE}
function MecsLength(const S: AnsiString): Integer; overload;
function MecsLength(const S: AnsiString; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsLength(const S: WideString): Integer; overload;
// MecsMidStr
{$IFDEF UNICODE}
function MecsMidStr(const AText: RawByteString; const AStart, ACount: Integer): RawByteString; overload;
function MecsMidStr(const AText: RawByteString; const AStart, ACount: Integer; Codepage: DWORD): RawByteString; overload;
function MecsMidStr(const AText: UnicodeString; const AStart, ACount: Integer): UnicodeString; overload;
{$ELSE}
function MecsMidStr(const AText: AnsiString; const AStart, ACount: Integer): AnsiString; overload;
function MecsMidStr(const AText: AnsiString; const AStart, ACount: Integer; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsMidStr(const AText: WideString; const AStart, ACount: Integer): WideString; overload;
// MecsNextCharIndex
{$IFDEF UNICODE}
function MecsNextCharIndex(const S: RawByteString; Index: Integer): Integer; overload;
function MecsNextCharIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer; overload;
function MecsNextCharIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ELSE}
function MecsNextCharIndex(const S: AnsiString; Index: Integer): Integer; overload;
function MecsNextCharIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer; overload;
{$ENDIF}
function MecsNextCharIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsReverseString
{$IFDEF UNICODE}
function MecsReverseString(const AText: RawByteString): RawByteString; overload;
function MecsReverseString(const AText: RawByteString; Codepage: DWORD): RawByteString; overload;
function MecsReverseString(const AText: UnicodeString): UnicodeString; overload;
{$ELSE}
function MecsReverseString(const AText: AnsiString): AnsiString; overload;
function MecsReverseString(const AText: AnsiString; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsReverseString(const AText: WideString): WideString; overload;
// MecsRightStr
{$IFDEF UNICODE}
function MecsRightStr(const AText: RawByteString; const ACount: Integer): RawByteString; overload;
function MecsRightStr(const AText: RawByteString; const ACount: Integer; Codepage: DWORD): RawByteString; overload;
function MecsRightStr(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
{$ELSE}
function MecsRightStr(const AText: AnsiString; const ACount: Integer): AnsiString; overload;
function MecsRightStr(const AText: AnsiString; const ACount: Integer; Codepage: DWORD): AnsiString; overload;
{$ENDIF}
function MecsRightStr(const AText: WideString; const ACount: Integer): WideString; overload;
// MecsStrCharLength
function MecsStrCharLength(const Str: PAnsiChar): Integer; overload;
function MecsStrCharLength(const Str: PAnsiChar; Codepage: DWORD): Integer; overload;
{$IFDEF UNICODE}
function MecsStrCharLength(const Str: PUnicodeChar): Integer; overload;
{$ENDIF}
function MecsStrCharLength(const Str: PWideChar): Integer; overload;
// MecsStrElementType
function MecsStrElementType(const Str: PAnsiChar; Index: Cardinal): TElementType; overload;
function MecsStrElementType(const Str: PAnsiChar; Index: Cardinal; Codepage: DWORD): TElementType; overload;
{$IFDEF UNICODE}
function MecsStrElementType(const Str: PUnicodeChar; Index: Cardinal): TElementType; overload;
{$ENDIF}
function MecsStrElementType(const Str: PWideChar; Index: Cardinal): TElementType; overload;
// MecsStrLen
function MecsStrLen(const Str: PAnsiChar): Cardinal; overload;
function MecsStrLen(const Str: PAnsiChar; Codepage: DWORD): Cardinal; overload;
{$IFDEF UNICODE}
function MecsStrLen(const Str: PUnicodeChar): Cardinal; overload;
{$ENDIF}
function MecsStrLen(const Str: PWideChar): Cardinal; overload;
// MecsStrNextChar
function MecsStrNextChar(const Str: PAnsiChar): PAnsiChar; overload;
function MecsStrNextChar(const Str: PAnsiChar; Codepage: DWORD): PAnsiChar; overload;
{$IFDEF UNICODE}
function MecsStrNextChar(const Str: PUnicodeChar): PUnicodeChar; overload;
{$ENDIF}
function MecsStrNextChar(const Str: PWideChar): PWideChar; overload;
// for UnicodeString/WideString (Combining Character Sequence)
// -----------------------------------------------------------------------------
// MecsCombiningType
function MecsCombiningType(U: UCS4Char): TCombiningType;
// MecsCCSLength
{$IFDEF UNICODE}
function MecsCCSLength(const S: UnicodeString; Index: Integer): Integer; overload;
{$ENDIF}
function MecsCCSLength(const S: WideString; Index: Integer): Integer; overload;
// MecsCCSToElementIndex
{$IFDEF UNICODE}
function MecsCCSToElementIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ENDIF}
function MecsCCSToElementIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsCCSToElementIndexCount
{$IFDEF UNICODE}
function MecsCCSToElementIndexCount(const S: UnicodeString; Index, Count: Integer): TIndexCount; overload;
{$ENDIF}
function MecsCCSToElementIndexCount(const S: WideString; Index, Count: Integer): TIndexCount; overload;
// MecsCCSToElementLen
{$IFDEF UNICODE}
function MecsCCSToElementLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
{$ENDIF}
function MecsCCSToElementLen(const S: WideString; MaxLen: Integer): Integer; overload;
// MecsDeleteC
{$IFDEF UNICODE}
procedure MecsDeleteC(var S: UnicodeString; Index, Count: Integer); overload;
{$ENDIF}
procedure MecsDeleteC(var S: WideString; Index, Count: Integer); overload;
// MecsElementToCCSIndex
{$IFDEF UNICODE}
function MecsElementToCCSIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ENDIF}
function MecsElementToCCSIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsElementToCCSLen
{$IFDEF UNICODE}
function MecsElementToCCSLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
{$ENDIF}
function MecsElementToCCSLen(const S: WideString; MaxLen: Integer): Integer; overload;
// MecsLengthC
{$IFDEF UNICODE}
function MecsLengthC(const S: UnicodeString): Integer; overload;
{$ENDIF}
function MecsLengthC(const S: WideString): Integer; overload;
// MecsCopyC
{$IFDEF UNICODE}
function MecsCopyC(const S: UnicodeString; Index, Count: Integer): UnicodeString; overload;
{$ENDIF}
function MecsCopyC(const S: WideString; Index, Count: Integer): WideString; overload;
// MecsInsertC
{$IFDEF UNICODE}
procedure MecsInsertC(const Source: UnicodeString; var S: UnicodeString; Index: Integer); overload;
{$ENDIF}
procedure MecsInsertC(const Source: WideString; var S: WideString; Index: Integer); overload;
// MecsLeftStrC
{$IFDEF UNICODE}
function MecsLeftStrC(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
{$ENDIF}
function MecsLeftStrC(const AText: WideString; const ACount: Integer): WideString; overload;
// MecsMidStrC
{$IFDEF UNICODE}
function MecsMidStrC(const AText: UnicodeString; const AStart, ACount: Integer): UnicodeString; overload;
{$ENDIF}
function MecsMidStrC(const AText: WideString; const AStart, ACount: Integer): WideString; overload;
// MecsNextCCSIndex
{$IFDEF UNICODE}
function MecsNextCCSIndex(const S: UnicodeString; Index: Integer): Integer; overload;
{$ENDIF}
function MecsNextCCSIndex(const S: WideString; Index: Integer): Integer; overload;
// MecsReverseStringC
{$IFDEF UNICODE}
function MecsReverseStringC(const AText: UnicodeString): UnicodeString; overload;
{$ENDIF}
function MecsReverseStringC(const AText: WideString): WideString; overload;
// MecsRightStrC
{$IFDEF UNICODE}
function MecsRightStrC(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
{$ENDIF}
function MecsRightStrC(const AText: WideString; const ACount: Integer): WideString; overload;
// MecsStrCCSLength
{$IFDEF UNICODE}
function MecsStrCCSLength(const Str: PUnicodeChar): Integer; overload;
{$ENDIF}
function MecsStrCCSLength(const Str: PWideChar): Integer; overload;
// MecsStrLenC
{$IFDEF UNICODE}
function MecsStrLenC(const Str: PUnicodeChar): Cardinal; overload;
{$ENDIF}
function MecsStrLenC(const Str: PWideChar): Cardinal; overload;
// MecsStrNextCCS
{$IFDEF UNICODE}
function MecsStrNextCCS(const Str: PUnicodeChar): PUnicodeChar; overload;
{$ENDIF}
function MecsStrNextCCS(const Str: PWideChar): PWideChar; overload;
// for Convert
// -----------------------------------------------------------------------------
// AnsiToUTF32
{$IFDEF UNICODE}
function AnsiToUTF32(const AText: RawByteString; Codepage: DWORD): UCS4String; overload;
function AnsiToUTF32(const AText: RawByteString): UCS4String; overload;
{$ELSE}
function AnsiToUTF32(const AText: AnsiString; Codepage: DWORD): UCS4String; overload;
function AnsiToUTF32(const AText: AnsiString): UCS4String; overload;
{$ENDIF}
// AnsiToUTF16
{$IFDEF UNICODE}
function AnsiToUTF16(const AText: RawByteString; Codepage: DWORD): WideString; overload;
function AnsiToUTF16(const AText: RawByteString): WideString; overload;
{$ELSE}
function AnsiToUTF16(const AText: AnsiString; Codepage: DWORD): WideString; overload;
function AnsiToUTF16(const AText: AnsiString): WideString; overload;
{$ENDIF}
// AnsiToUTF8
{$IFDEF UNICODE}
function AnsiToUTF8(const AText: RawByteString; Codepage: DWORD): RawByteString; overload;
function AnsiToUTF8(const AText: RawByteString): RawByteString; overload;
{$ELSE}
function AnsiToUTF8(const AText: AnsiString; Codepage: DWORD): UTF8String; overload;
function AnsiToUTF8(const AText: AnsiString): UTF8String; overload;
{$ENDIF}
// CodePointToUTF16
function CodePointToUTF16(const UCS4: UCS4Char): WideString; overload;
{$IFDEF UNICODE}
function CodePointToUTF8(const UCS4: UCS4Char): RawByteString;
{$ELSE}
function CodePointToUTF8(const UCS4: UCS4Char): UTF8String;
{$ENDIF}
// ConvertMultiByteToUnicode
{$IFDEF UNICODE}
function ConvertMultiByteToUnicode(SrcCodepage: DWORD; const SrcStr: RawByteString; var DstStr: WideString): Boolean;
{$ELSE}
function ConvertMultiByteToUnicode(SrcCodepage: DWORD; const SrcStr: AnsiString; var DstStr: WideString): Boolean;
{$ENDIF}
// ConvertString
{$IFDEF UNICODE}
function ConvertString(SrcCodepage, DstCodepage: DWORD; const SrcStr: RawByteString; var DstStr: RawByteString): Boolean;
{$ELSE}
function ConvertString(SrcCodepage, DstCodepage: DWORD; const SrcStr: AnsiString; var DstStr: AnsiString): Boolean;
{$ENDIF}
// ConvertUnicodeToMultiByte
{$IFDEF UNICODE}
function ConvertUnicodeToMultiByte(DstCodepage: DWORD; const SrcStr: WideString; var DstStr: RawByteString): Boolean;
{$ELSE}
function ConvertUnicodeToMultiByte(DstCodepage: DWORD; const SrcStr: WideString; var DstStr: AnsiString): Boolean;
{$ENDIF}
// UTF32ToAnsi
{$IFDEF UNICODE}
function UTF32ToAnsi(const UCS4Text: UCS4String; Codepage: DWORD): RawByteString; overload;
function UTF32ToAnsi(const UCS4Text: UCS4String): RawByteString; overload;
{$ELSE}
function UTF32ToAnsi(const UCS4Text: UCS4String; Codepage: DWORD): AnsiString; overload;
function UTF32ToAnsi(const UCS4Text: UCS4String): AnsiString; overload;
{$ENDIF}
// UTF32ToUTF16
function UTF32ToUTF16(const UCS4Text: UCS4String): WideString;
// UTF32ToUTF8
{$IFDEF UNICODE}
function UTF32ToUTF8(const UCS4Text: UCS4String): RawByteString;
{$ELSE}
function UTF32ToUTF8(const UCS4Text: UCS4String): UTF8String;
{$ENDIF}
// UTF16ToAnsi
{$IFDEF UNICODE}
function UTF16ToAnsi(const WText: WideString; Codepage: DWORD): RawByteString; overload;
function UTF16ToAnsi(const WText: WideString): RawByteString; overload;
{$ELSE}
function UTF16ToAnsi(const WText: WideString; Codepage: DWORD): AnsiString; overload;
function UTF16ToAnsi(const WText: WideString): AnsiString; overload;
{$ENDIF}
// UTF16ToUTF32
function UTF16ToUTF32(const WText: WideString): UCS4String;
// UTF16ToUTF32
{$IFDEF UNICODE}
function UTF16ToUTF8(const WText: WideString): RawByteString;
{$ELSE}
function UTF16ToUTF8(const WText: WideString): UTF8String;
{$ENDIF}
// UTF16ToAnsi
{$IFDEF UNICODE}
function UTF8ToAnsi(const UCF8Text: RawByteString; Codepage: DWORD): RawByteString; overload;
function UTF8ToAnsi(const UCF8Text: RawByteString): RawByteString; overload;
{$ELSE}
function UTF8ToAnsi(const UCF8Text: UTF8String; Codepage: DWORD): AnsiString; overload;
function UTF8ToAnsi(const UCF8Text: UTF8String): AnsiString; overload;
{$ENDIF}
// UTF8ToUTF32
{$IFDEF UNICODE}
function UTF8ToUTF32(const UCF8Text: RawByteString): UCS4String;
{$ELSE}
function UTF8ToUTF32(const UCF8Text: UTF8String): UCS4String;
{$ENDIF}
// UTF8ToUTF16
{$IFDEF UNICODE}
function UTF8ToUTF16(const UCF8Text: RawByteString): WideString;
{$ELSE}
function UTF8ToUTF16(const UCF8Text: UTF8String): WideString;
{$ENDIF}
var
DefaultAnsiCodePage : DWORD;
DefaultOEMCodePage : DWORD;
DefaultLCID : LCID;
implementation
// for Codepage/Locale/CharSet
// -----------------------------------------------------------------------------
// CodePageToFontCharset
function CodePageToFontCharset(Codepage: DWORD): TFontCharset;
var
lpSrc: DWORD;
lpCs: TCharsetInfo;
begin
lpSrc := CodePage;
if TranslateCharsetInfo(lpSrc, lpCs, TCI_SRCCODEPAGE) then
result := lpCs.ciCharset
else
result := DEFAULT_CHARSET;
end;
// FontCharsetToCodePage
function FontCharsetToCodePage(FontCharset: TFontCharset): DWORD;
var
lpSrc: DWORD;
lpCs: TCharsetInfo;
begin
lpSrc := FontCharset;
if TranslateCharsetInfo(lpSrc, lpCs, TCI_SRCCHARSET) then
result := lpCs.ciACP
else
result := DefaultAnsiCodePage;
end;
// IsFarEastCharSet
function IsFarEastCharSet(CharSet: TFontCharSet): Boolean;
begin
case CharSet of
SHIFTJIS_CHARSET,
GB2312_CHARSET,
CHINESEBIG5_CHARSET,
HANGEUL_CHARSET,
JOHAB_CHARSET:
result := True;
else
result := False;
end;
end;
// IsFarEastLCID
function IsFarEastLCID(Locale: LCID): Boolean;
begin
case Locale of
1028, // Taiwan
1041, // Japan
1042, // Korea
2052, // China
3076, // Hong Kong
4100: // Singapore
result := True;
else
result := False;
end;
end;
// for AnsiString
// -----------------------------------------------------------------------------
// MecsGetCodePage
{$IFDEF UNICODE}
function MecsGetCodePage(S: RawByteString): DWORD;
{$ELSE}
function MecsGetCodePage(S: AnsiString): DWORD;
{$ENDIF}
begin
{$IFDEF UNICODE}
result := StringCodePage(S);
if result = CP_ACP then
{$ENDIF}
result := DefaultAnsiCodePage;
end;
// MecsGetLeadBytes
function MecsGetLeadBytes: TLeadBytes;
begin
result := MecsGetLeadBytes(DefaultAnsiCodePage);
end;
function MecsGetLeadBytes(Codepage: DWORD): TLeadBytes;
var
i:Integer;
MBCSCPInfo: TCPInfo;
LB: Byte;
begin
result := [];
if CodePage = CP_UTF8 then
Exit;
GetCPInfo(CodePage, MBCSCPInfo);
with MBCSCPInfo do
begin
i := 0;
while (i < MAX_LEADBYTES) and ((LeadByte[i] or LeadByte[i + 1]) <> 0) do
begin
for LB := LeadByte[i] to LeadByte[i + 1] do
Include(result, AnsiChar(LB));
Inc(i, 2);
end;
end;
end;
// for UnicodeString/WideString
// -----------------------------------------------------------------------------
// MecsEastAsianWidth
function MecsEastAsianWidth(U: UCS4Char): TEastAsianWidth;
// http://unicode.org/Public/UNIDATA/MecsEastAsianWidth.txt
begin
case U of
// Fullwidth
$3000,
$FF01..$FF60,
$FFE0..$FFE6:
result := eawFullwidth;
// Halfwidth
$20A9,
$FF61..$FFBE,
$FFC2..$FFC7,
$FFCA..$FFCF,
$FFD2..$FFD7,
$FFDA..$FFDC,
$FFE8..$FFEE:
result := eawHalfwidth;
// Wide
$1100..$1159,
$115F,
$2329..$232A,
$2E80..$2E99,
$2E9B..$2EF3,
$2F00..$2FD5,
$2FF0..$2FFB,
$3001..$303E,
$3041..$3096,
$3099..$30FF,
$3105..$312D,
$3131..$318E,
$3190..$31B7,
$31C0..$31E3,
$31F0..$321E,
$3220..$3243,
$3250..$32FE,
$3300..$33FF,
$3400..$4DB5,
$4E00..$9FC3,
$A000..$A48C,
$A490..$A4C6,
$AC00..$D7A3,
$F900..$FA2D,
$FA30..$FA6A,
$FA70..$FAD9,
$FE10..$FE19,
$FE30..$FE52,
$FE54..$FE66,
$FE68..$FE6B,
$20000..$2A6D6,
$2A6D7..$2F7FF,
$2F800..$2FA1D,
$2FA1E..$2FFFD,
$30000..$3FFFD:
result := eawWide;
// Narrow
$0020..$007E,
$00A2..$00A3,
$00A5..$00A6,
$00AC,
$00AF,
$27E6..$27ED,
$2985..$2986:
result := eawNarrow;
// Ambiguous
$00A1,
$00A4,
$00A7..$00A8,
$00AA,
$00AD..$00AE,
$00B0..$00B4,
$00B6..$00BA,
$00BC..$00BF,
$00C6,
$00D0,
$00D7..$00D8,
$00DE..$00E1,
$00E6,
$00E8..$00EA,
$00EC..$00ED,
$00F0,
$00F2..$00F3,
$00F7..$00FA,
$00FC,
$00FE,
$0101,
$0111,
$0113,
$011B,
$0126..$0127,
$012B,
$0131..$0133,
$0138,
$013F..$0142,
$0144,
$0148..$014B,
$014D,
$0152..$0153,
$0166..$0167,
$016B,
$01CE,
$01D0,
$01D2,
$01D4,
$01D6,
$01D8,
$01DA,
$01DC,
$0251,
$0261,
$02C4,
$02C7,
$02C9..$02CB,
$02CD,
$02D0,
$02D8..$02DB,
$02DD,
$02DF,
$0300..$036F,
$0391..$03A1,
$03A3..$03A9,
$03B1..$03C1,
$03C3..$03C9,
$0401,
$0410..$044F,
$0451,
$2010,
$2013..$2016,
$2018..$2019,
$201C..$201D,
$2020..$2022,
$2024..$2027,
$2030,
$2032..$2033,
$2035,
$203B,
$203E,
$2074,
$207F,
$2081..$2084,
$20AC,
$2103,
$2105,
$2109,
$2113,
$2116,
$2121..$2122,
$2126,
$212B,
$2153..$2154,
$215B..$215E,
$2160..$216B,
$2170..$2179,
$2190..$2199,
$21B8..$21B9,
$21D2,
$21D4,
$21E7,
$2200,
$2202..$2203,
$2207..$2208,
$220B,
$220F,
$2211,
$2215,
$221A,
$221D..$2220,
$2223,
$2225,
$2227..$222C,
$222E,
$2234..$2237,
$223C..$223D,
$2248,
$224C,
$2252,
$2260..$2261,
$2264..$2267,
$226A..$226B,
$226E..$226F,
$2282..$2283,
$2286..$2287,
$2295,
$2299,
$22A5,
$22BF,
$2312,
$2460..$24E9,
$24EB..$254B,
$2550..$2573,
$2580..$258F,
$2592..$2595,
$25A0..$25A1,
$25A3..$25A9,
$25B2..$25B3,
$25B6..$25B7,
$25BC..$25BD,
$25C0..$25C1,
$25C6..$25C8,
$25CB,
$25CE..$25D1,
$25E2..$25E5,
$25EF,
$2605..$2606,
$2609,
$260E..$260F,
$2614..$2615,
$261C,
$261E,
$2640,
$2642,
$2660..$2661,
$2663..$2665,
$2667..$266A,
$266C..$266D,
$266F,
$273D,
$2776..$277F,
$E000..$F8FF,
$FE00..$FE0F,
$FFFD,
$E0100..$E01EF:
result := eawAmbiguous;
else
result := eawNeutral;
end;
end;
// MecsCombiningType (written by Mae)
function MecsCombiningType(U: UCS4Char): TCombiningType;
begin
case U of
$0300..$036F,
$0483..$0489,
$0591..$05BD,
$05BF..$05BF,
$05C1..$05C2,
$05C4..$05C5,
$05C7..$05C7,
$0610..$061A,
$064B..$065E,
$0670..$0670,
$06D6..$06DC,
$06DE..$06E4,
$06E7..$06E8,
$06EA..$06ED,
$0711..$0711,
$0730..$074A,
$07A6..$07B0,
$07EB..$07F3,
$0901..$0903,
$093C..$093C,
$093E..$094D,
$0951..$0954,
$0962..$0963,
$0981..$0983,
$09BC..$09BC,
$09BE..$09C4,
$09C7..$09C8,
$09CB..$09CD,
$09D7..$09D7,
$09E2..$09E3,
$0A01..$0A03,
$0A3C..$0A3C,
$0A3E..$0A42,
$0A47..$0A48,
$0A4B..$0A4D,
$0A51..$0A51,
$0A70..$0A71,
$0A75..$0A75,
$0A81..$0A83,
$0ABC..$0ABC,
$0ABE..$0AC5,
$0AC7..$0AC9,
$0ACB..$0ACD,
$0AE2..$0AE3,
$0B01..$0B03,
$0B3C..$0B3C,
$0B3E..$0B44,
$0B47..$0B48,
$0B4B..$0B4D,
$0B56..$0B57,
$0B62..$0B63,
$0B82..$0B82,
$0BBE..$0BC2,
$0BC6..$0BC8,
$0BCA..$0BCD,
$0BD7..$0BD7,
$0C01..$0C03,
$0C3E..$0C44,
$0C46..$0C48,
$0C4A..$0C4D,
$0C55..$0C56,
$0C62..$0C63,
$0C82..$0C83,
$0CBC..$0CBC,
$0CBE..$0CBE,
$0CC0..$0CC4,
$0CC7..$0CC8,
$0CCA..$0CCD,
$0CD5..$0CD6,
$0CE2..$0CE3,
$0D02..$0D03,
$0D3E..$0D44,
$0D46..$0D48,
$0D4A..$0D4D,
$0D57..$0D57,
$0D62..$0D63,
$0D82..$0D83,
$0DCA..$0DCA,
$0DCF..$0DD4,
$0DD6..$0DD6,
$0DD8..$0DDF,
$0DF2..$0DF3,
$0E31..$0E31,
$0E34..$0E3A,
$0E47..$0E4E,
$0EB1..$0EB1,
$0EB4..$0EB9,
$0EBB..$0EBC,
$0EC8..$0ECD,
$0F18..$0F19,
$0F35..$0F35,
$0F37..$0F37,
$0F39..$0F39,
$0F3E..$0F3F,
$0F71..$0F84,
$0F86..$0F87,
$0F90..$0F97,
$0F99..$0FBC,
$0FC6..$0FC6,
$102B..$103E,
$1056..$1059,
$105E..$1060,
$1062..$1064,
$1067..$106D,
$1071..$1074,
$1082..$108D,
$108F..$108F,
$135F..$135F,
$1712..$1714,
$1732..$1734,
$1752..$1753,
$1772..$1773,
$17B6..$17D3,
$17DD..$17DD,
$180B..$180D,
$18A9..$18A9,
$1920..$192B,
$1930..$193B,
$19B0..$19C0,
$19C8..$19C9,
$1A17..$1A1B,
$1B00..$1B04,
$1B34..$1B44,
$1B6B..$1B73,
$1B80..$1B82,
$1BA1..$1BAA,
$1C24..$1C37,
$1DC0..$1DE6,
$1DFE..$1DFF,
$20D0..$20F0,
$2DE0..$2DFF,
$302A..$302F,
$3099..$309A,
$A66F..$A672,
$A67C..$A67D,
$A802..$A802,
$A806..$A806,
$A80B..$A80B,
$A823..$A827,
$A880..$A881,
$A8B4..$A8C4,
$A926..$A92D,
$A947..$A953,
$AA29..$AA36,
$AA43..$AA43,
$AA4C..$AA4D,
$FB1E..$FB1E,
$FE00..$FE0F,
$FE20..$FE26,
$101FD..$101FD,
$10A01..$10A03,
$10A05..$10A06,
$10A0C..$10A0F,
$10A38..$10A3A,
$10A3F..$10A3F,
$1D165..$1D169,
$1D16D..$1D172,
$1D17B..$1D182,
$1D185..$1D18B,
$1D1AA..$1D1AD,
$1D242..$1D244,
$E0100..$E01EF:
result := ctCombining;
else
result := ctBase;
end;
end;
// MecsIsNormalized
function MecsIsNormalized(const AText: WideString; NormForm: TNORM_FORM): Boolean;
// -----------------------------------------------------------------------------
// Requires(XP/2003 or later):
// Microsoft Internationalized Domain Names (IDN) Mitigation APIs 1.1
// http://www.microsoft.com/downloads/details.aspx?FamilyID=ad6158d7-ddba-416a-9109-07607425a815&displaylang=en
// (or Internet Explorer 7 or later)
// -----------------------------------------------------------------------------
var
FP: TFarProc;
DLLWnd: THandle;
// http://msdn2.microsoft.com/en-us/library/ms776382(VS.85).aspx
MecsIsNormalizedString: function(NormForm: Integer; lpString: LPCWSTR; cwLength: Integer):Boolean; stdcall;
begin
result := False;
if Length(AText) = 0 then
begin
result := True;
Exit;
end;
{$IFDEF UNICODE}
DLLWnd := LoadLibraryW('normaliz.dll');
{$ELSE}
DLLWnd := LoadLibraryA('normaliz.dll');
{$ENDIF}
try
FP := GetProcAddress(DLLWnd, 'MecsIsNormalizedString');
if FP <> nil then
begin
@MecsIsNormalizedString := FP;
result := MecsIsNormalizedString(Integer(NormForm), PWideChar(AText), Length(AText));
end;
finally
if DLLWnd > 0 then
FreeLibrary(DLLWnd);
end;
end;
// MecsMappingFixJA
// (Fix Unicode to JIS X 0208 mapping)
{$IFDEF UNICODE}
function MecsMappingFixJA(const S: UnicodeString): UnicodeString; overload;
var
i: Integer;
begin
result := S;
for i:=1 to Length(result) do
case result[i] of
#$2225: // U+2225: PARALLEL TO -> U+2016: DOUBLE VERTICAL LINE
result[i] := #$2016;
#$FF0D: // U+FF0D: FULLWIDTH HYPHEN-MINUS -> U+2212: MINUS SIGN
result[i] := #$2212;
#$FF5E: // U+FF5E: FULLWIDTH TILDE -> U+301C: WAVE DASH
result[i] := #$301C;
#$FFE0: // U+FFE0: FULLWIDTH CENT SIGN -> U+00A2: CENT SIGN
result[i] := #$00A2;
#$FFE1: // U+FFE1: FULLWIDTH POUND SIGN -> U+00A3: POUND SIGN
result[i] := #$00A3;
#$FFE2: // U+FFE2: FULLWIDTH NOT SIGN -> U+00AC: NOT SIGN
result[i] := #$00AC;
end;
end;
{$ENDIF}
function MecsMappingFixJA(const S: WideString): WideString; overload;
var
i: Integer;
begin
result := S;
for i:=1 to Length(result) do
case result[i] of
#$2225: // U+2225: PARALLEL TO -> U+2016: DOUBLE VERTICAL LINE
result[i] := #$2016;
#$FF0D: // U+FF0D: FULLWIDTH HYPHEN-MINUS -> U+2212: MINUS SIGN
result[i] := #$2212;
#$FF5E: // U+FF5E: FULLWIDTH TILDE -> U+301C: WAVE DASH
result[i] := #$301C;
#$FFE0: // U+FFE0: FULLWIDTH CENT SIGN -> U+00A2: CENT SIGN
result[i] := #$00A2;
#$FFE1: // U+FFE1: FULLWIDTH POUND SIGN -> U+00A3: POUND SIGN
result[i] := #$00A3;
#$FFE2: // U+FFE2: FULLWIDTH NOT SIGN -> U+00AC: NOT SIGN
result[i] := #$00AC;
end;
end;
// MecsNormalize
function MecsNormalize(const Src: WideString; var Dst: WideString; NormForm: TNORM_FORM): Boolean;
// -----------------------------------------------------------------------------
// NormalizeString Requires(XP/2003 or later):
// Microsoft Internationalized Domain Names (IDN) Mitigation APIs 1.1
// http://www.microsoft.com/downloads/details.aspx?FamilyID=ad6158d7-ddba-416a-9109-07607425a815&displaylang=en
// (or Internet Explorer 7 or later)
// -----------------------------------------------------------------------------
// FoldString requires Windows NT3.1 or later;
// -----------------------------------------------------------------------------
//
// Please examine U+03D3.
// - NFC U+03D3
// - NFD U+03D2 (U+0020) U+0301
// - NFKC U+038E
// - NFKC U+03A5 (U+0020) U+0301
//
// -----------------------------------------------------------------------------
var
i: Integer;
O,P :PWideChar;
BufSize: Integer;
FP: TFarProc;
DLLWnd: THandle;
MapFlags: DWORD;
// http://msdn2.microsoft.com/en-us/library/ms776395(VS.85).aspx
NormalizeString: function(NormForm: Integer; lpSrcString: LPCWSTR; cwSrMecsLength: Integer; lpDstString: LPWSTR; cwDstLength: Integer):Integer; stdcall;
begin
result := False;
Dst := '';
if Length(Src) = 0 then
begin
result := True;
Exit;
end;
{$IFDEF UNICODE}
DLLWnd := LoadLibraryW('normaliz.dll');
{$ELSE}
DLLWnd := LoadLibraryA('normaliz.dll');
{$ENDIF}
try
FP := GetProcAddress(DLLWnd, 'NormalizeString');
if FP <> nil then
begin
@NormalizeString := FP;
BufSize := NormalizeString(Integer(NormForm), PWideChar(Src), Length(Src), nil, 0);
if (GetLastError <> 0) then
Exit;
if (BufSize = 0) then
begin
result := True;
Exit;
end;
SetLength(Dst, BufSize);
P := PWideChar(Dst);
for i:=1 to BufSize do
begin
P^ := #$0000;
Inc(P);
end;
NormalizeString(Integer(NormForm), PWideChar(Src), Length(Src), PWideChar(Dst), Length(Dst));
if (GetLastError <> 0) then
Exit;
P := PWideChar(Dst);
O := P;
while (P^ <> #$0000) do
Inc(P);
SetLength(Dst, P - O);
result := True;
end;
finally
if DLLWnd > 0 then
FreeLibrary(DLLWnd);
end;
if not result then
begin
case NormForm of
NormalizationC:
MapFlags := MAP_PRECOMPOSED;
NormalizationD:
MapFlags := MAP_COMPOSITE;
NormalizationKC:
MapFlags := MAP_FOLDCZONE or MAP_PRECOMPOSED;
NormalizationKD:
MapFlags := MAP_FOLDCZONE or MAP_COMPOSITE;
else
MapFlags := 0;
end;
BufSize := FoldStringW(MapFlags, PWideChar(Src), Length(Src), nil, 0);
if (GetLastError <> 0) then
Exit;
if (BufSize = 0) then
begin
result := True;
Exit;
end;
SetLength(Dst, BufSize);
P := PWideChar(Dst);
for i:=1 to BufSize do
begin
P^ := #$0000;
Inc(P);
end;
FoldStringW(MapFlags, PWideChar(Src), Length(Src), PWideChar(Dst), Length(Dst));
if (GetLastError <> 0) then
Exit;
P := PWideChar(Dst);
O := P;
while (P^ <> #$0000) do
Inc(P);
SetLength(Dst, P - O);
result := True;
end;
end;
// StrLen
{$IFNDEF UNICODE}
function StrLen(const Str: PWideChar): Cardinal;
begin
result := lstrlenW(Str);
end;
{$ENDIF}
// for AnsiString/UnicodeString/WideString (Common)
// -----------------------------------------------------------------------------
// MecsCharToElementIndex
{$IFDEF UNICODE}
function MecsCharToElementIndex(const S: RawByteString; Index: Integer): Integer;
{$ELSE}
function MecsCharToElementIndex(const S: AnsiString; Index: Integer): Integer;
{$ENDIF}
begin
result := MecsCharToElementIndex(S, Index, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsCharToElementIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsCharToElementIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer;
{$ENDIF}
var
P, O: PAnsiChar;
L, ChrCnt: Integer;
LeadBytes: TLeadBytes;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
result := 0;
if CodePage = CP_UTF8 then
begin
ChrCnt := 0;
P := PAnsiChar(S);
O := P;
while (P^ <> #$00) and (result = 0) do
begin
if (Byte(P^) and $80) = $00 then
Inc(ChrCnt)
else if (Byte(P^) and $C0) = $C0 then
Inc(ChrCnt);
if ChrCnt = Index then
begin
result := P - O + 1;
Exit;
end;
Inc(P);
end;
end
else
begin
LeadBytes := MecsGetLeadBytes(CodePage);
if LeadBytes = [] then
result := Index
else
begin
ChrCnt := 0;
P := PAnsiChar(S);
O := P;
while (P^ <> #$00) and (result = 0) do
begin
Inc(ChrCnt);
if ChrCnt = Index then
begin
result := P - O + 1;
Exit;
end;
if P^ in LeadBytes then
begin
Inc(P);
if P^ = #$00 then
Break;
end;
Inc(P);
end;
end;
end;
end;
{$IFDEF UNICODE}
function MecsCharToElementIndex(const S: UnicodeString; Index: Integer): Integer;
var
P, O: PUnicodeChar;
L, ChrCnt: Integer;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
ChrCnt := 0;
P := PUnicodeChar(S);
O := P;
while (P^ <> #$0000) and (result = 0) do
begin
Inc(ChrCnt);
if ChrCnt = Index then
begin
result := P - O + 1;
Break;
end;
if MecsIsLeadElement(P^) then
begin
Inc(P);
if P^ = #$0000 then
Break;
end;
Inc(P);
end;
end;
{$ENDIF}
function MecsCharToElementIndex(const S: WideString; Index: Integer): Integer;
var
P, O: PWideChar;
L, ChrCnt: Integer;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
ChrCnt := 0;
P := PWideChar(S);
O := P;
while (P^ <> #$0000) and (result = 0) do
begin
Inc(ChrCnt);
if ChrCnt = Index then
begin
result := P - O + 1;
Break;
end;
if MecsIsLeadElement(P^) then
begin
Inc(P);
if P^ = #$0000 then
Break;
end;
Inc(P);
end;
end;
// MecsCharToElementIndexCount
{$IFDEF UNICODE}
function MecsCharToElementIndexCount(const S: RawByteString; Index, Count: Integer): TIndexCount;
{$ELSE}
function MecsCharToElementIndexCount(const S: AnsiString; Index, Count: Integer): TIndexCount;
{$ENDIF}
begin
result := MecsCharToElementIndexCount(S, Index, Count, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsCharToElementIndexCount(const S: RawByteString; Index, Count: Integer; Codepage: DWORD): TIndexCount;
{$ELSE}
function MecsCharToElementIndexCount(const S: AnsiString; Index, Count: Integer; Codepage: DWORD): TIndexCount;
{$ENDIF}
var
P, O: PAnsiChar;
L, J, ChrCnt: Integer;
LeadBytes: TLeadBytes;
begin
result.StartIndex := Index;
result.EndIndex := Index + Count - 1;
result.Count := Count;
L := System.Length(S);
if L = 0 then
Exit;
if CodePage = CP_UTF8 then
begin
result.StartIndex := 0;
result.EndIndex := 0;
result.Count := 0;
ChrCnt := 0;
P := PAnsiChar(S);
O := P;
while (P^ <> #$00) and (result.EndIndex = 0) and ((ChrCnt - Index) < Count) do
begin
if (Byte(P^) and $80) = $00 then
begin
Inc(ChrCnt);
if ChrCnt = Index then
result.StartIndex := P - O + 1;
if ChrCnt = (Index + Count - 1) then
result.EndIndex := P - O + 1;
end
else if (Byte(P^) and $C0) = $C0 then
begin
Inc(ChrCnt);
if ChrCnt = Index then
result.StartIndex := P - O + 1;
if ChrCnt = (Index + Count - 1) then
begin
for J:=2 to 6 do
begin
if (Byte(P^) and ($80 shr J)) = 0 then
begin
result.EndIndex := P - O + J;
Break;
end;
end;
end;
end;
Inc(P);
end;
if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
result.EndIndex := L;
result.Count := result.EndIndex - result.StartIndex + 1;
end
else
begin
LeadBytes := MecsGetLeadBytes(CodePage);
if LeadBytes = [] then
begin
result.StartIndex := Index;
result.EndIndex := Index + Count - 1;
result.Count := Count;
end
else
begin
result.StartIndex := 0;
result.EndIndex := 0;
result.Count := 0;
ChrCnt := 0;
P := PAnsiChar(S);
O := P;
while (P^ <> #$00) and (result.EndIndex = 0) and ((ChrCnt - Index) < Count) do
begin
Inc(ChrCnt);
if ChrCnt = Index then
result.StartIndex := P - O + 1;
if P^ in LeadBytes then
begin
if ChrCnt = (Index + Count - 1) then
result.EndIndex := P - O + 2;
Inc(P);
if P^ = #$00 then
Break;
end
else if ChrCnt = (Index + Count - 1) then
result.EndIndex := P - O + 1;
Inc(P);
end;
if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
result.EndIndex := L;
result.Count := result.EndIndex - result.StartIndex + 1;
end;
end;
end;
{$IFDEF UNICODE}
function MecsCharToElementIndexCount(const S: UnicodeString; Index, Count: Integer): TIndexCount;
var
P, O: PUnicodeChar;
L, ChrCnt: Integer;
begin
result.StartIndex := Index;
result.EndIndex := Index + Count - 1;
result.Count := Count;
L := System.Length(S);
if L = 0 then
Exit;
result.StartIndex := 0;
result.EndIndex := 0;
result.Count := 0;
ChrCnt := 0;
P := PUnicodeChar(S);
O := P;
while (P^ <> #$0000) and (result.EndIndex = 0) and ((ChrCnt - Index) < Count) do
begin
Inc(ChrCnt);
if ChrCnt = Index then
result.StartIndex := P - O + 1;
if MecsIsLeadElement(P^) then
begin
if ChrCnt = (Index + Count - 1) then
result.EndIndex := P - O + 2;
Inc(P);
if P^ = #$0000 then
Break;
end
else if ChrCnt = (Index + Count - 1) then
result.EndIndex := P - O + 1;
Inc(P);
end;
if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
result.EndIndex := L;
result.Count := result.EndIndex - result.StartIndex + 1;
end;
{$ENDIF}
function MecsCharToElementIndexCount(const S: WideString; Index, Count: Integer): TIndexCount;
var
P, O: PWideChar;
L, ChrCnt: Integer;
begin
result.StartIndex := Index;
result.EndIndex := Index + Count - 1;
result.Count := Count;
L := System.Length(S);
if L = 0 then
Exit;
result.StartIndex := 0;
result.EndIndex := 0;
result.Count := 0;
ChrCnt := 0;
P := PWideChar(S);
O := P;
while (P^ <> #$0000) and (result.EndIndex = 0) and ((ChrCnt - Index) < Count) do
begin
Inc(ChrCnt);
if ChrCnt = Index then
result.StartIndex := P - O + 1;
if MecsIsLeadElement(P^) then
begin
if ChrCnt = (Index + Count - 1) then
result.EndIndex := P - O + 2;
Inc(P);
if P^ = #$0000 then
Break;
end
else if ChrCnt = (Index + Count - 1) then
result.EndIndex := P - O + 1;
Inc(P);
end;
if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
result.EndIndex := L;
result.Count := result.EndIndex - result.StartIndex + 1;
end;
// MecsCharLength
{$IFDEF UNICODE}
function MecsCharLength(const S: RawByteString; Index: Integer): Integer;
{$ELSE}
function MecsCharLength(const S: AnsiString; Index: Integer): Integer;
{$ENDIF}
begin
result := MecsCharLength(S, Index, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsCharLength(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsCharLength(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer;
{$ENDIF}
var
Idx: Integer;
J: Byte;
begin
result := 1;
if S = '' then
Exit;
Idx := MecsCharToElementIndex(S, Index, Codepage);
if Idx = 0 then
Exit;
if Codepage = CP_UTF8 then
begin
if (Byte(S[Idx]) and $C0) = $C0 then
begin
for J:=2 to 6 do
begin
if (Byte(S[Idx]) and ($80 shr J)) = 0 then
begin
result := J;
Break;
end;
end;
end;
end
else
begin
if MecsIsLeadElement(S[Idx], Codepage) then
result := 2;
end;
end;
{$IFDEF UNICODE}
function MecsCharLength(const S: UnicodeString; Index: Integer): Integer;
begin
result := 1;
if S = '' then
Exit;
if Index <= 0 then
Exit;
if MecsIsLeadElement(S[Index]) then
result := 2;
end;
{$ENDIF}
function MecsCharLength(const S: WideString; Index: Integer): Integer;
begin
result := 1;
if S = '' then
Exit;
if Index <= 0 then
Exit;
if MecsIsLeadElement(S[Index]) then
result := 2;
end;
// MecsCharToElementLen
{$IFDEF UNICODE}
function MecsCharToElementLen(const S: RawByteString; MaxLen: Integer): Integer;
{$ELSE}
function MecsCharToElementLen(const S: AnsiString; MaxLen: Integer): Integer;
{$ENDIF}
begin
result := MecsCharToElementLen(S, MaxLen, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsCharToElementLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsCharToElementLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ENDIF}
{$IFDEF UNICODE}
function _MecsCharToElementLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ELSE}
function _MecsCharToElementLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ENDIF}
var
P, O: PAnsiChar;
L, ChrCnt: Integer;
J: Byte;
LeadBytes: TLeadBytes;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
if CodePage = CP_UTF8 then
begin
ChrCnt := 0;
P := PAnsiChar(S);
O := P;
while (P^ <> #$00) and (result = 0) do
begin
if (Byte(P^) and $80) = $00 then
begin
Inc(ChrCnt);
if ChrCnt = MaxLen then
begin
result := P - O + 1;
Exit;
end;
end
else if (Byte(P^) and $C0) = $C0 then
begin
Inc(ChrCnt);
if ChrCnt = MaxLen then
begin
for J:=2 to 6 do
begin
if (Byte(P^) and ($80 shr J)) = 0 then
begin
result := P - O + J;
Break;
end;
end;
Exit;
end;
end;
Inc(P);
end;
end
else
begin
LeadBytes := MecsGetLeadBytes(CodePage);
if LeadBytes = [] then
result := MaxLen
else
begin
ChrCnt := 0;
P := PAnsiChar(S);
O := P;
while (P^ <> #$00) and (result = 0) do
begin
Inc(ChrCnt);
if ChrCnt = MaxLen then
result := P - O + 1;
if P^ in LeadBytes then
begin
Inc(P);
if ChrCnt = MaxLen then
begin
result := P - O + 1;
Break;
end;
if P^ = #$00 then
Break;
end;
Inc(P);
end;
end;
end;
end;
begin
result := _MecsCharToElementLen(S, MaxLen, CodePage);
if result < MaxLen then
result := System.Length(S);
end;
{$IFDEF UNICODE}
function MecsCharToElementLen(const S: UnicodeString; MaxLen: Integer): Integer;
begin
result := MecsCharToElementIndex(S, MaxLen + 1) - 1;
if result < MaxLen then
result := System.Length(S);
end;
{$ENDIF}
function MecsCharToElementLen(const S: WideString; MaxLen: Integer): Integer;
begin
result := MecsCharToElementIndex(S, MaxLen + 1) - 1;
if result < MaxLen then
result := System.Length(S);
end;
// MecsCopy
{$IFDEF UNICODE}
function MecsCopy(const S: RawByteString; Index, Count: Integer): RawByteString;
{$ELSE}
function MecsCopy(const S: AnsiString; Index, Count: Integer): AnsiString;
{$ENDIF}
begin
result := MecsCopy(S, Index, Count, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsCopy(const S: RawByteString; Index, Count: Integer; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsCopy(const S: AnsiString; Index, Count: Integer; Codepage: DWORD): AnsiString;
{$ENDIF}
var
AIC: TIndexCount;
begin
AIC := MecsCharToElementIndexCount(S, Index, Count, CodePage);
if AIC.StartIndex > 0 then
result := Copy(S, AIC.StartIndex, AIC.Count)
else
result := '';
end;
{$IFDEF UNICODE}
function MecsCopy(const S: UnicodeString; Index, Count: Integer): UnicodeString;
var
WIC: TIndexCount;
begin
WIC := MecsCharToElementIndexCount(S, Index, Count);
if WIC.StartIndex > 0 then
result := Copy(S, WIC.StartIndex, WIC.Count)
else
result := '';
end;
{$ENDIF}
function MecsCopy(const S: WideString; Index, Count: Integer): WideString;
var
WIC: TIndexCount;
begin
WIC := MecsCharToElementIndexCount(S, Index, Count);
if WIC.StartIndex > 0 then
result := Copy(S, WIC.StartIndex, WIC.Count)
else
result := '';
end;
// MecsDelete
{$IFDEF UNICODE}
procedure MecsDelete(var S: RawByteString; Index, Count: Integer);
{$ELSE}
procedure MecsDelete(var S: AnsiString; Index, Count: Integer);
{$ENDIF}
begin
MecsDelete(S, Index, Count, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
procedure MecsDelete(var S: RawByteString; Index, Count: Integer; Codepage: DWORD);
{$ELSE}
procedure MecsDelete(var S: AnsiString; Index, Count: Integer; Codepage: DWORD);
{$ENDIF}
var
AIC: TIndexCount;
begin
AIC := MecsCharToElementIndexCount(S, Index, Count, CodePage);
if AIC.StartIndex > 0 then
Delete(S, AIC.StartIndex, AIC.Count);
end;
{$IFDEF UNICODE}
procedure MecsDelete(var S: UnicodeString; Index, Count: Integer);
var
UIC: TIndexCount;
begin
UIC := MecsCharToElementIndexCount(S, Index, Count);
if UIC.StartIndex > 0 then
Delete(S, UIC.StartIndex, UIC.Count);
end;
{$ENDIF}
procedure MecsDelete(var S: WideString; Index, Count: Integer);
var
WIC: TIndexCount;
begin
WIC := MecsCharToElementIndexCount(S, Index, Count);
if WIC.StartIndex > 0 then
Delete(S, WIC.StartIndex, WIC.Count);
end;
// MecsElementToCharIndex
{$IFDEF UNICODE}
function MecsElementToCharIndex(const S: RawByteString; Index: Integer): Integer;
{$ELSE}
function MecsElementToCharIndex(const S: AnsiString; Index: Integer): Integer;
{$ENDIF}
begin
result := MecsElementToCharIndex(S, Index, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsElementToCharIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsElementToCharIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer;
{$ENDIF}
var
P, O: PAnsiChar;
L, ChrCnt: Integer;
LeadBytes: TLeadBytes;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
result := 0;
if CodePage = CP_UTF8 then
begin
ChrCnt := 0;
P := PAnsiChar(S);
O := P;
while (P^ <> #$00) and (result = 0) do
begin
if (Byte(P^) and $80) = $00 then
Inc(ChrCnt)
else if (Byte(P^) and $C0) = $C0 then
Inc(ChrCnt);
if Index = (P - O + 1) then
begin
result := ChrCnt;
Break;
end;
Inc(P);
end;
end
else
begin
LeadBytes := MecsGetLeadBytes(CodePage);
if LeadBytes = [] then
result := Index
else
begin
ChrCnt := 0;
P := PAnsiChar(S);
O := P;
while (P^ <> #$00) and (result = 0) do
begin
Inc(ChrCnt);
if Index = (P - O + 1) then
begin
result := ChrCnt;
Break;
end;
if P^ in LeadBytes then
begin
Inc(P);
if P^ = #$00 then
Break;
if Index = (P - O + 1) then
begin
result := ChrCnt;
Break;
end;
end;
Inc(P);
end;
end;
end;
end;
{$IFDEF UNICODE}
function MecsElementToCharIndex(const S: UnicodeString; Index: Integer): Integer;
var
P, O: PUnicodeChar;
L, ChrCnt: Integer;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
ChrCnt := 0;
P := PUnicodeChar(S);
O := P;
while (P^ <> #$0000) and (result = 0) do
begin
Inc(ChrCnt);
if Index = (P - O + 1) then
begin
result := ChrCnt;
Break;
end;
if MecsIsLeadElement(P^) then
begin
Inc(P);
if P^ = #$0000 then
Break;
if Index = (P - O + 1) then
begin
result := ChrCnt;
Break;
end;
end;
Inc(P);
end;
end;
{$ENDIF}
function MecsElementToCharIndex(const S: WideString; Index: Integer): Integer;
var
P, O: PWideChar;
L, ChrCnt: Integer;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
ChrCnt := 0;
P := PWideChar(S);
O := P;
while (P^ <> #$0000) and (result = 0) do
begin
Inc(ChrCnt);
if Index = (P - O + 1) then
begin
result := ChrCnt;
Break;
end;
if MecsIsLeadElement(P^) then
begin
Inc(P);
if P^ = #$0000 then
Break;
if Index = (P - O + 1) then
begin
result := ChrCnt;
Break;
end;
end;
Inc(P);
end;
end;
// MecsElementToCharLen
{$IFDEF UNICODE}
function MecsElementToCharLen(const S: RawByteString; MaxLen: Integer): Integer;
{$ELSE}
function MecsElementToCharLen(const S: AnsiString; MaxLen: Integer): Integer;
{$ENDIF}
begin
result := MecsElementToCharLen(S, MaxLen, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsElementToCharLen(const S: RawByteString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsElementToCharLen(const S: AnsiString; MaxLen: Integer; Codepage: DWORD): Integer;
{$ENDIF}
begin
result := MecsElementToCharIndex(S, MaxLen, CodePage);
if result = 0 then
result := MecsLength(S, CodePage);
end;
{$IFDEF UNICODE}
function MecsElementToCharLen(const S: UnicodeString; MaxLen: Integer): Integer;
begin
result := MecsElementToCharIndex(S, MaxLen);
if result = 0 then
result := MecsLength(S);
end;
{$ENDIF}
function MecsElementToCharLen(const S: WideString; MaxLen: Integer): Integer;
begin
result := MecsElementToCharIndex(S, MaxLen);
if result = 0 then
result := MecsLength(S);
end;
// MecsElementType
{$IFDEF UNICODE}
function MecsElementType(const S: RawByteString; Index: Integer): TElementType;
{$ELSE}
function MecsElementType(const S: AnsiString; Index: Integer): TElementType;
{$ENDIF}
begin
result := MecsStrElementType(PAnsiChar(S), Index - 1, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsElementType(const S: RawByteString; Index: Integer; Codepage: DWORD): TElementType;
{$ELSE}
function MecsElementType(const S: AnsiString; Index: Integer; Codepage: DWORD): TElementType;
{$ENDIF}
begin
result := MecsStrElementType(PAnsiChar(S), Index - 1, CodePage);
end;
{$IFDEF UNICODE}
function MecsElementType(const S: UnicodeString; Index: Integer): TElementType;
begin
result := MecsStrElementType(PUnicodeChar(S), Index - 1);
end;
{$ENDIF}
function MecsElementType(const S: WideString; Index: Integer): TElementType;
begin
result := MecsStrElementType(PWideChar(S), Index - 1);
end;
// MecsInsert
{$IFDEF UNICODE}
procedure MecsInsert(const Source: RawByteString; var S: RawByteString; Index: Integer);
{$ELSE}
procedure MecsInsert(const Source: AnsiString; var S: AnsiString; Index: Integer);
{$ENDIF}
begin
MecsInsert(Source, S, Index, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
procedure MecsInsert(const Source: RawByteString; var S: RawByteString; Index: Integer; Codepage: DWORD);
{$ELSE}
procedure MecsInsert(const Source: AnsiString; var S: AnsiString; Index: Integer; Codepage: DWORD);
{$ENDIF}
var
Idx: Integer;
begin
Idx := MecsCharToElementIndex(S, Index, CodePage);
if Idx > 0 then
Insert(Source, S, Idx)
else
S := S + Source;
end;
{$IFDEF UNICODE}
procedure MecsInsert(const Source: UnicodeString; var S: UnicodeString; Index: Integer);
var
Idx: Integer;
begin
Idx := MecsCharToElementIndex(S, Index);
if Idx > 0 then
Insert(Source, S, Idx)
else
S := S + Source;
end;
{$ENDIF}
procedure MecsInsert(const Source: WideString; var S: WideString; Index: Integer);
var
Idx: Integer;
begin
Idx := MecsCharToElementIndex(S, Index);
if Idx > 0 then
Insert(Source, S, Idx)
else
S := S + Source;
end;
// MecsIsFullWidth
{$IFDEF UNICODE}
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer): Boolean;
{$ELSE}
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer): Boolean;
{$ENDIF}
begin
result := MecsIsFullWidth(S, CharIndex, SysLocale.FarEast, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer; FarEast: Boolean): Boolean;
{$ELSE}
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer; FarEast: Boolean): Boolean;
{$ENDIF}
begin
result := MecsIsFullWidth(S, CharIndex, FarEast, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsIsFullWidth(const S: RawByteString; CharIndex: Integer; FarEast: Boolean; Codepage: DWORD): Boolean;
{$ELSE}
function MecsIsFullWidth(const S: AnsiString; CharIndex: Integer; FarEast: Boolean; Codepage: DWORD): Boolean;
{$ENDIF}
var
IC: TIndexCount;
begin
if CodePage = CP_UTF8 then
{$IFDEF UNICODE}
result := MecsIsFullWidth(UTF8ToUnicodeString(S), CharIndex, FarEast)
{$ELSE}
result := MecsIsFullWidth(MECSUtils.UTF8ToUTF16(S), CharIndex, FarEast)
{$ENDIF}
else
begin
IC := MecsCharToElementIndexCount(S, CharIndex, 1, Codepage);
result := (IC.Count > 1);
end;
end;
{$IFDEF UNICODE}
function MecsIsFullWidth(const S: UnicodeString; CharIndex: Integer): Boolean;
begin
result := MecsIsFullWidth(S, CharIndex, SysLocale.FarEast);
end;
{$ENDIF}
{$IFDEF UNICODE}
function MecsIsFullWidth(const S: UnicodeString; CharIndex: Integer; FarEast: Boolean): Boolean;
var
UCS4: UCS4String;
begin
result := False;
UCS4 := UTF16ToUTF32(S);
if Length(UCS4) < 0 then
Exit;
if CharIndex < 1 then
Exit;
if CharIndex >= Length(UCS4) then
Exit;
case MecsEastAsianWidth(UCS4[CharIndex - 1]) of
eawWide,
eawFullwidth:
result := True;
eawAmbiguous:
result := FarEast;
else
result := False;
end;
end;
{$ENDIF}
function MecsIsFullWidth(const S: WideString; CharIndex: Integer): Boolean;
begin
result := MecsIsFullWidth(S, CharIndex, SysLocale.FarEast);
end;
function MecsIsFullWidth(const S: WideString; CharIndex: Integer; FarEast: Boolean): Boolean;
var
UCS4: UCS4String;
begin
result := False;
UCS4 := UTF16ToUTF32(S);
if Length(UCS4) < 0 then
Exit;
if CharIndex < 1 then
Exit;
if CharIndex >= Length(UCS4) then
Exit;
case MecsEastAsianWidth(UCS4[CharIndex - 1]) of
eawWide,
eawFullwidth:
result := True;
eawAmbiguous:
result := FarEast;
else
result := False;
end;
end;
// MecsIsLeadElement
function MecsIsLeadElement(TestChar: AnsiChar): Boolean;
begin
{ Not recommended }
result := MecsIsLeadElement(TestChar, DefaultAnsiCodePage);
end;
function MecsIsLeadElement(TestChar: AnsiChar; Codepage: DWORD): Boolean;
var
LeadBytes: TLeadBytes;
begin
result := False;
if TestChar = '' then
Exit;
if CodePage = CP_UTF8 then
result := ((Byte(TestChar) and $C0) = $C0)
else
begin
LeadBytes := MecsGetLeadBytes(CodePage);
if LeadBytes = [] then
Exit;
result := (TestChar in LeadBytes);
end;
end;
{$IFDEF UNICODE}
function MecsIsLeadElement(TestChar: UnicodeChar): Boolean;
{$ELSE}
function MecsIsLeadElement(TestChar: WideChar): Boolean;
{$ENDIF}
begin
result := ((WORD(TestChar) shr 8) in [$D8..$DB]);
end;
// MecsIsMECElement
function MecsIsMECElement(TestChar: AnsiChar): Boolean; overload;
begin
{ Not recommended }
result := MecsIsMECElement(TestChar, DefaultAnsiCodePage);
end;
function MecsIsMECElement(TestChar: AnsiChar; Codepage: DWORD): Boolean; overload;
begin
result := (MecsElementType(TestChar, 1, Codepage) <> etSingle);
end;
{$IFDEF UNICODE}
function MecsIsMECElement(TestChar: UnicodeChar): Boolean;
{$ELSE}
function MecsIsMECElement(TestChar: WideChar): Boolean;
{$ENDIF}
begin
result := ((WORD(TestChar) and $D800) = $D800);
end;
// MecsIsTrailElement
function MecsIsTrailElement(TestChar: AnsiChar): Boolean;
begin
{ Not recommended }
result := MecsIsTrailElement(TestChar, DefaultAnsiCodePage);
end;
function MecsIsTrailElement(TestChar: AnsiChar; Codepage: DWORD): Boolean;
begin
result := (MecsElementType(TestChar, 1, Codepage) = etTrail);
end;
{$IFDEF UNICODE}
function MecsIsTrailElement(TestChar: UnicodeChar): Boolean;
{$ELSE}
function MecsIsTrailElement(TestChar: WideChar): Boolean;
{$ENDIF}
begin
result := ((WORD(TestChar) shr 8) in [$DC..$DF]);
end;
// MecsLeftStr
{$IFDEF UNICODE}
function MecsLeftStr(const AText: RawByteString; const ACount: Integer): RawByteString;
{$ELSE}
function MecsLeftStr(const AText: AnsiString; const ACount: Integer): AnsiString;
{$ENDIF}
begin
result := MecsCopy(AText, 1, ACount, MecsGetCodePage(AText));
end;
{$IFDEF UNICODE}
function MecsLeftStr(const AText: RawByteString; const ACount: Integer; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsLeftStr(const AText: AnsiString; const ACount: Integer; Codepage: DWORD): AnsiString;
{$ENDIF}
begin
result := MecsCopy(AText, 1, ACount, CodePage);
end;
{$IFDEF UNICODE}
function MecsLeftStr(const AText: UnicodeString; const ACount: Integer): UnicodeString;
begin
result := MecsCopy(AText, 1, ACount);
end;
{$ENDIF}
function MecsLeftStr(const AText: WideString; const ACount: Integer): WideString;
begin
result := MecsCopy(AText, 1, ACount);
end;
// MecsLength
{$IFDEF UNICODE}
function MecsLength(const S: RawByteString): Integer;
{$ELSE}
function MecsLength(const S: AnsiString): Integer;
{$ENDIF}
begin
result := MecsStrLen(PAnsiChar(S), MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsLength(const S: RawByteString; Codepage: DWORD): Integer;
{$ELSE}
function MecsLength(const S: AnsiString; Codepage: DWORD): Integer;
{$ENDIF}
begin
result := MecsStrLen(PAnsiChar(S), CodePage);
end;
{$IFDEF UNICODE}
function MecsLength(const S: UnicodeString): Integer;
begin
result := MecsStrLen(PUnicodeChar(S));
end;
{$ENDIF}
function MecsLength(const S: WideString): Integer;
begin
result := MecsStrLen(PWideChar(S));
end;
// MecsMidStr
{$IFDEF UNICODE}
function MecsMidStr(const AText: RawByteString; const AStart, ACount: Integer): RawByteString;
{$ELSE}
function MecsMidStr(const AText: AnsiString; const AStart, ACount: Integer): AnsiString;
{$ENDIF}
begin
result := MecsCopy(AText, AStart, ACount);
end;
{$IFDEF UNICODE}
function MecsMidStr(const AText: RawByteString; const AStart, ACount: Integer; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsMidStr(const AText: AnsiString; const AStart, ACount: Integer; Codepage: DWORD): AnsiString;
{$ENDIF}
begin
result := MecsCopy(AText, AStart, ACount, Codepage);
end;
{$IFDEF UNICODE}
function MecsMidStr(const AText: UnicodeString; const AStart, ACount: Integer): UnicodeString;
begin
result := MecsCopy(AText, AStart, ACount);
end;
{$ENDIF}
function MecsMidStr(const AText: WideString; const AStart, ACount: Integer): WideString;
begin
result := MecsCopy(AText, AStart, ACount);
end;
// MecsNextCharIndex
{$IFDEF UNICODE}
function MecsNextCharIndex(const S: RawByteString; Index: Integer): Integer;
{$ELSE}
function MecsNextCharIndex(const S: AnsiString; Index: Integer): Integer;
{$ENDIF}
begin
result := MecsNextCharIndex(S, Index, MecsGetCodePage(S));
end;
{$IFDEF UNICODE}
function MecsNextCharIndex(const S: RawByteString; Index: Integer; Codepage: DWORD): Integer;
{$ELSE}
function MecsNextCharIndex(const S: AnsiString; Index: Integer; Codepage: DWORD): Integer;
{$ENDIF}
var
Idx: Integer;
begin
Idx := MecsElementToCharIndex(S, Index, CodePage);
result := MecsCharToElementLen(S, Idx, CodePage) + 1;
end;
{$IFDEF UNICODE}
function MecsNextCharIndex(const S: UnicodeString; Index: Integer): Integer;
var
Idx: Integer;
begin
Idx := MecsElementToCharIndex(S, Index);
result := MecsCharToElementLen(S, Idx) + 1;
end;
{$ENDIF}
function MecsNextCharIndex(const S: WideString; Index: Integer): Integer;
var
Idx: Integer;
begin
Idx := MecsElementToCharIndex(S, Index);
result := MecsCharToElementLen(S, Idx) + 1;
end;
// MecsReverseString
{$IFDEF UNICODE}
function MecsReverseString(const AText: RawByteString): RawByteString;
{$ELSE}
function MecsReverseString(const AText: AnsiString): AnsiString;
{$ENDIF}
begin
result := MecsReverseString(AText, MecsGetCodePage(AText));
end;
{$IFDEF UNICODE}
function MecsReverseString(const AText: RawByteString; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsReverseString(const AText: AnsiString; Codepage: DWORD): AnsiString;
{$ENDIF}
var
i: Integer;
begin
result := '';
for i:=1 to MecsLength(AText) do
result := MecsCopy(AText, i, 1) + result;
end;
{$IFDEF UNICODE}
function MecsReverseString(const AText: UnicodeString): UnicodeString;
var
i: Integer;
begin
result := '';
for i:=1 to System.Length(AText) do
begin
case MecsElementType(AText, i) of
etLead:;
etTrail:
result := AText[i-1] + AText[i] + result;
else
result := AText[i] + result;
end;
end;
end;
{$ENDIF}
function MecsReverseString(const AText: WideString): WideString;
var
i: Integer;
begin
result := '';
for i:=1 to System.Length(AText) do
begin
case MecsElementType(AText, i) of
etLead:;
etTrail:
result := AText[i-1] + AText[i] + result;
else
result := AText[i] + result;
end;
end;
end;
// MecsRightStr
{$IFDEF UNICODE}
function MecsRightStr(const AText: RawByteString; const ACount: Integer; Codepage: DWORD): RawByteString;
{$ELSE}
function MecsRightStr(const AText: AnsiString; const ACount: Integer; Codepage: DWORD): AnsiString;
{$ENDIF}
var
Idx: Integer;
begin
Idx := MecsLength(AText, CodePage);
if (Idx - ACount + 1) > 0 then
result := MecsCopy(AText, Idx - ACount + 1, ACount, CodePage)
else
result := AText;
end;
{$IFDEF UNICODE}
function MecsRightStr(const AText: RawByteString; const ACount: Integer): RawByteString;
{$ELSE}
function MecsRightStr(const AText: AnsiString; const ACount: Integer): AnsiString;
{$ENDIF}
begin
result := MecsRightStr(AText, ACount, MecsGetCodePage(AText));
end;
{$IFDEF UNICODE}
function MecsRightStr(const AText: UnicodeString; const ACount: Integer): UnicodeString;
var
Idx: Integer;
begin
Idx := MecsLength(AText);
if (Idx - ACount + 1) > 0 then
result := MecsCopy(AText, Idx - ACount + 1, ACount)
else
result := AText;
end;
{$ENDIF}
function MecsRightStr(const AText: WideString; const ACount: Integer): WideString;
var
Idx: Integer;
begin
Idx := MecsLength(AText);
if (Idx - ACount + 1) > 0 then
result := MecsCopy(AText, Idx - ACount + 1, ACount)
else
result := AText;
end;
// MecsStrCharLength
function MecsStrCharLength(const Str: PAnsiChar): Integer;
begin
{ Not recommended }
result := MecsStrCharLength(Str, DefaultAnsiCodePage);
end;
function MecsStrCharLength(const Str: PAnsiChar; Codepage: DWORD): Integer;
begin
if (Str = nil) or (Str = '') then
result := 0
else
result := MecsCharLength(StrPas(Str), 1, Codepage);
end;
{$IFDEF UNICODE}
function MecsStrCharLength(const Str: PUnicodeChar): Integer;
begin
if (Str = nil) or (Str = '') then
result := 0
else
result := MecsCharLength(Str^, 1);
end;
{$ENDIF}
function MecsStrCharLength(const Str: PWideChar): Integer;
begin
if (Str = nil) or (Str = '') then
result := 0
else
result := MecsCharLength(Str^, 1);
end;
// MecsStrElementType
function MecsStrElementType(const Str: PAnsiChar; Index: Cardinal): TElementType;
begin
{ Not recommended }
result := MecsStrElementType(Str, Index, DefaultAnsiCodePage);
end;
function MecsStrElementType(const Str: PAnsiChar; Index: Cardinal; Codepage: DWORD): TElementType;
var
P: PAnsiChar;
i: Integer;
LeadBytes: TLeadBytes;
begin
result := etSingle;
if Str = nil then
Exit;
if (Index <= 0) or (Index + 1 > SysUtils.StrLen(Str)) then
Exit;
P := Str;
if CodePage = CP_UTF8 then
begin
Inc(P, Index);
if (Byte(P^) and $80) = $00 then
result := etSingle
else if (Byte(P^) and $C0) = $C0 then
result := etLead
else
result := etTrail;
end
else
begin
LeadBytes := MecsGetLeadBytes(CodePage);
if LeadBytes = [] then
Exit;
if (P = nil) or ((P + Index)^ = #$00) then
Exit;
if (Index = 0) then
begin
if P^ in LeadBytes then
result := etLead;
end
else
begin
i := Index - 1;
while (i >= 0) and ((P + i)^ in LeadBytes) do
Dec(i);
if ((Index - Cardinal(i)) mod 2) = 0 then
result := etTrail
else if (P + Index)^ in LeadBytes then
result := etLead;
end;
end;
end;
{$IFDEF UNICODE}
function MecsStrElementType(const Str: PUnicodeChar; Index: Cardinal): TElementType;
begin
result := etSingle;
if Str = nil then
Exit;
if (Index <= 0) or ((Index + 1) > Cardinal(SysUtils.StrLen(PWideChar(Str)))) then
Exit;
if MecsIsLeadElement(Str[Index]) then
result := etLead
else if MecsIsTrailElement(Str[Index]) then
result := etTrail;
end;
{$ENDIF}
function MecsStrElementType(const Str: PWideChar; Index: Cardinal): TElementType;
begin
result := etSingle;
if Str = nil then
Exit;
{$IFDEF UNICODE}
if (Index <= 0) or ((Index + 1) > Cardinal(SysUtils.StrLen(Str))) then
{$ELSE}
if (Index <= 0) or ((Index + 1) > Cardinal(MECSUtils.StrLen(Str))) then
{$ENDIF}
Exit;
if MecsIsLeadElement(Str[Index]) then
result := etLead
else if MecsIsTrailElement(Str[Index]) then
result := etTrail;
end;
// MecsStrLen
function MecsStrLen(const Str: PAnsiChar): Cardinal;
begin
{ Not recommended }
result := MecsStrLen(Str, DefaultAnsiCodePage);
end;
function MecsStrLen(const Str: PAnsiChar; Codepage: DWORD): Cardinal;
var
P: PAnsiChar;
L, ChrCnt: Integer;
LeadBytes: TLeadBytes;
begin
result := 0;
L := SysUtils.StrLen(Str);
if L = 0 then
Exit;
if CodePage = CP_UTF8 then
begin
ChrCnt := 0;
P := Str;
while P^ <> #$00 do
begin
if (Byte(P^) and $80) = $00 then
Inc(ChrCnt)
else if (Byte(P^) and $C0) = $C0 then
Inc(ChrCnt);
Inc(P);
end;
end
else
begin
LeadBytes := MecsGetLeadBytes(CodePage);
if LeadBytes = [] then
ChrCnt := L
else
begin
ChrCnt := 0;
P := Str;
while P^ <> #$00 do
begin
Inc(ChrCnt);
if P^ in LeadBytes then
begin
Inc(P);
if P^ = #$00 then
Break;
end;
Inc(P);
end;
end;
end;
result := ChrCnt;
end;
{$IFDEF UNICODE}
function MecsStrLen(const Str: PUnicodeChar): Cardinal;
var
P: PUnicodeChar;
ChrCnt: Integer;
begin
ChrCnt := 0;
P := Str;
while P^ <> #$0000 do
begin
if MecsIsLeadElement(P^) then
Inc(ChrCnt)
else if not MecsIsTrailElement(P^) then
Inc(ChrCnt);
Inc(P);
end;
result := ChrCnt;
end;
{$ENDIF}
function MecsStrLen(const Str: PWideChar): Cardinal;
var
P: PWideChar;
ChrCnt: Integer;
begin
ChrCnt := 0;
P := Str;
while P^ <> #$0000 do
begin
if MecsIsLeadElement(P^) then
Inc(ChrCnt)
else if not MecsIsTrailElement(P^) then
Inc(ChrCnt);
Inc(P);
end;
result := ChrCnt;
end;
// MecsStrNextChar
function MecsStrNextChar(const Str: PAnsiChar): PAnsiChar;
begin
{ Not recommended }
result := MecsStrNextChar(Str, DefaultAnsiCodePage);
end;
function MecsStrNextChar(const Str: PAnsiChar; Codepage: DWORD): PAnsiChar;
begin
result := str + MecsStrCharLength(str, CodePage);
end;
{$IFDEF UNICODE}
function MecsStrNextChar(const Str: PUnicodeChar): PUnicodeChar;
begin
result := str + MecsStrCharLength(str);
end;
{$ENDIF}
function MecsStrNextChar(const Str: PWideChar): PWideChar;
begin
result := str + MecsStrCharLength(str);
end;
// for UnicodeString/WideString (Combining Character Sequence)
// -----------------------------------------------------------------------------
// MecsCCSLength
{$IFDEF UNICODE}
function MecsCCSLength(const S: UnicodeString; Index: Integer): Integer; overload;
var
IC: TIndexCount;
begin
IC := MecsCCSToElementIndexCount(S, Index, 1);
if IC.StartIndex = 0 then
result := 1
else
result := IC.Count;
end;
{$ENDIF}
function MecsCCSLength(const S: WideString; Index: Integer): Integer; overload;
var
IC: TIndexCount;
begin
IC := MecsCCSToElementIndexCount(S, Index, 1);
if IC.StartIndex = 0 then
result := 1
else
result := IC.Count;
end;
// MecsCCSToElementIndex
{$IFDEF UNICODE}
function MecsCCSToElementIndex(const S: UnicodeString; Index: Integer): Integer; overload;
var
L, ChrCnt: Integer;
UCS4 : UCS4String;
i: Integer;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
ChrCnt := 0;
UCS4 := UTF16toUTF32(S);
for i:=0 to Length(UCS4) - 1 do
begin
if MecsCombiningType(UCS4[i]) = ctBase then
Inc(ChrCnt);
if (ChrCnt = Index) then
begin
result := MecsCharToElementIndex(S, i + 1);
break;
end;
end;
end;
{$ENDIF}
function MecsCCSToElementIndex(const S: WideString; Index: Integer): Integer; overload;
var
L, ChrCnt: Integer;
UCS4 : UCS4String;
i: Integer;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
ChrCnt := 0;
UCS4 := UTF16toUTF32(S);
for i:=0 to Length(UCS4) - 1 do
begin
if MecsCombiningType(UCS4[i]) = ctBase then
Inc(ChrCnt);
if (ChrCnt = Index) then
begin
result := MecsCharToElementIndex(S, i + 1);
break;
end;
end;
end;
// MecsCCSToElementIndexCount
{$IFDEF UNICODE}
function MecsCCSToElementIndexCount(const S: UnicodeString; Index, Count: Integer): TIndexCount; overload;
var
ChrCnt, L: Integer;
UCS4 : UCS4String;
i: Integer;
begin
result.StartIndex := Index;
result.EndIndex := Index + Count - 1;
result.Count := Count;
L := System.Length(S);
if L = 0 then
Exit;
result.StartIndex := 0;
result.EndIndex := 0;
result.Count := 0;
ChrCnt := 0;
UCS4 := UTF16toUTF32(S);
for i:=0 to Length(UCS4) - 1 do
begin
if MecsCombiningType(UCS4[i]) = ctBase then
Inc(ChrCnt);
if (ChrCnt = Index) and (result.StartIndex = 0) then
result.StartIndex := MecsCharToElementIndex(S, i + 1);
if (ChrCnt = (Index + Count - 1)) and (result.StartIndex > 0) then
result.EndIndex := MecsCharToElementLen(S, i + 1)
else if ChrCnt > (Index + Count - 1) then
break;
end;
if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
result.EndIndex := L;
result.Count := result.EndIndex - result.StartIndex + 1;
end;
{$ENDIF}
function MecsCCSToElementIndexCount(const S: WideString; Index, Count: Integer): TIndexCount; overload;
var
ChrCnt, L: Integer;
UCS4 : UCS4String;
i: Integer;
begin
result.StartIndex := Index;
result.EndIndex := Index + Count - 1;
result.Count := Count;
L := System.Length(S);
if L = 0 then
Exit;
result.StartIndex := 0;
result.EndIndex := 0;
result.Count := 0;
ChrCnt := 0;
UCS4 := UTF16toUTF32(S);
for i:=0 to Length(UCS4) - 1 do
begin
if MecsCombiningType(UCS4[i]) = ctBase then
Inc(ChrCnt);
if (ChrCnt = Index) and (result.StartIndex = 0) then
result.StartIndex := MecsCharToElementIndex(S, i + 1);
if (ChrCnt = (Index + Count - 1)) and (result.StartIndex > 0) then
result.EndIndex := MecsCharToElementLen(S, i + 1)
else if ChrCnt > (Index + Count - 1) then
break;
end;
if (result.EndIndex = 0) and ((ChrCnt - Index) < Count) then
result.EndIndex := L;
result.Count := result.EndIndex - result.StartIndex + 1;
end;
// MecsCCSToElementLen
{$IFDEF UNICODE}
function MecsCCSToElementLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
begin
result := MecsCCSToElementIndex(S, MaxLen + 1) - 1;
if result < MaxLen then
result := System.Length(S);
end;
{$ENDIF}
function MecsCCSToElementLen(const S: WideString; MaxLen: Integer): Integer; overload;
begin
result := MecsCCSToElementIndex(S, MaxLen + 1) - 1;
if result < MaxLen then
result := System.Length(S);
end;
// MecsElementToCCSIndex
{$IFDEF UNICODE}
function MecsElementToCCSIndex(const S: UnicodeString; Index: Integer): Integer; overload;
var
L, ChrCnt: Integer;
UCS4: UCS4String;
i, Idx: Integer;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
Idx := MecsElementToCharIndex(S, Index);
ChrCnt := 0;
UCS4 := UTF16ToUTF32(S);
for i:=0 to Length(UCS4)-1 do
begin
if MecsCombiningType(UCS4[i]) = ctBase then
Inc(ChrCnt);
if i = (Idx - 1) then
begin
result := ChrCnt;
Break;
end;
end;
end;
{$ENDIF}
function MecsElementToCCSIndex(const S: WideString; Index: Integer): Integer; overload;
var
L, ChrCnt: Integer;
UCS4: UCS4String;
i, Idx: Integer;
begin
result := 0;
L := System.Length(S);
if L = 0 then
Exit;
Idx := MecsElementToCharIndex(S, Index);
ChrCnt := 0;
UCS4 := UTF16ToUTF32(S);
for i:=0 to Length(UCS4)-1 do
begin
if MecsCombiningType(UCS4[i]) = ctBase then
Inc(ChrCnt);
if i = (Idx - 1) then
begin
result := ChrCnt;
Break;
end;
end;
end;
// MecsElementToCCSLen
{$IFDEF UNICODE}
function MecsElementToCCSLen(const S: UnicodeString; MaxLen: Integer): Integer; overload;
begin
result := MecsElementToCCSIndex(S, MaxLen);
if result = 0 then
result := MecsLengthC(S);
end;
{$ENDIF}
function MecsElementToCCSLen(const S: WideString; MaxLen: Integer): Integer; overload;
begin
result := MecsElementToCCSIndex(S, MaxLen);
if result = 0 then
result := MecsLengthC(S);
end;
// MecsDeleteC
{$IFDEF UNICODE}
procedure MecsDeleteC(var S: UnicodeString; Index, Count: Integer); overload;
var
AIC: TIndexCount;
begin
AIC := MecsCCSToElementIndexCount(S, Index, Count);
if AIC.StartIndex > 0 then
Delete(S, AIC.StartIndex, AIC.Count);
end;
{$ENDIF}
procedure MecsDeleteC(var S: WideString; Index, Count: Integer); overload;
var
AIC: TIndexCount;
begin
AIC := MecsCCSToElementIndexCount(S, Index, Count);
if AIC.StartIndex > 0 then
Delete(S, AIC.StartIndex, AIC.Count);
end;
// MecsLengthC
{$IFDEF UNICODE}
function MecsLengthC(const S: UnicodeString): Integer; overload;
var
Cnt: Integer;
UCS4 : UCS4String;
i: Integer;
begin
Cnt := 0;
if Length(S) > 0 then
begin
UCS4 := UTF16toUTF32(S);
Cnt := Length(UCS4) - 1;
for i:=0 to Length(UCS4) - 1 do
begin
if MecsCombiningType(UCS4[i]) = ctCombining then
Dec(Cnt);
end;
end;
result := Cnt;
end;
{$ENDIF}
function MecsLengthC(const S: WideString): Integer; overload;
var
Cnt: Integer;
UCS4 : UCS4String;
i: Integer;
begin
Cnt := 0;
if Length(S) > 0 then
begin
UCS4 := UTF16toUTF32(S);
Cnt := Length(UCS4) - 1;
for i:=0 to Length(UCS4) - 1 do
begin
if MecsCombiningType(UCS4[i]) = ctCombining then
Dec(Cnt);
end;
end;
result := Cnt;
end;
// MecsCopyC
{$IFDEF UNICODE}
function MecsCopyC(const S: UnicodeString; Index, Count: Integer): UnicodeString; overload;
var
AIC: TIndexCount;
begin
AIC := MecsCCSToElementIndexCount(S, Index, Count);
if AIC.StartIndex > 0 then
result := Copy(S, AIC.StartIndex, AIC.Count)
else
result := '';
end;
{$ENDIF}
function MecsCopyC(const S: WideString; Index, Count: Integer): WideString; overload;
var
AIC: TIndexCount;
begin
AIC := MecsCCSToElementIndexCount(S, Index, Count);
if AIC.StartIndex > 0 then
result := Copy(S, AIC.StartIndex, AIC.Count)
else
result := '';
end;
// MecsInsertC
{$IFDEF UNICODE}
procedure MecsInsertC(const Source: UnicodeString; var S: UnicodeString; Index: Integer); overload;
var
Idx: Integer;
begin
Idx := MecsCCSToElementIndex(S, Index);
if Idx > 0 then
Insert(Source, S, Idx)
else
S := S + Source;
end;
{$ENDIF}
procedure MecsInsertC(const Source: WideString; var S: WideString; Index: Integer); overload;
var
Idx: Integer;
begin
Idx := MecsCCSToElementIndex(S, Index);
if Idx > 0 then
Insert(Source, S, Idx)
else
S := S + Source;
end;
// MecsLeftStrC
{$IFDEF UNICODE}
function MecsLeftStrC(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
begin
result := MecsCopyC(AText, 1, ACount);
end;
{$ENDIF}
function MecsLeftStrC(const AText: WideString; const ACount: Integer): WideString; overload;
begin
result := MecsCopyC(AText, 1, ACount);
end;
// MecsMidStr
{$IFDEF UNICODE}
function MecsMidStrC(const AText: UnicodeString; const AStart, ACount: Integer): UnicodeString; overload;
begin
result := MecsCopyC(AText, AStart, ACount);
end;
{$ENDIF}
function MecsMidStrC(const AText: WideString; const AStart, ACount: Integer): WideString; overload;
begin
result := MecsCopyC(AText, AStart, ACount);
end;
// MecsNextCCSIndex
{$IFDEF UNICODE}
function MecsNextCCSIndex(const S: UnicodeString; Index: Integer): Integer; overload;
var
Idx: Integer;
begin
Idx := MecsElementToCCSIndex(S, Index);
result := MecsCCSToElementLen(S, Idx) + 1;
end;
{$ENDIF}
function MecsNextCCSIndex(const S: WideString; Index: Integer): Integer; overload;
var
Idx: Integer;
begin
Idx := MecsElementToCCSIndex(S, Index);
result := MecsCCSToElementLen(S, Idx) + 1;
end;
// MecsReverseStringC
{$IFDEF UNICODE}
function MecsReverseStringC(const AText: UnicodeString): UnicodeString; overload;
var
i: Integer;
begin
result := '';
for i := MECSLengthC(AText) downto 1 do
result := result + MecsCopyC(AText, i, 1);
end;
{$ENDIF}
function MecsReverseStringC(const AText: WideString): WideString; overload;
var
i: Integer;
begin
result := '';
for i := MecsLengthC(AText) downto 1 do
result := result + MecsCopyC(AText, i, 1);
end;
// MecsRightStrC
{$IFDEF UNICODE}
function MecsRightStrC(const AText: UnicodeString; const ACount: Integer): UnicodeString; overload;
var
Idx: Integer;
begin
Idx := MecsLengthC(AText);
if (Idx - ACount + 1) > 0 then
result := MecsCopyC(AText, Idx - ACount + 1, ACount)
else
result := AText;
end;
{$ENDIF}
function MecsRightStrC(const AText: WideString; const ACount: Integer): WideString; overload;
var
Idx: Integer;
begin
Idx := MecsLengthC(AText);
if (Idx - ACount + 1) > 0 then
result := MecsCopyC(AText, Idx - ACount + 1, ACount)
else
result := AText;
end;
// MecsStrCCSLength
{$IFDEF UNICODE}
function MecsStrCCSLength(const Str: PUnicodeChar): Integer; overload;
begin
result := MecsCCSLength(StrPas(PWideChar(Str)), 1);
end;
{$ENDIF}
function MecsStrCCSLength(const Str: PWideChar): Integer; overload;
begin
result := MecsCCSLength(WideString(Str), 1);
end;
// MecsStrLenC
{$IFDEF UNICODE}
function MecsStrLenC(const Str: PUnicodeChar): Cardinal; overload;
begin
result := MECSLengthC(StrPas(PWideChar(Str)));
end;
{$ENDIF}
function MecsStrLenC(const Str: PWideChar): Cardinal; overload;
begin
result := MECSLengthC(WideString(Str));
end;
// MecsStrNextCCS
{$IFDEF UNICODE}
function MecsStrNextCCS(const Str: PUnicodeChar): PUnicodeChar; overload;
begin
result := str + MecsStrCCSLength(str);
end;
{$ENDIF}
function MecsStrNextCCS(const Str: PWideChar): PWideChar; overload;
begin
result := str + MecsStrCCSLength(str);
end;
// for Convert
// -----------------------------------------------------------------------------
// IsWin95
function IsWin95: Boolean;
begin
result := (Win32Platform = VER_PLATFORM_WIN32_WINDOWS) and (Win32MinorVersion = 0);
end;
// IsWin2000
function IsWin2000: Boolean;
begin
result := (Win32Platform = VER_PLATFORM_WIN32_NT) and (Win32MajorVersion = 5) and (Win32MinorVersion = 0);
end;
// AnsiToUTF16
{$IFDEF UNICODE}
function AnsiToUTF16(const AText: RawByteString): WideString;
{$ELSE}
function AnsiToUTF16(const AText: AnsiString): WideString;
{$ENDIF}
begin
result := AnsiToUTF16(AText, MecsGetCodePage(AText));
end;
{$IFDEF UNICODE}
function AnsiToUTF16(const AText: RawByteString; Codepage: DWORD): WideString;
{$ELSE}
function AnsiToUTF16(const AText: AnsiString; Codepage: DWORD): WideString;
{$ENDIF}
// MultiByteToWideChar
// http://msdn2.microsoft.com/en-us/library/ms776413(VS.85).aspx
// CP_UTF8(CP_UTF7) is *not* supported in Win95.
var
BufSize: Integer;
dwFlags: DWORD;
begin
result := '';
if Length(AText) = 0 then
Exit;
{
case CodePage of
CP_UTF7,
CP_UTF8:
dwFlags := 0;
else
dwFlags := MB_PRECOMPOSED;
end;
}
dwFlags := 0;
if IsWin95 and (dwFlags = 0) then
ConvertMultiByteToUnicode(Codepage, AText, result)
else
begin
BufSize := MultiByteToWideChar(CodePage, dwFlags, PAnsiChar(AText), Length(AText), nil, 0);
SetLength(result, BufSize);
MultiByteToWideChar(CodePage, dwFlags, PANsiChar(AText), Length(AText), PWideChar(result), BufSize);
end;
end;
// AnsiToUTF32
{$IFDEF UNICODE}
function AnsiToUTF32(const AText: RawByteString): UCS4String;
{$ELSE}
function AnsiToUTF32(const AText: AnsiString): UCS4String;
{$ENDIF}
begin
result := AnsiToUTF32(AText, MecsGetCodePage(AText));
end;
{$IFDEF UNICODE}
function AnsiToUTF32(const AText: RawByteString; Codepage: DWORD): UCS4String;
{$ELSE}
function AnsiToUTF32(const AText: AnsiString; Codepage: DWORD): UCS4String;
{$ENDIF}
begin
result := UTF16ToUTF32(AnsiToUTF16(AText, CodePage));
end;
// AnsiToUTF8
{$IFDEF UNICODE}
function AnsiToUTF8(const AText: RawByteString): RawByteString;
{$ELSE}
function AnsiToUTF8(const AText: AnsiString): UTF8String;
{$ENDIF}
begin
result := AnsiToUTF8(AText, MecsGetCodePage(AText));
end;
{$IFDEF UNICODE}
function AnsiToUTF8(const AText: RawByteString; Codepage: DWORD): RawByteString;
{$ELSE}
function AnsiToUTF8(const AText: AnsiString; Codepage: DWORD): UTF8String;
{$ENDIF}
begin
if CodePage = CP_UTF8 then
result := AText
else
begin
if IsWin95 then
{$IFDEF UNICODE}
ConvertString(Codepage, CP_UTF8, AText, RawByteString(result))
{$ELSE}
ConvertString(Codepage, CP_UTF8, AText, AnsiString(result))
{$ENDIF}
else
result := UTF16ToAnsi(AnsiToUTF16(AText, CodePage), CP_UTF8);
end;
end;
function ConvertINetString(lpdwMode: LPDWORD; dwSrcEncoding: DWORD; dwDstEncoding: DWORD; lpSrcStr: LPCSTR; lpnSrcSize: PInteger; lpDstStr: Pointer; lpnDstSize: PInteger):Hresult;
var
FP: TFarProc;
DLLWnd: THandle;
// http://msdn2.microsoft.com/en-us/library/aa741106(VS.85).aspx
// Win95 and IE5.5 or later
_ConvertINetString: function(lpdwMode: LPDWORD; dwSrcEncoding: DWORD; dwDstEncoding: DWORD; lpSrcStr: LPCSTR; lpnSrcSize: PInteger; lpDstStr: Pointer; lpnDstSize: PInteger):Hresult; stdcall;
begin
result := S_FALSE;
{$IFDEF UNICODE}
DLLWnd := LoadLibraryW('mlang.dll');
{$ELSE}
DLLWnd := LoadLibraryA('mlang.dll');
{$ENDIF}
try
FP := GetProcAddress(DLLWnd, 'ConvertINetString');
if FP <> nil then
begin
@_ConvertINetString := FP;
result := _ConvertINetString(lpdwMode, dwSrcEncoding, dwDstEncoding, lpSrcStr, lpnSrcSize, lpDstStr, lpnDstSize);
end;
finally
if DLLWnd > 0 then
FreeLibrary(DLLWnd);
end;
end;
// CodePointToUTF16
function CodePointToUTF16(const UCS4: UCS4Char): WideString;
var
U: UCS4String;
begin
SetLength(U, 2);
U[0] := UCS4;
U[1] := $00000000;
result := UTF32ToUTF16(U);
end;
// CodePointToUTF8
{$IFDEF UNICODE}
function CodePointToUTF8(const UCS4: UCS4Char): RawByteString;
{$ELSE}
function CodePointToUTF8(const UCS4: UCS4Char): UTF8String;
{$ENDIF}
var
U: UCS4String;
begin
SetLength(U, 2);
U[0] := UCS4;
U[1] := $00000000;
result := UTF32ToUTF8(U);
end;
// ConvertString
{$IFDEF UNICODE}
function ConvertString(SrcCodepage, DstCodepage: DWORD; const SrcStr: RawByteString; var DstStr: RawByteString): Boolean;
{$ELSE}
function ConvertString(SrcCodepage, DstCodepage: DWORD; const SrcStr: AnsiString; var DstStr: AnsiString): Boolean;
{$ENDIF}
var
dwMode: DWORD;
SrcSize, DstSize: Integer;
begin
dwMode := 0;
SetLength(DstStr, Length(SrcStr) * 3);
SrcSize := Length(SrcStr);
DstSize := Length(DstStr);
result := (ConvertINetString(@dwMode, SrcCodepage, DstCodepage, PAnsiChar(SrcStr), @SrcSize, PAnsiChar(DstStr), @DstSize) = S_OK);
if result then
SetLength(DstStr, DstSize)
else
DstStr := '';
end;
function ConvertINetUnicodeToMultiByte(lpdwMode: LPDWORD; dwEncoding: DWORD; lpSrcStr: LPCWSTR; lpnWideCharCount: PInteger; lpDstStr: LPSTR; lpnMultiCharCount: PInteger):Hresult;
var
FP: TFarProc;
DLLWnd: THandle;
// http://msdn2.microsoft.com/en-us/library/aa741107(VS.85).aspx
// Win95 and IE5.5 or later
_ConvertINetUnicodeToMultiByte: function(lpdwMode: LPDWORD; dwEncoding: DWORD; lpSrcStr: LPCWSTR; lpnWideCharCount: PInteger; lpDstStr: LPSTR; lpnMultiCharCount: PInteger):Hresult; stdcall;
begin
result := S_FALSE;
{$IFDEF UNICODE}
DLLWnd := LoadLibraryW('mlang.dll');
{$ELSE}
DLLWnd := LoadLibraryA('mlang.dll');
{$ENDIF}
try
FP := GetProcAddress(DLLWnd, 'ConvertINetUnicodeToMultiByte');
if FP <> nil then
begin
@_ConvertINetUnicodeToMultiByte := FP;
result := _ConvertINetUnicodeToMultiByte(lpdwMode, dwEncoding, lpSrcStr, lpnWideCharCount, lpDstStr, lpnMultiCharCount);
end;
finally
if DLLWnd > 0 then
FreeLibrary(DLLWnd);
end;
end;
// ConvertUnicodeToMultiByte
{$IFDEF UNICODE}
function ConvertUnicodeToMultiByte(DstCodepage: DWORD; const SrcStr: WideString; var DstStr: RawByteString): Boolean;
{$ELSE}
function ConvertUnicodeToMultiByte(DstCodepage: DWORD; const SrcStr: WideString; var DstStr: AnsiString): Boolean;
{$ENDIF}
var
dwMode: DWORD;
SrcSize, DstSize: Integer;
begin
dwMode := 0;
SetLength(DstStr, Length(SrcStr) * 3);
SrcSize := Length(SrcStr);
DstSize := Length(DstStr);
result := (ConvertINetUnicodeToMultiByte(@dwMode, DstCodepage, PWideChar(SrcStr), @SrcSize, PAnsiChar(DstStr), @DstSize) = S_OK);
if result then
SetLength(DstStr, DstSize)
else
DstStr := '';
end;
function ConvertINetMultiByteToUnicode(lpdwMode: LPDWORD; dwSrcEncoding: DWORD; lpSrcStr: LPCSTR; lpnMultiCharCount: PInteger; lpDstStr: LPWSTR; lpnWideCharCount: PInteger):Hresult;
var
FP: TFarProc;
DLLWnd: THandle;
// http://msdn2.microsoft.com/en-us/library/aa741105(VS.85).aspx
// Win95 and IE5.5 or later
_ConvertINetMultiByteToUnicode: function(lpdwMode: LPDWORD; dwSrcEncoding: DWORD; lpSrcStr: LPCSTR; lpnMultiCharCount: PInteger; lpDstStr: LPWSTR; lpnWideCharCount: PInteger):Hresult; stdcall;
begin
result := S_FALSE;
{$IFDEF UNICODE}
DLLWnd := LoadLibraryW('mlang.dll');
{$ELSE}
DLLWnd := LoadLibraryA('mlang.dll');
{$ENDIF}
try
FP := GetProcAddress(DLLWnd, 'ConvertINetMultiByteToUnicode');
if FP <> nil then
begin
@_ConvertINetMultiByteToUnicode := FP;
result := _ConvertINetMultiByteToUnicode(lpdwMode, dwSrcEncoding, lpSrcStr, lpnMultiCharCount, lpDstStr, lpnWideCharCount);
end;
finally
if DLLWnd > 0 then
FreeLibrary(DLLWnd);
end;
end;
// ConvertMultiByteToUnicode
{$IFDEF UNICODE}
function ConvertMultiByteToUnicode(SrcCodepage: DWORD; const SrcStr: RawByteString; var DstStr: WideString): Boolean;
{$ELSE}
function ConvertMultiByteToUnicode(SrcCodepage: DWORD; const SrcStr: AnsiString; var DstStr: WideString): Boolean;
{$ENDIF}
var
dwMode: DWORD;
SrcSize, DstSize: Integer;
begin
dwMode := 0;
SetLength(DstStr, Length(SrcStr) * 3);
SrcSize := Length(SrcStr);
DstSize := Length(DstStr);
result := (ConvertINetMultiByteToUnicode(@dwMode, SrcCodepage, PAnsiChar(SrcStr), @SrcSize, PWideChar(DstStr), @DstSize) = S_OK);
if result then
SetLength(DstStr, DstSize)
else
DstStr := '';
end;
// UTF32ToAnsi
{$IFDEF UNICODE}
function UTF32ToAnsi(const UCS4Text: UCS4String): RawByteString;
{$ELSE}
function UTF32ToAnsi(const UCS4Text: UCS4String): AnsiString;
{$ENDIF}
begin
result := UTF32ToAnsi(UCS4Text, DefaultAnsiCodePage);
end;
{$IFDEF UNICODE}
function UTF32ToAnsi(const UCS4Text: UCS4String; Codepage: DWORD): RawByteString;
{$ELSE}
function UTF32ToAnsi(const UCS4Text: UCS4String; Codepage: DWORD): AnsiString;
{$ENDIF}
begin
result := UTF16ToAnsi(UTF32ToUTF16(UCS4Text), CodePage);
end;
// UTF32ToUTF16
function UTF32ToUTF16(const UCS4Text: UCS4String): WideString;
var
Counter,
i: Integer;
U: UCS4Char;
PU, OU: PUCS4Char;
PW: PWideChar;
begin
result := '';
if Length(UCS4Text) = 0 then
Exit;
Counter := 0;
PU := PUCS4Char(UCS4Text);
OU := PU;
for i := 0 to Length(UCS4Text) - 1 do
begin
if PU^ < $10000 then
Inc(Counter)
else
Inc(Counter, 2);
Inc(PU);
end;
SetLength(result, Counter);
PU := OU;
PW := PWideChar(result);
for i := 0 to Length(UCS4Text) - 1 do
begin
if PU^ < $10000 then
begin
PW^ := WideChar(PU^);
Inc(PW);
end
else
begin
// Surrogate Pair
U := PU^ - $10000;
PW^ := WideChar((U shr 10) or $D800);
Inc(PW);
PW^ := WideChar((U and $000003FF) or $DC00);
Inc(PW);
end;
Inc(PU);
end;
end;
// UTF32ToUTF8
{$IFDEF UNICODE}
function UTF32ToUTF8(const UCS4Text: UCS4String): RawByteString;
{$ELSE}
function UTF32ToUTF8(const UCS4Text: UCS4String): UTF8String;
{$ENDIF}
var
Counter: Integer;
SrcLen, SrcLen2, i: Integer;
J, CharLen: Byte;
U: UCS4Char;
PU, PO: PUCS4Char;
PR: PUTF8Char;
begin
result := '';
SrcLen := Length(UCS4Text);
SrcLen2 := SrcLen - 1;
if SrcLen = 0 then
Exit;
Counter := 0;
PU := PUCS4Char(UCS4Text);
PO := PU;
for i:=0 to SrcLen2-1 do
begin
U := PU^;
case U of
$00..$7F:
CharLen := 1;
$80..$7FF:
CharLen := 2;
$800..$FFFF:
CharLen := 3;
$10000..$1FFFFF:
CharLen := 4;
$200000..$3FFFFFF:
CharLen := 5;
$4000000..$7FFFFFFF:
CharLen := 6;
else
CharLen := 0;
end;
if CharLen > 0 then
Inc(Counter, CharLen);
Inc(PU);
end;
SetLength(result, Counter);
PU := PO;
PR := PUTF8Char(result);
for i:=0 to SrcLen2-1 do
begin
U := PU^;
case U of
$00..$7F:
begin
PR^ := UTF8Char(Byte(U));
Inc(PR);
CharLen := 1;
end;
$80..$7FF:
CharLen := 2;
$800..$FFFF:
CharLen := 3;
$10000..$1FFFFF:
CharLen := 4;
$200000..$3FFFFFF:
CharLen := 5;
$4000000..$7FFFFFFF:
CharLen := 6;
else
CharLen := 0;
end;
if CharLen >= 2 then
begin
Inc(PR, CharLen - 1);
for J := CharLen downto 2 do
begin
PR^ := UTF8Char(Byte((U and $3F) or $80));
U := U shr 6;
Dec(PR);
end;
PR^ := UTF8Char(Byte(U or ($FE shl (7 - CharLen))));
Inc(PR, CharLen);
end;
Inc(PU);
end;
end;
// UTF16ToAnsi
{$IFDEF UNICODE}
function UTF16ToAnsi(const WText: WideString): RawByteString;
{$ELSE}
function UTF16ToAnsi(const WText: WideString): AnsiString;
{$ENDIF}
begin
result := UTF16ToAnsi(WText, DefaultAnsiCodePage);
end;
{$IFDEF UNICODE}
function UTF16ToAnsi(const WText: WideString; Codepage: DWORD): RawByteString;
{$ELSE}
function UTF16ToAnsi(const WText: WideString; Codepage: DWORD): AnsiString;
{$ENDIF}
// WideCharToMultiByte
// http://msdn2.microsoft.com/en-us/library/ms776420(VS.85).aspx
// CP_UTF8(CP_UTF7) is *not* supported in Win95.
var
BufSize: Integer;
dwFlags: DWORD;
dCodePage: WORD;
NeedFix: Boolean;
begin
{$IFDEF UNICODE}
dCodePage := StringCodePage(result);
{$ENDIF}
result := '';
if Length(WText) = 0 then
Exit;
{ case CodePage of
CP_UTF7,
CP_UTF8:
dwFlags := 0;
else
dwFlags := WC_COMPOSITECHECK;
end;
}
dwFlags := 0;
if IsWin95 and (dwFlags = 0) then
ConvertUnicodeToMultiByte(Codepage, WText, result)
else
begin
BufSize := 0;
NeedFix := False;
if IsWin2000 then
case Codepage of
50220,
50221,
50222:
NeedFix := True;
end;
if not NeedFix then
begin
BufSize := WideCharToMultiByte(CodePage, dwFlags, PWideChar(WText), -1 , nil, 0, nil, nil);
NeedFix := (BufSize = 0);
end;
if NeedFix then
begin
BufSize := Length(WText) * 3;
SetLength(result, BufSize);
BufSize := WideCharToMultiByte(CodePage, dwFlags, PWideChar(WText), -1 , PAnsiChar(result), BufSize, nil, nil);
SetLength(result, BufSize);
end
else
begin
SetLength(result, BufSize);
WideCharToMultiByte(CodePage, dwFlags, PWideChar(WText), -1 , PAnsiChar(result), BufSize, nil, nil);
end;
{$IFDEF UNICODE}
case CodePage of
CP_UTF7,
CP_UTF8:
SetCodePage(result, CodePage, False);
end;
if CodePage <> dCodePage then
SetCodePage(result, dCodePage, True);
{$ENDIF}
end;
end;
// UTF16ToUTF32
function UTF16ToUTF32(const WText: WideString): UCS4String;
var
Counter,
i: Integer;
PW, OW: PWideChar;
PR: PUCS4Char;
SrcLen: Integer;
SrcLen2: Integer;
begin
result := nil;
SrcLen := Length(WText);
SrcLen2 := SrcLen-1;
if SrcLen = 0 then
Exit;
Counter := 0;
PW := PWideChar(WText);
OW := PW;
for i := 0 to SrcLen2 do
begin
if MecsIsLeadElement(PW^) then
begin
// Surrogate Pair
if MecsIsTrailElement((PW + 1)^) then
Inc(Counter);
end
else if not MecsIsTrailElement(PW^) then
Inc(Counter);
Inc(PW);
end;
SetLength(result, Counter + 1);
PW := OW;
PR := PUCS4Char(result);
for i := 0 to SrcLen2 do
begin
if MecsIsLeadElement(PW^) then
begin
// Surrogate Pair
if MecsIsTrailElement((PW + 1)^) then
begin
PR^ := ((WORD(PW^) and $03FF) shl 10) +
((WORD((PW + 1)^) and $03FF) + $10000);
Inc(PR);
end;
end
else if not MecsIsTrailElement(PW^) then
begin
PR^ := UCS4Char(PW^);
Inc(PR);
end;
Inc(PW);
end;
PR^ := 0;
end;
// UTF16ToUTF8
{$IFDEF UNICODE}
function UTF16ToUTF8(const WText: WideString): RawByteString;
{$ELSE}
function UTF16ToUTF8(const WText: WideString): UTF8String;
{$ENDIF}
begin
if IsWin95 then
{$IFDEF UNICODE}
ConvertUnicodeToMultiByte(CP_UTF8, WText, RawByteString(result))
{$ELSE}
ConvertUnicodeToMultiByte(CP_UTF8, WText, AnsiString(result))
{$ENDIF}
else
result := UTF16ToAnsi(WText, CP_UTF8);
end;
// UTF8ToAnsi
{$IFDEF UNICODE}
function UTF8ToAnsi(const UCF8Text: RawByteString): RawByteString;
{$ELSE}
function UTF8ToAnsi(const UCF8Text: UTF8String): AnsiString;
{$ENDIF}
begin
result := UTF8ToAnsi(UCF8Text, DefaultAnsiCodePage);
end;
{$IFDEF UNICODE}
function UTF8ToAnsi(const UCF8Text: RawByteString; Codepage: DWORD): RawByteString;
{$ELSE}
function UTF8ToAnsi(const UCF8Text: UTF8String; Codepage: DWORD): AnsiString;
{$ENDIF}
begin
if CodePage = CP_UTF8 then
result := UCF8Text
else
begin
if IsWin95 then
ConvertString(CP_UTF8, Codepage, UCF8Text, result)
else
result := UTF16ToAnsi(AnsiToUTF16(UCF8Text, CP_UTF8), CodePage);
end;
end;
// UTF8ToUTF16
{$IFDEF UNICODE}
function UTF8ToUTF16(const UCF8Text: RawByteString): WideString;
{$ELSE}
function UTF8ToUTF16(const UCF8Text: UTF8String): WideString;
{$ENDIF}
begin
if IsWin95 then
ConvertMultiByteToUnicode(CP_UTF8, UCF8Text, result)
else
result := AnsiToUTF16(UCF8Text, CP_UTF8);
end;
// UTF8ToUTF32
{$IFDEF UNICODE}
function UTF8ToUTF32(const UCF8Text: RawByteString): UCS4String;
{$ELSE}
function UTF8ToUTF32(const UCF8Text: UTF8String): UCS4String;
{$ENDIF}
var
P, O: PUTF8Char;
J, CharLen: Byte;
Counter, SrcLen: Integer;
U: UCS4Char;
PU: PUCS4Char;
begin
result := nil;
SrcLen := Length(UCF8Text);
if SrcLen = 0 then
Exit;
Counter := 0;
P := PUTF8Char(UCF8Text);
O := P;
while (P - O) < SrcLen do
begin
if (Byte(P^) and $80) = $00 then
Inc(Counter)
else if(Byte(P^) and $C0) = $C0 then
begin
CharLen := 0;
for J := 2 to 6 do
begin
if (Byte(P^) and ($80 shr J)) = 0 then
begin
CharLen := J;
Break;
end;
end;
if (P - O) <= (SrcLen - CharLen) then
begin
for J := 2 to CharLen do
Inc(P);
Inc(Counter);
end;
end;
Inc(P);
end;
SetLength(result, Counter + 1);
P := O;
PU := PUCS4Char(result);
while (P - O) < SrcLen do
begin
if (Byte(P^) and $80) = $00 then
begin
PU^ := Byte(P^);
Inc(PU);
end
else if(Byte(P^) and $C0) = $C0 then
begin
CharLen := 0;
for J := 2 to 6 do
begin
if (Byte(P^) and ($80 shr J)) = 0 then
begin
CharLen := J;
Break;
end;
end;
if (P - O) <= (SrcLen - CharLen) then
begin
U := Byte((Byte(P^) shl (CharLen + 1)));
U := U shr (CharLen + 1);
for J := 2 to CharLen do
begin
Inc(P);
U := (U shl 6);
U := U or (Byte(P^) and $3F);
end;
PU^ := U;
Inc(PU);
end;
end;
Inc(P);
end;
PU^ := $00000000;
end;
initialization
DefaultAnsiCodePage := GetACP;
DefaultOEMCodePage := GetOEMCP;
DefaultLCID := GetUserDefaultLCID;
end.
[/Quote]
Copyright © 2014 DelphiW.com 开发 源码 文档 技巧 All Rights Reserved
晋ICP备14006235号-8 晋公网安备 14108102000087号
执行时间: 0.033227920532227 seconds