summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGraeme Geldenhuys <graeme@mastermaths.co.za>2012-03-12 22:31:29 +0200
committerGraeme Geldenhuys <graeme@mastermaths.co.za>2012-03-12 22:31:29 +0200
commit5960aebbdf47454b128288605d9d0c565d7f4d73 (patch)
tree78f902859e8192fb9f97bb3122c9c8bc53f50f39
parent8cc80d7f1d566d58b4136b2b2c8674d5fdf16816 (diff)
downloadfpGUI-5960aebbdf47454b128288605d9d0c565d7f4d73.tar.xz
new unicode method added to fpg_stringutils
-rw-r--r--src/corelib/fpg_stringutils.pas52
1 files changed, 52 insertions, 0 deletions
diff --git a/src/corelib/fpg_stringutils.pas b/src/corelib/fpg_stringutils.pas
index 1cc992db..b5ca0993 100644
--- a/src/corelib/fpg_stringutils.pas
+++ b/src/corelib/fpg_stringutils.pas
@@ -30,6 +30,7 @@ type
function UTF8CharacterLength(p: PChar): integer;
+function UTF8CharToUnicode(p: PChar; out CharLen: longint): longword;
function UTF8CharStart(UTF8Str: PChar; Len, Index: integer): PChar;
function UTF8Copy(const s: string; StartCharIndex, CharCount: integer): string;
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: SizeInt): string;
@@ -112,6 +113,57 @@ begin
Result := 0;
end;
+function UTF8CharToUnicode(p: PChar; out CharLen: longint): longword; // decodes the UTF-8 sequence starting at p; CharLen receives the number of bytes consumed
+begin // Result is the decoded value, or the raw first byte with CharLen=1 when no well-formed multi-byte sequence starts at p
+ if p=nil then begin // no input at all: return 0 and report 0 bytes consumed
+ Result:=0;
+ CharLen:=0;
+ exit;
+ end;
+ if ord(p^) < %11000000 then begin // first byte below $C0: 0xxxxxxx, or a stray continuation byte 10xxxxxx
+ // single byte: nothing to decode here, the default at the end of the function returns it as-is (#0 is treated as an ordinary character)
+ end
+ else if ((ord(p^) and %11100000) = %11000000) then begin // lead byte 110xxxxx
+ // candidate 2-byte sequence: accepted only if the next byte is a continuation byte (10xxxxxx)
+ if (ord(p[1]) and %11000000) = %10000000 then begin // NOTE(review): reads p[1] unconditionally - presumably p is #0-terminated; verify against callers
+ Result:=((ord(p^) and %00011111) shl 6) // 5 payload bits from the lead byte
+ or (ord(p[1]) and %00111111); // 6 payload bits from the continuation byte
+ CharLen:=2;
+ exit;
+ end;
+ end
+ else if ((ord(p^) and %11110000) = %11100000) then begin // lead byte 1110xxxx
+ // candidate 3-byte sequence: both following bytes must be continuation bytes
+ if ((ord(p[1]) and %11000000) = %10000000) // NOTE(review): p[2] is only skipped when p[1] fails if boolean evaluation short-circuits - confirm {$B-} is in effect
+ and ((ord(p[2]) and %11000000) = %10000000) then begin
+ Result:=((ord(p^) and %00011111) shl 12) // mask is one bit wider than the 4 payload bits; harmless because the test above guarantees bit 4 is 0
+ or ((ord(p[1]) and %00111111) shl 6)
+ or (ord(p[2]) and %00111111);
+ CharLen:=3;
+ exit;
+ end;
+ end
+ else if ((ord(p^) and %11111000) = %11110000) then begin // lead byte 11110xxx
+ // candidate 4-byte sequence: all three following bytes must be continuation bytes
+ if ((ord(p[1]) and %11000000) = %10000000)
+ and ((ord(p[2]) and %11000000) = %10000000)
+ and ((ord(p[3]) and %11000000) = %10000000) then begin
+ Result:=((ord(p^) and %00001111) shl 18) // mask is one bit wider than the 3 payload bits; harmless because the test above guarantees bit 3 is 0
+ or ((ord(p[1]) and %00111111) shl 12)
+ or ((ord(p[2]) and %00111111) shl 6)
+ or (ord(p[3]) and %00111111);
+ CharLen:=4;
+ exit;
+ end;
+ end
+ else begin
+ // invalid lead byte (11111xxx): no decoding attempted, the default below returns it as a single byte
+ end;
+ Result:=ord(p^); // default: single byte, invalid lead byte, or a lead byte whose continuation bytes did not match
+ CharLen:=1; // NOTE(review): decoded values are never range-checked here, so overlong forms are accepted as-is
+end;
+
+
{ Returns the character starting position as PChar in the UTF8Str string. }
function UTF8CharStart(UTF8Str: PChar; Len, Index: integer): PChar;
var