From 8eb1030c56d6a1228d3145b247f75c733576e511 Mon Sep 17 00:00:00 2001 From: graemeg Date: Sat, 27 Sep 2008 18:24:54 +0000 Subject: * Rename all corelib units to the new naming convention. * Updated the UI Designer to use the new unit names. --- src/corelib/fpg_stringutils.pas | 316 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) create mode 100644 src/corelib/fpg_stringutils.pas (limited to 'src/corelib/fpg_stringutils.pas') diff --git a/src/corelib/fpg_stringutils.pas b/src/corelib/fpg_stringutils.pas new file mode 100644 index 00000000..d7a0ca77 --- /dev/null +++ b/src/corelib/fpg_stringutils.pas @@ -0,0 +1,316 @@ +{ + fpGUI - Free Pascal GUI Library + + Copyright (C) 2006 - 2008 See the file AUTHORS.txt, included in this + distribution, for details of the copyright. + + See the file COPYING.modifiedLGPL, included in this distribution, + for details about redistributing fpGUI. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + Description: + Some handly UTF8 functions copied from the Lazarus LCL. Comes from the + LCLProc unit. Surely we can move this into FPC? +} + +unit fpg_stringutils; + +{$mode objfpc}{$H+} + +interface + +uses + Classes, SysUtils; + + +function UTF8CharacterLength(p: PChar): integer; +function UTF8CharStart(UTF8Str: PChar; Len, Index: integer): PChar; +function UTF8Copy(const s: string; StartCharIndex, CharCount: integer): string; +function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: SizeInt): string; +function UTF8Length(const s: string): integer; +function UTF8Length(p: PChar; ByteCount: integer): integer; +function UTF8Pos(const SearchForText, SearchInText: string): integer; +procedure UTF8Delete(var S: string; Index, Size: integer); +procedure UTF8Insert(const Source: string; var S: string; Index: integer); +function UTF8CharAtByte(const s: string; BytePos: integer; var aChar: string): integer; + + +// short form (alias or convenience) functions for the UTF8 ones above +function Copy8(const s: string; StartCharIndex, CharCount: integer): string; +function Length8(const s: string): integer; +function Pos8(const SearchForText, SearchInText: string): integer; +procedure Delete8(var S: string; Index, Size: integer); +procedure Insert8(const Source: string; var S: string; Index: integer); + + +implementation + + +{ If it's a multibyte character, the first byte specifies the amount of bytes + used. All subsequent bytes will start with 10xxxxxx. It is assumed that p + points to the beginning of a single or multibyte character. } +function UTF8CharacterLength(p: PChar): integer; +begin + if p <> nil then + begin + if ord(p^) < %11000000 then // 00000000-01111111 + begin + // regular single byte character (#0 is a character, this is pascal ;) + Result := 1; + end + else if ((ord(p^) and %11100000) = %11000000) then // 11000010-11011111 + begin + // could be 2 byte character + if (ord(p[1]) and %11000000) = %10000000 then + Result := 2 + else + Result := 1; + end + else if ((ord(p^) and %11110000) = %11100000) then // 11100000-11101111 + begin + // could be 3 byte character + if ((ord(p[1]) and %11000000) = %10000000) + and ((ord(p[2]) and %11000000) = %10000000) then + Result := 3 + else + Result := 1; + end + else if ((ord(p^) and %11111000) = %11110000) then // 11110000-11110100 + begin + // could be 4 byte character + if ((ord(p[1]) and %11000000) = %10000000) + and ((ord(p[2]) and %11000000) = %10000000) + and ((ord(p[3]) and %11000000) = %10000000) then + Result := 4 + else + Result := 1; + end + else + Result := 1; + end + else + Result := 0; +end; + +{ Returns the character starting position as PChar in the UTF8Str string. } +function UTF8CharStart(UTF8Str: PChar; Len, Index: integer): PChar; +var + CharLen: LongInt; +begin + Result := UTF8Str; + if Result <> nil then + begin + while (Index > 0) and (Len > 0) do + begin + CharLen := UTF8CharacterLength(Result); + dec(Len, CharLen); + dec(Index); + inc(Result, CharLen); + end; + if (Index > 0) or (Len < 0) then + Result := nil; + end; +end; + +// returns substring +function UTF8Copy(const s: string; StartCharIndex, CharCount: integer): string; +var + StartBytePos: PChar; + EndBytePos: PChar; + MaxBytes: PtrInt; +begin + Result := ''; + // Some sanity checks + if (Length(s) = 0) then + Exit; //==> + if CharCount = 0 then + Exit; //==> + + StartBytePos := UTF8CharStart(PChar(s),length(s),StartCharIndex-1); + if StartBytePos = nil then + Result := '' + else + begin + MaxBytes := PtrInt(PChar(s)+length(s)-StartBytePos); + EndBytePos := UTF8CharStart(StartBytePos,MaxBytes,CharCount); + if EndBytePos = nil then + Result := copy(s,StartBytePos-PChar(s)+1,MaxBytes) + else + Result := copy(s,StartBytePos-PChar(s)+1,EndBytePos-StartBytePos); + end; +end; + +function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: SizeInt): string; +var + Source: PChar; + Dest: PChar; + SourceEnd: PChar; + CharLen: integer; + SourceCopied: PChar; + + // Copies from SourceStart till Source to Dest and updates Dest + procedure CopyPart; inline; + var + CopyLength: SizeInt; + begin + CopyLength := Source - SourceCopied; + if CopyLength=0 then exit; + move(SourceCopied^ , Dest^, CopyLength); + SourceCopied:=Source; + inc(Dest, CopyLength); + end; + +begin + SetLength(Result, SourceLen); + if SourceLen=0 then + Exit; //==> + SourceCopied:=SourceStart; + Source:=SourceStart; + Dest:=PChar(Result); + SourceEnd := Source + SourceLen; + while Source 0) do + begin + inc(Result); + CharLen := UTF8CharacterLength(p); + inc(p, CharLen); + dec(ByteCount, CharLen); + end; +end; + +// returns the character index, where the SearchForText starts in SearchInText +function UTF8Pos(const SearchForText, SearchInText: string): integer; +var + p: LongInt; +begin + p := System.Pos(SearchForText, SearchInText); + if p > 0 then + Result := UTF8Length(PChar(SearchInText), p-1) + 1 + else + Result := 0; +end; + +procedure UTF8Delete(var S: string; Index, Size: integer); +var + ls: integer; + b: string; + e: string; +begin + ls := UTF8Length(S); + if (Index > ls) or (Index <= 0) or (Size <= 0) then + Exit; //==> + b := UTF8Copy(S, 1, Index-1); // beginning string + e := UTF8Copy(S, Index+Size, UTF8Length(S)-(Index+Size-1)); // ending string + S := b + e; +end; + +procedure UTF8Insert(const Source: string; var S: string; Index: integer); +var + b: string; + e: string; +begin + if UTF8Length(Source) = 0 then + Exit; //==> + b := UTF8Copy(S, 1, Index-1); // beginning string + e := UTF8Copy(S, Index, UTF8Length(S)-Index+1); // ending string + S := b + Source + e; +end; + +function UTF8CharAtByte(const s: string; BytePos: integer; + var aChar: string): integer; +var + CharLen: Integer; +begin + if BytePos > 0 then + begin + CharLen := UTF8CharacterLength(@s[BytePos]); + aChar := Copy(s, BytePos, CharLen); + Result := BytePos + CharLen; + end else + begin + aChar := ''; + Result := 1; + end; +end; + +function Copy8(const s: string; StartCharIndex, CharCount: integer): string; +begin + Result := UTF8Copy(s, StartCharIndex, CharCount); +end; + +function Length8(const s: string): integer; +begin + Result := UTF8Length(s); +end; + +function Pos8(const SearchForText, SearchInText: string): integer; +begin + Result := UTF8Pos(SearchForText, SearchInText); +end; + +procedure Delete8(var S: string; Index, Size: integer); +begin + UTF8Delete(S, Index, Size); +end; + +procedure Insert8(const Source: string; var S: string; Index: integer); +begin + UTF8Insert(Source, S, Index); +end; + + +end. + -- cgit v1.2.3-54-g00ecf