From 0d643c624b0195c6c24fceb4ae4019956a6579ac Mon Sep 17 00:00:00 2001 From: Darkvater Date: Sun, 4 Mar 2007 00:49:40 +0000 Subject: (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence. --- src/string.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/string.h') diff --git a/src/string.h b/src/string.h index 6b9924d63..da07f08d5 100644 --- a/src/string.h +++ b/src/string.h @@ -106,6 +106,28 @@ static inline bool IsUtf8Part(char c) return GB(c, 6, 2) == 2; } +/** + * Retrieve the (partial) length of the previous UNICODE character + * in a UTF-8 encoded string. + * @param s char pointer pointing to the first char of the next character + * @returns the encoded length in bytes (size) of the UNICODE character + * that was just before the one 's' is pointing to + * @note If 's' is not pointing to the first byte of the next UNICODE character + * only a partial length of the sequence will be returned. + * For example, given this sequence: 0xE3 0x85 0x80, 0xE3 0x81 0x9E + * 1. 's' is pointing to the second 0xE3, return value is 3 + * 2. 's' is pointing to 0x80, return value is 2. + * So take care with the return values of this function. To get the real length + * for an (invalid) sequence, use the position 's' minus this function's return + * value with Utf8EncodedCharLen() or Utf8Decode(). The backward scan has no lower bound, so the caller must guarantee a non-continuation byte exists before 's'. + */ +static inline size_t Utf8PrevCharLen(const char *s) +{ + size_t len = 1; + while (IsUtf8Part(*--s)) len++; + return len; +} + static inline bool IsPrintable(WChar c) { -- cgit v1.2.3-54-g00ecf