diff options
author | Darkvater <darkvater@openttd.org> | 2007-03-05 00:45:56 +0000 |
---|---|---|
committer | Darkvater <darkvater@openttd.org> | 2007-03-05 00:45:56 +0000 |
commit | 915ae8ffc28aee2e0bb9d9c623a711b4fc1f7faa (patch) | |
tree | 347173f94845f2ed93206df1d8a324c41c93e2bd /src/string.h | |
parent | aea64adbb9ff755be303515bebb428d92e7bcc92 (diff) | |
download | openttd-915ae8ffc28aee2e0bb9d9c623a711b4fc1f7faa.tar.xz |
(svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
-Codechange: Add a function Utf8TrimString() that properly trims a string to a UTF8 encoding separation instead of somewhere in the wild (and use it in the chat area)
Diffstat (limited to 'src/string.h')
-rw-r--r-- | src/string.h | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/src/string.h b/src/string.h index 8b6d6792f..eba7eff63 100644 --- a/src/string.h +++ b/src/string.h @@ -74,6 +74,7 @@ bool IsValidChar(WChar key, CharSetFilter afilter); size_t Utf8Decode(WChar *c, const char *s); size_t Utf8Encode(char *buf, WChar c); +size_t Utf8TrimString(char *s, size_t maxlen); static inline WChar Utf8Consume(const char **s) @@ -100,6 +101,23 @@ static inline size_t Utf8CharLen(WChar c) } +/** + * Return the length of an UTF-8 encoded value based on a single char. This + * char should be the first byte of the UTF-8 encoding. If not, or encoding + * is invalid, return value is 0 + */ +static inline size_t Utf8EncodedCharLen(char c) +{ + if (GB(c, 3, 5) == 0x1E) return 4; + if (GB(c, 4, 4) == 0x0E) return 3; + if (GB(c, 5, 3) == 0x06) return 2; + if (GB(c, 7, 1) == 0x00) return 1; + + /* Invalid UTF8 start encoding */ + return 0; +} + + /* Check if the given character is part of a UTF8 sequence */ static inline bool IsUtf8Part(char c) { @@ -129,5 +147,20 @@ static inline bool IsPrintable(WChar c) return true; } +/** + * Check whether UNICODE character is whitespace or not + * @param c UNICODE character to check + * @return a boolean value whether 'c' is a whitespace character or not + * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm + */ +static inline bool IsWhitespace(WChar c) +{ + return + c == 0x0020 /* SPACE */ || + c == 0x00A0 /* NO-BREAK SPACE */ || + c == 0x3000 /* IDEOGRAPHIC SPACE */ + ; +} + #endif /* STRING_H */ |