diff options
author | Darkvater <Darkvater@openttd.org> | 2007-03-05 00:45:56 +0000 |
---|---|---|
committer | Darkvater <Darkvater@openttd.org> | 2007-03-05 00:45:56 +0000 |
commit | 2ff94ab0004f6274a79ccf1f88090dd7d69073c6 (patch) | |
tree | 347173f94845f2ed93206df1d8a324c41c93e2bd | |
parent | ca4c8562474608ac15b79198230fd30b020de36c (diff) | |
download | openttd-2ff94ab0004f6274a79ccf1f88090dd7d69073c6.tar.xz |
(svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines, also take into consideration whitespace characters that are more than 1 byte long (e.g. IDEOGRAPHIC SPACE; see the IsWhitespace() function). When trimming such strings, account for multi-byte sequences by using *Utf8PrevChar(v) = '\0'.
-Codechange: Add a function Utf8TrimString() that properly trims a string at a UTF-8 character boundary instead of at an arbitrary byte position (and use it in the chat area)
-rw-r--r-- | src/gfx.cpp | 18 | ||||
-rw-r--r-- | src/string.cpp | 26 | ||||
-rw-r--r-- | src/string.h | 33 | ||||
-rw-r--r-- | src/texteff.cpp | 5 |
4 files changed, 76 insertions, 6 deletions
diff --git a/src/gfx.cpp b/src/gfx.cpp index 4ed1ea424..2599344a5 100644 --- a/src/gfx.cpp +++ b/src/gfx.cpp @@ -296,7 +296,7 @@ static int TruncateString(char *str, int maxw) if (w >= maxw) { /* string got too big... insert dotdotdot */ ddd_pos[0] = ddd_pos[1] = ddd_pos[2] = '.'; - ddd_pos[3] = 0; + ddd_pos[3] = '\0'; return ddd_w; } } else { @@ -440,7 +440,7 @@ uint32 FormatStringLinebreaks(char *str, int maxw) for (;;) { WChar c = Utf8Consume((const char **)&str); /* whitespace is where we will insert the line-break */ - if (c == ' ') last_space = str; + if (IsWhitespace(c)) last_space = str; if (IsPrintable(c)) { w += GetCharacterWidth(size, c); @@ -451,7 +451,7 @@ uint32 FormatStringLinebreaks(char *str, int maxw) * 2. In all other cases force a linebreak at the last seen whitespace */ if (w > maxw) { if (last_space == NULL) { - str[-1] = '\0'; + *Utf8PrevChar(str) = '\0'; return num + (size << 16); } str = last_space; @@ -469,9 +469,17 @@ uint32 FormatStringLinebreaks(char *str, int maxw) } } end_of_inner_loop: - /* string didn't fit on line, so 'dummy' terminate and increase linecount */ + /* String didn't fit on line (or a '\n' was encountered), so 'dummy' terminate + * and increase linecount. We use Utf8PrevChar() as also non 1 char long + * whitespace seperators are supported */ num++; - str[-1] = '\0'; + char *s = Utf8PrevChar(str); + *s++ = '\0'; + + /* In which case (see above) we will shift remainder to left and close the gap */ + if (str - s >= 1) { + for (; str[-1] != '\0';) *s++ = *str++; + } } } diff --git a/src/string.cpp b/src/string.cpp index d55d9a70e..d5c499d0a 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -268,3 +268,29 @@ size_t Utf8Encode(char *buf, WChar c) *buf = '?'; return 1; } + +/** + * Properly terminate an UTF8 string to some maximum length + * @param s string to check if it needs additional trimming + * @param maxlen the maximum length the buffer can have. + * @return the new length in bytes of the string (eg. 
strlen(new_string)) + * @NOTE maxlen is the string length _INCLUDING_ the terminating '\0' + */ +size_t Utf8TrimString(char *s, size_t maxlen) +{ + size_t length = 0; + + for (const char *ptr = strchr(s, '\0'); *s != '\0';) { + size_t len = Utf8EncodedCharLen(*s); + if (len == 0) break; // invalid encoding + + /* Take care when a hard cutoff was made for the string and + * the last UTF8 sequence is invalid */ + if (length + len >= maxlen || (s + len > ptr)) break; + s += len; + length += len; + } + + *s = '\0'; + return length; +}
\ No newline at end of file diff --git a/src/string.h b/src/string.h index 8b6d6792f..eba7eff63 100644 --- a/src/string.h +++ b/src/string.h @@ -74,6 +74,7 @@ bool IsValidChar(WChar key, CharSetFilter afilter); size_t Utf8Decode(WChar *c, const char *s); size_t Utf8Encode(char *buf, WChar c); +size_t Utf8TrimString(char *s, size_t maxlen); static inline WChar Utf8Consume(const char **s) @@ -100,6 +101,23 @@ static inline size_t Utf8CharLen(WChar c) } +/** + * Return the length of an UTF-8 encoded value based on a single char. This + * char should be the first byte of the UTF-8 encoding. If not, or encoding + * is invalid, return value is 0 + */ +static inline size_t Utf8EncodedCharLen(char c) +{ + if (GB(c, 3, 5) == 0x1E) return 4; + if (GB(c, 4, 4) == 0x0E) return 3; + if (GB(c, 5, 3) == 0x06) return 2; + if (GB(c, 7, 1) == 0x00) return 1; + + /* Invalid UTF8 start encoding */ + return 0; +} + + /* Check if the given character is part of a UTF8 sequence */ static inline bool IsUtf8Part(char c) { @@ -129,5 +147,20 @@ static inline bool IsPrintable(WChar c) return true; } +/** + * Check whether UNICODE character is whitespace or not + * @param c UNICODE character to check + * @return a boolean value whether 'c' is a whitespace character or not + * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm + */ +static inline bool IsWhitespace(WChar c) +{ + return + c == 0x0020 /* SPACE */ || + c == 0x00A0 /* NO-BREAK SPACE */ || + c == 0x3000 /* IDEOGRAPHIC SPACE */ + ; +} + #endif /* STRING_H */ diff --git a/src/texteff.cpp b/src/texteff.cpp index 54c456364..ff2cf1b49 100644 --- a/src/texteff.cpp +++ b/src/texteff.cpp @@ -17,7 +17,7 @@ #include "date.h" enum { - MAX_TEXTMESSAGE_LENGTH = 150, + MAX_TEXTMESSAGE_LENGTH = 200, MAX_TEXT_MESSAGES = 30, MAX_CHAT_MESSAGES = 10, MAX_ANIMATED_TILES = 256, @@ -82,6 +82,9 @@ void CDECL AddTextMessage(uint16 color, uint8 duration, const char *message, ... 
vsnprintf(buf, lengthof(buf), message, va); va_end(va); + + Utf8TrimString(buf, MAX_TEXTMESSAGE_LENGTH); + /* Force linebreaks for strings that are too long */ lines = GB(FormatStringLinebreaks(buf, _textmsg_box.width - 8), 0, 16) + 1; if (lines >= MAX_CHAT_MESSAGES) return; |