summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarkvater <Darkvater@openttd.org>2007-03-05 00:45:56 +0000
committerDarkvater <Darkvater@openttd.org>2007-03-05 00:45:56 +0000
commit2ff94ab0004f6274a79ccf1f88090dd7d69073c6 (patch)
tree347173f94845f2ed93206df1d8a324c41c93e2bd
parentca4c8562474608ac15b79198230fd30b020de36c (diff)
downloadopenttd-2ff94ab0004f6274a79ccf1f88090dd7d69073c6.tar.xz
(svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines, also take into consideration whitespace characters that are more than 1 byte long (eg IDEOGRAPHIC SPACE; see the IsWhitespace() function). When trimming such strings, account for multi-byte sequences by using *Utf8PrevChar(v) = '\0'.
-Codechange: Add a function Utf8TrimString() that properly trims a string at a UTF-8 character boundary instead of somewhere in the wild (and use it in the chat area)
-rw-r--r--src/gfx.cpp18
-rw-r--r--src/string.cpp26
-rw-r--r--src/string.h33
-rw-r--r--src/texteff.cpp5
4 files changed, 76 insertions, 6 deletions
diff --git a/src/gfx.cpp b/src/gfx.cpp
index 4ed1ea424..2599344a5 100644
--- a/src/gfx.cpp
+++ b/src/gfx.cpp
@@ -296,7 +296,7 @@ static int TruncateString(char *str, int maxw)
if (w >= maxw) {
/* string got too big... insert dotdotdot */
ddd_pos[0] = ddd_pos[1] = ddd_pos[2] = '.';
- ddd_pos[3] = 0;
+ ddd_pos[3] = '\0';
return ddd_w;
}
} else {
@@ -440,7 +440,7 @@ uint32 FormatStringLinebreaks(char *str, int maxw)
for (;;) {
WChar c = Utf8Consume((const char **)&str);
/* whitespace is where we will insert the line-break */
- if (c == ' ') last_space = str;
+ if (IsWhitespace(c)) last_space = str;
if (IsPrintable(c)) {
w += GetCharacterWidth(size, c);
@@ -451,7 +451,7 @@ uint32 FormatStringLinebreaks(char *str, int maxw)
* 2. In all other cases force a linebreak at the last seen whitespace */
if (w > maxw) {
if (last_space == NULL) {
- str[-1] = '\0';
+ *Utf8PrevChar(str) = '\0';
return num + (size << 16);
}
str = last_space;
@@ -469,9 +469,17 @@ uint32 FormatStringLinebreaks(char *str, int maxw)
}
}
end_of_inner_loop:
- /* string didn't fit on line, so 'dummy' terminate and increase linecount */
+ /* String didn't fit on line (or a '\n' was encountered), so 'dummy' terminate
+ * and increase linecount. We use Utf8PrevChar() because whitespace separators
+ * that are more than 1 char long are also supported */
num++;
- str[-1] = '\0';
+ char *s = Utf8PrevChar(str);
+ *s++ = '\0';
+
+ /* If the separator was more than 1 char long, shift the remainder to the left and close the gap */
+ if (str - s >= 1) {
+ for (; str[-1] != '\0';) *s++ = *str++;
+ }
}
}
diff --git a/src/string.cpp b/src/string.cpp
index d55d9a70e..d5c499d0a 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -268,3 +268,29 @@ size_t Utf8Encode(char *buf, WChar c)
*buf = '?';
return 1;
}
+
+/**
+ * Terminate a UTF-8 string on a character boundary within some maximum length
+ * @param s string to trim in place; it is cut before the first sequence that is invalid, incomplete or does not fit
+ * @param maxlen the maximum length in bytes the buffer can have.
+ * @return the new length in bytes of the string (eg. strlen(new_string))
+ * @note maxlen is the string length _INCLUDING_ the terminating '\0'
+ */
+size_t Utf8TrimString(char *s, size_t maxlen)
+{
+ size_t length = 0;
+
+ for (const char *ptr = strchr(s, '\0'); *s != '\0';) {
+ size_t len = Utf8EncodedCharLen(*s);
+ if (len == 0) break; // not a valid first byte of a sequence, cut the string here
+
+ /* Stop when the next sequence would leave no room for the terminator, or when a
+ * hard cutoff was made and the last UTF8 sequence runs past the end of the string */
+ if (length + len >= maxlen || (s + len > ptr)) break;
+ s += len;
+ length += len;
+ }
+
+ *s = '\0';
+ return length;
+} \ No newline at end of file
diff --git a/src/string.h b/src/string.h
index 8b6d6792f..eba7eff63 100644
--- a/src/string.h
+++ b/src/string.h
@@ -74,6 +74,7 @@ bool IsValidChar(WChar key, CharSetFilter afilter);
size_t Utf8Decode(WChar *c, const char *s);
size_t Utf8Encode(char *buf, WChar c);
+size_t Utf8TrimString(char *s, size_t maxlen);
static inline WChar Utf8Consume(const char **s)
@@ -100,6 +101,23 @@ static inline size_t Utf8CharLen(WChar c)
}
+/**
+ * Return the length in bytes of a UTF-8 encoded value based on a single char.
+ * This char should be the first byte of the UTF-8 encoding. If it is not, or the
+ * encoding is invalid, the return value is 0. NOTE(review): the bit patterns noted below assume GB(x, s, n) yields n bits of x starting at bit s - confirm.
+ */
+static inline size_t Utf8EncodedCharLen(char c)
+{
+ if (GB(c, 3, 5) == 0x1E) return 4; /* presumably 11110xxx */
+ if (GB(c, 4, 4) == 0x0E) return 3; /* presumably 1110xxxx */
+ if (GB(c, 5, 3) == 0x06) return 2; /* presumably 110xxxxx */
+ if (GB(c, 7, 1) == 0x00) return 1; /* presumably 0xxxxxxx */
+
+ /* None of the tests matched: not a valid first byte of a UTF8 sequence */
+ return 0;
+}
+
+
/* Check if the given character is part of a UTF8 sequence */
static inline bool IsUtf8Part(char c)
{
@@ -129,5 +147,20 @@ static inline bool IsPrintable(WChar c)
return true;
}
+/**
+ * Check whether a UNICODE character is one of the whitespace characters recognised here
+ * @param c UNICODE character to check
+ * @return true only if 'c' is SPACE, NO-BREAK SPACE or IDEOGRAPHIC SPACE; false for every other value
+ * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm
+ */
+static inline bool IsWhitespace(WChar c)
+{
+ return
+ c == 0x0020 /* SPACE */ ||
+ c == 0x00A0 /* NO-BREAK SPACE */ ||
+ c == 0x3000 /* IDEOGRAPHIC SPACE */
+ ;
+}
+
#endif /* STRING_H */
diff --git a/src/texteff.cpp b/src/texteff.cpp
index 54c456364..ff2cf1b49 100644
--- a/src/texteff.cpp
+++ b/src/texteff.cpp
@@ -17,7 +17,7 @@
#include "date.h"
enum {
- MAX_TEXTMESSAGE_LENGTH = 150,
+ MAX_TEXTMESSAGE_LENGTH = 200,
MAX_TEXT_MESSAGES = 30,
MAX_CHAT_MESSAGES = 10,
MAX_ANIMATED_TILES = 256,
@@ -82,6 +82,9 @@ void CDECL AddTextMessage(uint16 color, uint8 duration, const char *message, ...
vsnprintf(buf, lengthof(buf), message, va);
va_end(va);
+
+ Utf8TrimString(buf, MAX_TEXTMESSAGE_LENGTH);
+
/* Force linebreaks for strings that are too long */
lines = GB(FormatStringLinebreaks(buf, _textmsg_box.width - 8), 0, 16) + 1;
if (lines >= MAX_CHAT_MESSAGES) return;