summary | refs | log | tree | commit | diff
path: root/strgen
diff options
context:
space:
mode:
author: peter1138 <peter1138@openttd.org> 2006-11-17 07:46:02 +0000
committer: peter1138 <peter1138@openttd.org> 2006-11-17 07:46:02 +0000
commit: 8a49fa12ffd80526dd262f28da4f9000d6d9923d (patch)
tree: 62b33ceb4fb2826b0272894cc137fa0ab1408435 /strgen
parent: 4db17dcf9b9cf6b8214e0a23a010780dc1fa8699 (diff)
download: openttd-8a49fa12ffd80526dd262f28da4f9000d6d9923d.tar.xz
(svn r7185) -Codechange: Make strgen validate strings for UTF-8 well-formed-ness-ness
Diffstat (limited to 'strgen')
-rw-r--r-- strgen/strgen.c 35
1 files changed, 35 insertions, 0 deletions
diff --git a/strgen/strgen.c b/strgen/strgen.c
index 487cc9eff..baa348e4f 100644
--- a/strgen/strgen.c
+++ b/strgen/strgen.c
@@ -222,6 +222,31 @@ static void PutUtf8(uint32 value)
}
+/* Return the byte length (1-4) of the well-formed UTF-8 sequence starting at s, or 0 if it is malformed. */
+size_t Utf8Validate(const char *s)
+{
+	uint32 c;
+	if (!HASBIT(s[0], 7)) {
+		/* 1 byte: top bit clear; this also covers the terminating '\0' */
+		return 1;
+	} else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
+		/* 2 bytes; a decoded value below 0x80 is an overlong encoding */
+		c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
+		if (c >= 0x80) return 2;
+	} else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
+		/* 3 bytes; reject overlong forms (< 0x800) and UTF-16 surrogates U+D800..U+DFFF */
+		c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
+		if (c >= 0x800 && (c < 0xD800 || c > 0xDFFF)) return 3;
+	} else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
+		/* 4 bytes; must lie above the 3-byte range and not exceed U+10FFFF */
+		c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
+		if (c >= 0x10000 && c <= 0x10FFFF) return 4;
+	}
+	/* Nothing matched: the bytes at s are not a well-formed UTF-8 sequence */
+	return 0;
+}
+
+
static void EmitSingleChar(char *buf, int value)
{
if (*buf != '\0') warning("Ignoring trailing letters in command");
@@ -781,6 +806,16 @@ static void HandleString(char *str, bool master)
*t = 0;
s++;
+ /* Check string is valid UTF-8 */
+ {
+ const char *tmp;
+ for (tmp = s; *tmp != '\0';) {
+ size_t len = Utf8Validate(tmp);
+ if (len == 0) fatal("Invalid UTF-8 sequence in '%s'", s);
+ tmp += len;
+ }
+ }
+
// Check if the string has a case..
// The syntax for cases is IDENTNAME.case
casep = strchr(str, '.');