diff options
author | peter1138 <peter1138@openttd.org> | 2006-11-17 07:46:02 +0000 |
---|---|---|
committer | peter1138 <peter1138@openttd.org> | 2006-11-17 07:46:02 +0000 |
commit | 7d497f2b01a20e5d3aeec3f9ff47b2ba0abc26d4 (patch) | |
tree | 62b33ceb4fb2826b0272894cc137fa0ab1408435 | |
parent | 08b0b77e8a571583847cfac644971c46ef2a7c6b (diff) | |
download | openttd-7d497f2b01a20e5d3aeec3f9ff47b2ba0abc26d4.tar.xz |
(svn r7185) -Codechange: Make strgen validate strings for UTF-8 well-formedness
-rw-r--r-- | strgen/strgen.c | 35 |
1 files changed, 35 insertions, 0 deletions
/**
 * Check that the bytes at the start of a string form exactly one well-formed
 * UTF-8 sequence (strgen/strgen.c).
 *
 * A sequence is accepted only when all of the following hold:
 *  - the lead byte announces a length of 1 to 4 bytes;
 *  - every following byte is a continuation byte (10xxxxxx);
 *  - the code point uses its shortest encoding (no overlong forms);
 *  - the code point is not a UTF-16 surrogate (U+D800..U+DFFF);
 *  - the code point does not exceed U+10FFFF.
 *
 * Continuation bytes are tested left to right and the terminating '\0' is
 * never a continuation byte, so a truncated sequence is rejected before any
 * byte beyond the terminator is read.
 *
 * HandleString() walks every string with this function, advancing by the
 * returned length until it reaches '\0', and aborts through
 * fatal("Invalid UTF-8 sequence in '%s'", s) as soon as 0 is returned.
 *
 * @param s NUL-terminated string to inspect; must not be NULL.
 * @return Length in bytes (1..4) of the sequence starting at s, or 0 when
 *         the sequence is not well-formed UTF-8.
 */
size_t Utf8Validate(const char *s)
{
	/* Inspect raw bytes as unsigned so the result does not depend on
	 * whether plain char is signed on the target platform. */
	const unsigned char *b = (const unsigned char *)s;
	unsigned long c; /* unsigned long is guaranteed to hold 21-bit code points */

	if (b[0] < 0x80) {
		/* 1 byte: 0xxxxxxx */
		return 1;
	}

	if ((b[0] & 0xE0) == 0xC0) {
		/* 2 bytes: 110xxxxx 10xxxxxx */
		if ((b[1] & 0xC0) != 0x80) return 0;

		c = (unsigned long)(b[0] & 0x1F) << 6 | (unsigned long)(b[1] & 0x3F);
		/* Values below 0x80 must use the 1 byte form. */
		return (c >= 0x80) ? 2 : 0;
	}

	if ((b[0] & 0xF0) == 0xE0) {
		/* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
		if ((b[1] & 0xC0) != 0x80 || (b[2] & 0xC0) != 0x80) return 0;

		c = (unsigned long)(b[0] & 0x0F) << 12 | (unsigned long)(b[1] & 0x3F) << 6 | (unsigned long)(b[2] & 0x3F);
		/* Values below 0x800 must use a shorter form. */
		if (c < 0x800) return 0;
		/* Surrogates are reserved for UTF-16 and are never valid in UTF-8. */
		if (c >= 0xD800 && c <= 0xDFFF) return 0;
		return 3;
	}

	if ((b[0] & 0xF8) == 0xF0) {
		/* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		if ((b[1] & 0xC0) != 0x80 || (b[2] & 0xC0) != 0x80 || (b[3] & 0xC0) != 0x80) return 0;

		c = (unsigned long)(b[0] & 0x07) << 18 | (unsigned long)(b[1] & 0x3F) << 12 | (unsigned long)(b[2] & 0x3F) << 6 | (unsigned long)(b[3] & 0x3F);
		/* Reject overlong forms and anything beyond the Unicode range. */
		return (c >= 0x10000 && c <= 0x10FFFF) ? 4 : 0;
	}

	/* Stray continuation byte or an invalid lead byte (0xF8..0xFF). */
	return 0;
}