From 86ca408d469811d13a15d5c7a671feda38126eb0 Mon Sep 17 00:00:00 2001
From: rubidium
Date: Fri, 6 Mar 2009 01:23:25 +0000
Subject: (svn r15626) -Fix [FS#2698]: UTF8 string handling could cause buffer overruns.

---
 src/string.cpp | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'src/string.cpp')

diff --git a/src/string.cpp b/src/string.cpp
index 52dfc260c..fc818255f 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -97,13 +97,27 @@ char *CDECL str_fmt(const char *str, ...)
 }
 
 
-void str_validate(char *str, bool allow_newlines, bool ignore)
+void str_validate(char *str, const char *last, bool allow_newlines, bool ignore)
 {
+	/* Assume the ABSOLUTE WORST to be in str as it comes from the outside. */
+
 	char *dst = str;
-	WChar c;
-	size_t len;
+	while (*str != '\0') {
+		size_t len = Utf8EncodedCharLen(*str);
+		/* If the character is unknown, i.e. encoded length is 0
+		 * we assume worst case for the length check.
+		 * The length check is needed to prevent Utf8Decode from reading
+		 * past the terminating '\0' if that happens to be placed
+		 * within the encoding of a UTF-8 character. */
+		if ((len == 0 && str + 4 > last) || str + len > last) break;
+
+		WChar c;
+		len = Utf8Decode(&c, str);
+		/* It's possible to encode the string termination character
+		 * into multiple bytes. This prevents those termination
+		 * characters from being skipped. */
+		if (c == '\0') break;
 
-	for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
 		if (IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) {
 			/* Copy the character back. Even if dst is current the same as str
 			 * (i.e. no characters have been changed) this is quicker than
-- 
cgit v1.2.3-54-g00ecf