From 86ca408d469811d13a15d5c7a671feda38126eb0 Mon Sep 17 00:00:00 2001
From: rubidium <rubidium@openttd.org>
Date: Fri, 6 Mar 2009 01:23:25 +0000
Subject: (svn r15626) -Fix [FS#2698]: UTF8 string handling could cause buffer
 overruns.

---
 src/string.cpp | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'src/string.cpp')

diff --git a/src/string.cpp b/src/string.cpp
index 52dfc260c..fc818255f 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -97,13 +97,27 @@ char *CDECL str_fmt(const char *str, ...)
 }
 
 
-void str_validate(char *str, bool allow_newlines, bool ignore)
+void str_validate(char *str, const char *last, bool allow_newlines, bool ignore)
 {
+	/* Assume the ABSOLUTE WORST to be in str as it comes from the outside. */
+
 	char *dst = str;
-	WChar c;
-	size_t len;
+	while (*str != '\0') {
+		size_t len = Utf8EncodedCharLen(*str);
+		/* If the character is unknown, i.e. its encoded length is 0,
+		 * we assume the worst case for the length check.
+		 * The length check is needed to prevent Utf8Decode from reading
+		 * past the terminating '\0' if that happens to be placed
+		 * within the encoding of a UTF-8 character. */
+		if ((len == 0 && str + 4 > last) || str + len > last) break;
+
+		WChar c;
+		len = Utf8Decode(&c, str);
+		/* It's possible to encode the string termination character
+		 * as multiple bytes. This prevents those termination
+		 * characters from being skipped. */
+		if (c == '\0') break;
 
-	for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
 		if (IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) {
 			/* Copy the character back. Even if dst is current the same as str
 			 * (i.e. no characters have been changed) this is quicker than
-- 
cgit v1.2.3-54-g00ecf