(svn r23576) -Codechange: split the base of strgen with the strgen code that creates the actual .lng files

author: rubidium <rubidium@openttd.org> 2011-12-17 18:34:03 +0000
committer: rubidium <rubidium@openttd.org> 2011-12-17 18:34:03 +0000
commit: 1f083c3ac8961158cf7788a625e1786960d590d4 (patch)
tree: f7c33084901af698c52c51c72c9009d5f5e5711e /src/strgen
parent: c97b2a5224e350380e5f31b0612bf057d1a689ab (diff)
download: openttd-1f083c3ac8961158cf7788a625e1786960d590d4.tar.xz
3 files changed, 1054 insertions, 1010 deletions
diff --git a/src/strgen/strgen.cpp b/src/strgen/strgen.cpp
index 110e87fa0..4ca02a1a7 100644
--- a/src/strgen/strgen.cpp
+++ b/src/strgen/strgen.cpp
@@ -40,219 +40,6 @@
 
 #include "../table/strgen_tables.h"
 
-/* Compiles a list of strings into a compiled string list */
-
-static bool _translated;                     ///< Whether the current language is not the master language
-static bool _translation;                    ///< Is the current file actually a translation or not
-static const char *_file = "(unknown file)"; ///< The filename of the input, so we can refer to it in errors/warnings
-static int _cur_line;                        ///< The current line we're parsing in the input file
-static int _errors, _warnings, _show_todo;
-
-static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100; ///< Maximum size of every command block, not counting the name of the command itself
-static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei);
-
-/**
- * Create a new case.
- * @param caseidx The index of the case.
- * @param string  The translation of the case.
- * @param next    The next chained case.
- */
-Case::Case(int caseidx, const char *string, Case *next) :
-		caseidx(caseidx), string(strdup(string)), next(next)
-{
-}
-
-/** Free everything we allocated. */
-Case::~Case()
-{
-	free(this->string);
-	delete this->next;
-}
-
-/**
- * Create a new string.
- * @param name    The name of the string.
- * @param english The english "translation" of the string.
- * @param index   The index in the string table.
- * @param line    The line this string was found on.
- */
-LangString::LangString(const char *name, const char *english, int index, int line) :
-		name(strdup(name)), english(strdup(english)), translated(NULL),
-		hash_next(0), index(index), line(line), translated_case(NULL)
-{
-}
-
-/** Free everything we allocated. */
-LangString::~LangString()
-{
-	free(this->name);
-	free(this->english);
-	free(this->translated);
-	delete this->translated_case;
-}
-
-/** Free all data related to the translation. */
-void LangString::FreeTranslation()
-{
-	free(this->translated);
-	this->translated = NULL;
-
-	delete this->translated_case;
-	this->translated_case = NULL;
-}
-
-/**
- * Create a new string data container.
- * @param max_strings The maximum number of strings.
- */
-StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * STRINGS_IN_TAB)
-{
-	this->strings = CallocT<LangString *>(max_strings);
-	this->hash_heads = CallocT<uint16>(max_strings);
-	this->next_string_id = 0;
-}
-
-/** Free everything we allocated. */
-StringData::~StringData()
-{
-	for (size_t i = 0; i < this->max_strings; i++) delete this->strings[i];
-	free(this->strings);
-	free(this->hash_heads);
-}
-
-/** Free all data related to the translation. */
-void StringData::FreeTranslation()
-{
-	for (size_t i = 0; i < this->max_strings; i++) {
-		LangString *ls = this->strings[i];
-		if (ls != NULL) ls->FreeTranslation();
-	}
-}
-
-/**
- * Create a hash of the string for finding them back quickly.
- * @param s The string to hash.
- * @return The hashed string.
- */
-uint StringData::HashStr(const char *s) const
-{
-	uint hash = 0;
-	for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
-	return hash % this->max_strings;
-}
-
-/**
- * Add a newly created LangString.
- * @param s  The name of the string.
- * @param ls The string to add.
- */
-void StringData::Add(const char *s, LangString *ls)
-{
-	uint hash = this->HashStr(s);
-	ls->hash_next = this->hash_heads[hash];
-	/* Off-by-one for hash find. */
-	this->hash_heads[hash] = ls->index + 1;
-	this->strings[ls->index] = ls;
-}
-
-/**
- * Find a LangString based on the string name.
- * @param s The string name to search on.
- * @return The LangString or NULL if it is not known.
- */
-LangString *StringData::Find(const char *s)
-{
-	int idx = this->hash_heads[this->HashStr(s)];
-
-	while (--idx >= 0) {
-		LangString *ls = this->strings[idx];
-
-		if (strcmp(ls->name, s) == 0) return ls;
-		idx = ls->hash_next;
-	}
-	return NULL;
-}
-
-/**
- * Create a compound hash.
- * @param hash The hash to add the string hash to.
- * @param s    The string hash.
- * @return The new hash.
- */
-uint StringData::VersionHashStr(uint hash, const char *s) const
-{
-	for (; *s != '\0'; s++) {
-		hash = ROL(hash, 3) ^ *s;
-		hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
-	}
-	return hash;
-}
-
-/**
- * Make a hash of the file to get a unique "version number"
- * @return The version number.
- */
-uint StringData::Version() const
-{
-	uint hash = 0;
-
-	for (size_t i = 0; i < this->max_strings; i++) {
-		const LangString *ls = this->strings[i];
-
-		if (ls != NULL) {
-			const CmdStruct *cs;
-			const char *s;
-			char buf[MAX_COMMAND_PARAM_SIZE];
-			int argno;
-			int casei;
-
-			s = ls->name;
-			hash ^= i * 0x717239;
-			hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
-			hash = this->VersionHashStr(hash, s + 1);
-
-			s = ls->english;
-			while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
-				if (cs->flags & C_DONTCOUNT) continue;
-
-				hash ^= (cs - _cmd_structs) * 0x1234567;
-				hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
-			}
-		}
-	}
-
-	return hash;
-}
-
-/**
- * Count the number of tab elements that are in use.
- * @param tab The tab to count the elements of.
- */
-uint StringData::CountInUse(uint tab) const
-{
-	int i;
-	for (i = STRINGS_IN_TAB; --i >= 0;) if (this->strings[(tab * STRINGS_IN_TAB) + i] != NULL) break;
-	return i + 1;
-}
-
-static LanguagePackHeader _lang; ///< Header information about a language.
-
-static const char *_cur_ident;
-
-struct CmdPair {
-	const CmdStruct *a;
-	const char *v;
-};
-
-struct ParsedCommandStruct {
-	uint np;
-	CmdPair pairs[32];
-	const CmdStruct *cmd[32]; // ordered by param #
-};
-
-/* Used when generating some advanced commands. */
-static ParsedCommandStruct _cur_pcs;
-static int _cur_argidx;
 
 #ifdef _MSC_VER
 # define LINE_NUM_FMT(s) "%s (%d): warning: %s (" s ")\n"
@@ -260,9 +47,7 @@ static int _cur_argidx;
 # define LINE_NUM_FMT(s) "%s:%d: " s ": %s\n"
 #endif
 
-static void CDECL strgen_warning(const char *s, ...) WARN_FORMAT(1, 2);
-
-static void CDECL strgen_warning(const char *s, ...)
+void CDECL strgen_warning(const char *s, ...)
 {
 	char buf[1024];
 	va_list va;
@@ -273,9 +58,7 @@ static void CDECL strgen_warning(const char *s, ...)
 	_warnings++;
 }
 
-static void CDECL strgen_error(const char *s, ...) WARN_FORMAT(1, 2);
-
-static void CDECL strgen_error(const char *s, ...)
+void CDECL strgen_error(const char *s, ...)
 {
 	char buf[1024];
 	va_list va;
@@ -286,7 +69,7 @@ static void CDECL strgen_error(const char *s, ...)
 	_errors++;
 }
 
-void NORETURN CDECL error(const char *s, ...)
+void NORETURN CDECL strgen_fatal(const char *s, ...)
 {
 	char buf[1024];
 	va_list va;
@@ -300,356 +83,18 @@ void NORETURN CDECL error(const char *s, ...)
 	throw std::exception();
 }
 
-/** The buffer for writing a single string. */
-struct Buffer : SmallVector<byte, 256> {
-	/**
-	 * Conveniance method for adding a byte.
-	 * @param value The value to add.
-	 */
-	void AppendByte(byte value)
-	{
-		*this->Append() = value;
-	}
-
-	/**
-	 * Add an Unicode character encoded in UTF-8 to the buffer.
-	 * @param value The character to add.
-	 */
-	void AppendUtf8(uint32 value)
-	{
-		if (value < 0x80) {
-			*this->Append() = value;
-		} else if (value < 0x800) {
-			*this->Append() = 0xC0 + GB(value,  6, 5);
-			*this->Append() = 0x80 + GB(value,  0, 6);
-		} else if (value < 0x10000) {
-			*this->Append() = 0xE0 + GB(value, 12, 4);
-			*this->Append() = 0x80 + GB(value,  6, 6);
-			*this->Append() = 0x80 + GB(value,  0, 6);
-		} else if (value < 0x110000) {
-			*this->Append() = 0xF0 + GB(value, 18, 3);
-			*this->Append() = 0x80 + GB(value, 12, 6);
-			*this->Append() = 0x80 + GB(value,  6, 6);
-			*this->Append() = 0x80 + GB(value,  0, 6);
-		} else {
-			strgen_warning("Invalid unicode value U+0x%X", value);
-		}
-	}
-};
-
-size_t Utf8Validate(const char *s)
-{
-	uint32 c;
-
-	if (!HasBit(s[0], 7)) {
-		/* 1 byte */
-		return 1;
-	} else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
-		/* 2 bytes */
-		c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
-		if (c >= 0x80) return 2;
-	} else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
-		/* 3 bytes */
-		c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
-		if (c >= 0x800) return 3;
-	} else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
-		/* 4 bytes */
-		c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
-		if (c >= 0x10000 && c <= 0x10FFFF) return 4;
-	}
-
-	return 0;
-}
-
-
-static void EmitSingleChar(Buffer *buffer, char *buf, int value)
-{
-	if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
-	buffer->AppendUtf8(value);
-}
-
-
-/* The plural specifier looks like
- * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
-
-/* This is encoded like
- *  CommandByte <ARG#> <NUM> {Length of each string} {each string} */
-
-bool ParseRelNum(char **buf, int *value, int *offset)
-{
-	const char *s = *buf;
-	char *end;
-	bool rel = false;
-
-	while (*s == ' ' || *s == '\t') s++;
-	if (*s == '+') {
-		rel = true;
-		s++;
-	}
-	int v = strtol(s, &end, 0);
-	if (end == s) return false;
-	if (rel || v < 0) {
-		*value += v;
-	} else {
-		*value = v;
-	}
-	if (offset != NULL && *end == ':') {
-		/* Take the Nth within */
-		s = end + 1;
-		*offset = strtol(s, &end, 0);
-		if (end == s) return false;
-	}
-	*buf = end;
-	return true;
-}
-
-/* Parse out the next word, or NULL */
-char *ParseWord(char **buf)
-{
-	char *s = *buf, *r;
-
-	while (*s == ' ' || *s == '\t') s++;
-	if (*s == '\0') return NULL;
-
-	if (*s == '"') {
-		r = ++s;
-		/* parse until next " or NUL */
-		for (;;) {
-			if (*s == '\0') break;
-			if (*s == '"') {
-				*s++ = '\0';
-				break;
-			}
-			s++;
-		}
-	} else {
-		/* proceed until whitespace or NUL */
-		r = s;
-		for (;;) {
-			if (*s == '\0') break;
-			if (*s == ' ' || *s == '\t') {
-				*s++ = '\0';
-				break;
-			}
-			s++;
-		}
-	}
-	*buf = s;
-	return r;
-}
-
-/* Forward declaration */
-static int TranslateArgumentIdx(int arg, int offset = 0);
-
-static void EmitWordList(Buffer *buffer, const char * const *words, uint nw)
-{
-	buffer->AppendByte(nw);
-	for (uint i = 0; i < nw; i++) buffer->AppendByte(strlen(words[i]) + 1);
-	for (uint i = 0; i < nw; i++) {
-		for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
-		buffer->AppendByte(0);
-	}
-}
-
-static void EmitPlural(Buffer *buffer, char *buf, int value)
-{
-	int argidx = _cur_argidx;
-	int offset = 0;
-	const char *words[5];
-	int nw = 0;
-
-	/* Parse out the number, if one exists. Otherwise default to prev arg. */
-	if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
-
-	/* Parse each string */
-	for (nw = 0; nw < 5; nw++) {
-		words[nw] = ParseWord(&buf);
-		if (words[nw] == NULL) break;
-	}
-
-	if (nw == 0) {
-		error("%s: No plural words", _cur_ident);
-	}
-
-	if (_plural_forms[_lang.plural_form].plural_count != nw) {
-		if (_translated) {
-			error("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
-				_plural_forms[_lang.plural_form].plural_count, nw);
-		} else {
-			if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
-			if (nw > _plural_forms[_lang.plural_form].plural_count) {
-				nw = _plural_forms[_lang.plural_form].plural_count;
-			} else {
-				for (; nw < _plural_forms[_lang.plural_form].plural_count; nw++) {
-					words[nw] = words[nw - 1];
-				}
-			}
-		}
-	}
-
-	buffer->AppendUtf8(SCC_PLURAL_LIST);
-	buffer->AppendByte(_lang.plural_form);
-	buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
-	EmitWordList(buffer, words, nw);
-}
-
-
-static void EmitGender(Buffer *buffer, char *buf, int value)
-{
-	int argidx = _cur_argidx;
-	int offset = 0;
-	uint nw;
-
-	if (buf[0] == '=') {
-		buf++;
-
-		/* This is a {G=DER} command */
-		nw = _lang.GetGenderIndex(buf);
-		if (nw >= MAX_NUM_GENDERS) error("G argument '%s' invalid", buf);
-
-		/* now nw contains the gender index */
-		buffer->AppendUtf8(SCC_GENDER_INDEX);
-		buffer->AppendByte(nw);
-	} else {
-		const char *words[MAX_NUM_GENDERS];
-
-		/* This is a {G 0 foo bar two} command.
-		 * If no relative number exists, default to +0 */
-		if (!ParseRelNum(&buf, &argidx, &offset)) {}
-
-		const CmdStruct *cmd = _cur_pcs.cmd[argidx];
-		if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
-			error("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
-		}
-
-		for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
-			words[nw] = ParseWord(&buf);
-			if (words[nw] == NULL) break;
-		}
-		if (nw != _lang.num_genders) error("Bad # of arguments for gender command");
-
-		assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
-		buffer->AppendUtf8(SCC_GENDER_LIST);
-		buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
-		EmitWordList(buffer, words, nw);
-	}
-}
-
-static const CmdStruct *FindCmd(const char *s, int len)
-{
-	for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
-		if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
-	}
-	return NULL;
-}
-
-static uint ResolveCaseName(const char *str, uint len)
-{
-	/* First get a clean copy of only the case name, then resolve it. */
-	char case_str[CASE_GENDER_LEN];
-	len = min(lengthof(case_str) - 1, len);
-	memcpy(case_str, str, len);
-	case_str[len] = '\0';
-
-	uint8 case_idx = _lang.GetCaseIndex(case_str);
-	if (case_idx >= MAX_NUM_CASES) error("Invalid case-name '%s'", case_str);
-	return case_idx + 1;
-}
-
-
-/* returns NULL on eof
- * else returns command struct */
-static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
-{
-	const char *s = *str, *start;
-	char c;
-
-	*argno = -1;
-	*casei = -1;
-
-	/* Scan to the next command, exit if there's no next command. */
-	for (; *s != '{'; s++) {
-		if (*s == '\0') return NULL;
-	}
-	s++; // Skip past the {
-
-	if (*s >= '0' && *s <= '9') {
-		char *end;
-
-		*argno = strtoul(s, &end, 0);
-		if (*end != ':') error("missing arg #");
-		s = end + 1;
-	}
-
-	/* parse command name */
-	start = s;
-	do {
-		c = *s++;
-	} while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
-
-	const CmdStruct *cmd = FindCmd(start, s - start - 1);
-	if (cmd == NULL) {
-		strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
-		return NULL;
-	}
-
-	if (c == '.') {
-		const char *casep = s;
-
-		if (!(cmd->flags & C_CASE)) {
-			error("Command '%s' can't have a case", cmd->cmd);
-		}
-
-		do {
-			c = *s++;
-		} while (c != '}' && c != ' ' && c != '\0');
-		*casei = ResolveCaseName(casep, s - casep - 1);
-	}
-
-	if (c == '\0') {
-		strgen_error("Missing } from command '%s'", start);
-		return NULL;
-	}
-
-
-	if (c != '}') {
-		if (c == '=') s--;
-		/* copy params */
-		start = s;
-		for (;;) {
-			c = *s++;
-			if (c == '}') break;
-			if (c == '\0') {
-				strgen_error("Missing } from command '%s'", start);
-				return NULL;
-			}
-			if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
-			*param++ = c;
-		}
-	}
-	*param = '\0';
-
-	*str = s;
-
-	return cmd;
-}
-
-/**
- * Prepare reading.
- * @param data        The data to fill during reading.
- * @param file        The file we are reading.
- * @param master      Are we reading the master file?
- * @param translation Are we reading a translation?
- */
-StringReader::StringReader(StringData &data, const char *file, bool master, bool translation) :
-		data(data), file(strdup(file)), master(master), translation(translation)
-{
-}
-
-/** Make sure the right reader gets freed. */
-StringReader::~StringReader()
+void NORETURN CDECL error(const char *s, ...)
 {
-	free(file);
+	char buf[1024];
+	va_list va;
+	va_start(va, s);
+	vsnprintf(buf, lengthof(buf), s, va);
+	va_end(va);
+	fprintf(stderr, LINE_NUM_FMT("FATAL"), _file, _cur_line, buf);
+#ifdef _MSC_VER
+	fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, "language is not compiled");
+#endif
+	exit(2);
 }
 
 /** A reader that simply reads using fopen. */
@@ -768,233 +213,6 @@ void FileStringReader::HandlePragma(char *str)
 	}
 }
 
-static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
-{
-	char param[MAX_COMMAND_PARAM_SIZE];
-	int argno;
-	int argidx = 0;
-	int casei;
-
-	memset(p, 0, sizeof(*p));
-
-	for (;;) {
-		/* read until next command from a. */
-		const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
-
-		if (ar == NULL) break;
-
-		/* Sanity checking */
-		if (argno != -1 && ar->consumes == 0) error("Non consumer param can't have a paramindex");
-
-		if (ar->consumes) {
-			if (argno != -1) argidx = argno;
-			if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) error("invalid param idx %d", argidx);
-			if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) error("duplicate param idx %d", argidx);
-
-			p->cmd[argidx++] = ar;
-		} else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
-			if (p->np >= lengthof(p->pairs)) error("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
-			p->pairs[p->np].a = ar;
-			p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
-			p->np++;
-		}
-	}
-}
-
-
-static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
-{
-	if (a == NULL) return NULL;
-
-	if (strcmp(a->cmd, "STRING1") == 0 ||
-			strcmp(a->cmd, "STRING2") == 0 ||
-			strcmp(a->cmd, "STRING3") == 0 ||
-			strcmp(a->cmd, "STRING4") == 0 ||
-			strcmp(a->cmd, "STRING5") == 0 ||
-			strcmp(a->cmd, "RAW_STRING") == 0) {
-		return FindCmd("STRING", 6);
-	}
-
-	return a;
-}
-
-
-static bool CheckCommandsMatch(char *a, char *b, const char *name)
-{
-	/* If we're not translating, i.e. we're compiling the base language,
-	 * it is pointless to do all these checks as it'll always be correct.
-	 * After all, all checks are based on the base language.
-	 */
-	if (!_translation) return true;
-
-	ParsedCommandStruct templ;
-	ParsedCommandStruct lang;
-	bool result = true;
-
-	ExtractCommandString(&templ, b, true);
-	ExtractCommandString(&lang, a, true);
-
-	/* For each string in templ, see if we find it in lang */
-	if (templ.np != lang.np) {
-		strgen_warning("%s: template string and language string have a different # of commands", name);
-		result = false;
-	}
-
-	for (uint i = 0; i < templ.np; i++) {
-		/* see if we find it in lang, and zero it out */
-		bool found = false;
-		for (uint j = 0; j < lang.np; j++) {
-			if (templ.pairs[i].a == lang.pairs[j].a &&
-					strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
-				/* it was found in both. zero it out from lang so we don't find it again */
-				lang.pairs[j].a = NULL;
-				found = true;
-				break;
-			}
-		}
-
-		if (!found) {
-			strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
-			result = false;
-		}
-	}
-
-	/* if we reach here, all non consumer commands match up.
-	 * Check if the non consumer commands match up also. */
-	for (uint i = 0; i < lengthof(templ.cmd); i++) {
-		if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
-			strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
-				lang.cmd[i]  == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
-				templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
-			result = false;
-		}
-	}
-
-	return result;
-}
-
-void StringReader::HandleString(char *str)
-{
-	if (*str == '#') {
-		if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
-		return;
-	}
-
-	/* Ignore comments & blank lines */
-	if (*str == ';' || *str == ' ' || *str == '\0') return;
-
-	char *s = strchr(str, ':');
-	if (s == NULL) {
-		strgen_error("Line has no ':' delimiter");
-		return;
-	}
-
-	char *t;
-	/* Trim spaces.
-	 * After this str points to the command name, and s points to the command contents */
-	for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
-	*t = 0;
-	s++;
-
-	/* Check string is valid UTF-8 */
-	const char *tmp;
-	for (tmp = s; *tmp != '\0';) {
-		size_t len = Utf8Validate(tmp);
-		if (len == 0) error("Invalid UTF-8 sequence in '%s'", s);
-
-		WChar c;
-		Utf8Decode(&c, tmp);
-		if (c <= 0x001F || // ASCII control character range
-				(c >= 0xE000 && c <= 0xF8FF) || // Private range
-				(c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
-			error("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
-		}
-
-		tmp += len;
-	}
-
-	/* Check if the string has a case..
-	 * The syntax for cases is IDENTNAME.case */
-	char *casep = strchr(str, '.');
-	if (casep != NULL) *casep++ = '\0';
-
-	/* Check if this string already exists.. */
-	LangString *ent = this->data.Find(str);
-
-	if (this->master) {
-		if (casep != NULL) {
-			strgen_error("Cases in the base translation are not supported.");
-			return;
-		}
-
-		if (ent != NULL) {
-			strgen_error("String name '%s' is used multiple times", str);
-			return;
-		}
-
-		if (this->data.strings[this->data.next_string_id] != NULL) {
-			strgen_error("String ID 0x%X for '%s' already in use by '%s'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
-			return;
-		}
-
-		/* Allocate a new LangString */
-		this->data.Add(str, new LangString(str, s, this->data.next_string_id++, _cur_line));
-	} else {
-		if (ent == NULL) {
-			strgen_warning("String name '%s' does not exist in master file", str);
-			return;
-		}
-
-		if (ent->translated && casep == NULL) {
-			strgen_error("String name '%s' is used multiple times", str);
-			return;
-		}
-
-		/* make sure that the commands match */
-		if (!CheckCommandsMatch(s, ent->english, str)) return;
-
-		if (casep != NULL) {
-			ent->translated_case = new Case(ResolveCaseName(casep, strlen(casep)), s, ent->translated_case);
-		} else {
-			ent->translated = strdup(s);
-			/* If the string was translated, use the line from the
-			 * translated language so errors in the translated file
-			 * are properly referenced to. */
-			ent->line = _cur_line;
-		}
-	}
-}
-
-
-static void rstrip(char *buf)
-{
-	int i = strlen(buf);
-	while (i > 0 && (buf[i - 1] == '\r' || buf[i - 1] == '\n' || buf[i - 1] == ' ')) i--;
-	buf[i] = '\0';
-}
-
-void StringReader::ParseFile()
-{
-	char buf[2048];
-	_warnings = _errors = 0;
-
-	_translation = this->master || this->translation;
-	_file = this->file;
-
-	/* For each new file we parse, reset the genders, and language codes. */
-	MemSetT(&_lang, 0);
-	strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator));
-	strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency));
-	strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator));
-
-	_cur_line = 1;
-	while (this->ReadLine(buf, sizeof(buf)) != NULL) {
-		rstrip(buf);
-		this->HandleString(buf);
-		_cur_line++;
-	}
-}
-
 bool CompareFiles(const char *n1, const char *n2)
 {
 	FILE *f2 = fopen(n2, "rb");
@@ -1060,23 +278,6 @@ struct FileWriter {
 	}
 };
 
-/**
- * Write the header information.
- * @param data The data about the string.
- */
-void HeaderWriter::WriteHeader(const StringData &data)
-{
-	int last = 0;
-	for (size_t i = 0; i < data.max_strings; i++) {
-		if (data.strings[i] != NULL) {
-			this->WriteStringID(data.strings[i]->name, i);
-			last = i;
-		}
-	}
-
-	this->WriteStringID("STR_LAST_STRINGID", last);
-}
-
 struct HeaderFileWriter : HeaderWriter, FileWriter {
 	/** The real file name we eventually want to write to. */
 	const char *real_filename;
@@ -1135,203 +336,6 @@ struct HeaderFileWriter : HeaderWriter, FileWriter {
 	}
 };
 
-static int TranslateArgumentIdx(int argidx, int offset)
-{
-	int sum;
-
-	if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
-		error("invalid argidx %d", argidx);
-	}
-	const CmdStruct *cs = _cur_pcs.cmd[argidx];
-	if (cs != NULL && cs->consumes <= offset) {
-		error("invalid argidx offset %d:%d", argidx, offset);
-	}
-
-	if (_cur_pcs.cmd[argidx] == NULL) {
-		error("no command for this argidx %d", argidx);
-	}
-
-	for (int i = sum = 0; i < argidx; i++) {
-		const CmdStruct *cs = _cur_pcs.cmd[i];
-
-		sum += (cs != NULL) ? cs->consumes : 1;
-	}
-
-	return sum + offset;
-}
-
-static void PutArgidxCommand(Buffer *buffer)
-{
-	buffer->AppendUtf8(SCC_ARG_INDEX);
-	buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
-}
-
-
-static void PutCommandString(Buffer *buffer, const char *str)
-{
-	_cur_argidx = 0;
-
-	while (*str != '\0') {
-		/* Process characters as they are until we encounter a { */
-		if (*str != '{') {
-			buffer->AppendByte(*str++);
-			continue;
-		}
-
-		char param[MAX_COMMAND_PARAM_SIZE];
-		int argno;
-		int casei;
-		const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
-		if (cs == NULL) break;
-
-		if (casei != -1) {
-			buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
-			buffer->AppendByte(casei);
-		}
-
-		/* For params that consume values, we need to handle the argindex properly */
-		if (cs->consumes > 0) {
-			/* Check if we need to output a move-param command */
-			if (argno != -1 && argno != _cur_argidx) {
-				_cur_argidx = argno;
-				PutArgidxCommand(buffer);
-			}
-
-			/* Output the one from the master string... it's always accurate. */
-			cs = _cur_pcs.cmd[_cur_argidx++];
-			if (cs == NULL) {
-				error("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
-			}
-		}
-
-		cs->proc(buffer, param, cs->value);
-	}
-}
-
-/**
- * Write the length as a simple gamma.
- * @param length The number to write.
- */
-void LanguageWriter::WriteLength(uint length)
-{
-	char buffer[2];
-	int offs = 0;
-	if (length >= 0x4000) {
-		error("string too long");
-	}
-
-	if (length >= 0xC0) {
-		buffer[offs++] = (length >> 8) | 0xC0;
-	}
-	buffer[offs++] = length & 0xFF;
-	this->Write((byte*)buffer, offs);
-}
-
-/**
- * Actually write the language.
- * @param data The data about the string.
- */
-void LanguageWriter::WriteLang(const StringData &data)
-{
-	uint *in_use = AllocaM(uint, data.tabs);
-	for (size_t tab = 0; tab < data.tabs; tab++) {
-		uint n = data.CountInUse(tab);
-
-		in_use[tab] = n;
-		_lang.offsets[tab] = TO_LE16(n);
-
-		for (uint j = 0; j != in_use[tab]; j++) {
-			const LangString *ls = data.strings[(tab * StringData::STRINGS_IN_TAB) + j];
-			if (ls != NULL && ls->translated == NULL) _lang.missing++;
-		}
-	}
-
-	_lang.ident = TO_LE32(LanguagePackHeader::IDENT);
-	_lang.version = TO_LE32(data.Version());
-	_lang.missing = TO_LE16(_lang.missing);
-	_lang.winlangid = TO_LE16(_lang.winlangid);
-
-	this->WriteHeader(&_lang);
-	Buffer buffer;
-
-	for (size_t tab = 0; tab < data.tabs; tab++) {
-		for (uint j = 0; j != in_use[tab]; j++) {
-			const LangString *ls = data.strings[(tab * StringData::STRINGS_IN_TAB) + j];
-			const Case *casep;
-			const char *cmdp;
-
-			/* For undefined strings, just set that it's an empty string */
-			if (ls == NULL) {
-				this->WriteLength(0);
-				continue;
-			}
-
-			_cur_ident = ls->name;
-			_cur_line = ls->line;
-
-			/* Produce a message if a string doesn't have a translation. */
-			if (_show_todo > 0 && ls->translated == NULL) {
-				if ((_show_todo & 2) != 0) {
-					strgen_warning("'%s' is untranslated", ls->name);
-				}
-				if ((_show_todo & 1) != 0) {
-					const char *s = "<TODO> ";
-					while (*s != '\0') buffer.AppendByte(*s++);
-				}
-			}
-
-			/* Extract the strings and stuff from the english command string */
-			ExtractCommandString(&_cur_pcs, ls->english, false);
-
-			if (ls->translated_case != NULL || ls->translated != NULL) {
-				casep = ls->translated_case;
-				cmdp = ls->translated;
-			} else {
-				casep = NULL;
-				cmdp = ls->english;
-			}
-
-			_translated = cmdp != ls->english;
-
-			if (casep != NULL) {
-				const Case *c;
-				uint num;
-
-				/* Need to output a case-switch.
-				 * It has this format
-				 * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
-				 * Each LEN is printed using 2 bytes in big endian order. */
-				buffer.AppendUtf8(SCC_SWITCH_CASE);
-				/* Count the number of cases */
-				for (num = 0, c = casep; c; c = c->next) num++;
-				buffer.AppendByte(num);
-
-				/* Write each case */
-				for (c = casep; c != NULL; c = c->next) {
-					buffer.AppendByte(c->caseidx);
-					/* Make some space for the 16-bit length */
-					size_t pos = buffer.Length();
-					buffer.AppendByte(0);
-					buffer.AppendByte(0);
-					/* Write string */
-					PutCommandString(&buffer, c->string);
-					buffer.AppendByte(0); // terminate with a zero
-					/* Fill in the length */
-					size_t size = buffer.Length() - (pos + 2);
-					buffer[pos + 0] = GB(size, 8, 8);
-					buffer[pos + 1] = GB(size, 0, 8);
-				}
-			}
-
-			if (cmdp != NULL) PutCommandString(&buffer, cmdp);
-
-			this->WriteLength(buffer.Length());
-			this->Write(buffer.Begin(), buffer.Length());
-			buffer.Clear();
-		}
-	}
-}
-
 /** Class for writing a language to disk. */
 struct LanguageFileWriter : LanguageWriter, FileWriter {
 	/**
diff --git a/src/strgen/strgen.h b/src/strgen/strgen.h
index 80737f6eb..b3584ab5f 100644
--- a/src/strgen/strgen.h
+++ b/src/strgen/strgen.h
@@ -138,4 +138,14 @@ struct LanguageWriter {
 	void WriteLang(const StringData &data);
 };
 
+void CDECL strgen_warning(const char *s, ...) WARN_FORMAT(1, 2);
+void CDECL strgen_error(const char *s, ...) WARN_FORMAT(1, 2);
+void NORETURN CDECL strgen_fatal(const char *s, ...) WARN_FORMAT(1, 2);
+char *ParseWord(char **buf);
+
+extern const char *_file;
+extern int _cur_line;
+extern int _errors, _warnings, _show_todo;
+extern LanguagePackHeader _lang;
+
 #endif /* STRGEN_H */
diff --git a/src/strgen/strgen_base.cpp b/src/strgen/strgen_base.cpp
new file mode 100644
index 000000000..98e5aefb1
--- /dev/null
+++ b/src/strgen/strgen_base.cpp
@@ -0,0 +1,1030 @@
+/* $Id$ */
+
+/*
+ * This file is part of OpenTTD.
+ * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
+ * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/** @file strgen_base.cpp Tool to create computer readable (stand-alone) translation files. */
+
+#include "../stdafx.h"
+#include "../core/endian_func.hpp"
+#include "../string_func.h"
+#include "../strings_type.h"
+#include "../language.h"
+#include "../table/control_codes.h"
+
+#include "strgen.h"
+
+#include <stdarg.h>
+#include <exception>
+
+#include "../table/strgen_tables.h"
+
+/* Compiles a list of strings into a compiled string list */
+
+static bool _translated;              ///< Whether the string currently being written comes from a translation (set in WriteLang)
+static bool _translation;             ///< Is the current file actually a translation or not
+const char *_file = "(unknown file)"; ///< The filename of the input, so we can refer to it in errors/warnings
+int _cur_line;                        ///< The current line we're parsing in the input file
+int _errors, _warnings, _show_todo;   ///< _errors/_warnings are reset by ParseFile; _show_todo is a bit mask: 1 prefixes "<TODO> ", 2 warns about untranslated strings
+LanguagePackHeader _lang;             ///< Header information about a language.
+
+static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100; ///< Maximum size of every command block, not counting the name of the command itself
+static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei); // defined further down; StringData::Version needs it earlier
+
+/**
+ * Create a new case and chain it in front of \a next.
+ * @param caseidx The index of the case.
+ * @param string  The translation of the case; a copy is made with strdup.
+ * @param next    The next chained case; it is deleted together with this case.
+ */
+Case::Case(int caseidx, const char *string, Case *next) :
+		caseidx(caseidx), string(strdup(string)), next(next)
+{
+}
+
+/** Destroy the remainder of the chain and release the copied string. */
+Case::~Case()
+{
+	delete this->next; // recursively destroys every following case
+	free(this->string);
+}
+
+/**
+ * Create a new string; it starts without translation and without translated cases.
+ * @param name    The name of the string; copied with strdup.
+ * @param english The english "translation" of the string; copied with strdup.
+ * @param index   The index in the string table.
+ * @param line    The line this string was found on.
+ */
+LangString::LangString(const char *name, const char *english, int index, int line) :
+		name(strdup(name)), english(strdup(english)), translated(NULL),
+		hash_next(0), index(index), line(line), translated_case(NULL)
+{
+}
+
+/** Release the translated cases and every string copy held by this entry. */
+LangString::~LangString()
+{
+	delete this->translated_case;
+	free(this->translated);
+	free(this->english);
+	free(this->name);
+}
+
+/** Drop the translated text and all translated cases; name and english text stay. */
+void LangString::FreeTranslation()
+{
+	delete this->translated_case;
+	this->translated_case = NULL;
+
+	free(this->translated);
+	this->translated = NULL;
+}
+
+/**
+ * Create a new, empty string data container with room for tabs * STRINGS_IN_TAB strings.
+ * @param tabs The number of string tabs to make room for.
+ */
+StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * STRINGS_IN_TAB)
+{
+	this->strings = CallocT<LangString *>(max_strings);
+	this->hash_heads = CallocT<uint16>(max_strings);
+	this->next_string_id = 0;
+}
+
+/** Destroy every stored string and release both lookup tables. */
+StringData::~StringData()
+{
+	for (size_t idx = this->max_strings; idx-- > 0;) delete this->strings[idx];
+	free(this->hash_heads);
+	free(this->strings);
+}
+
+/** Discard the translation of every stored string; empty slots are skipped. */
+void StringData::FreeTranslation()
+{
+	for (size_t idx = 0; idx < this->max_strings; idx++) {
+		if (this->strings[idx] == NULL) continue;
+		this->strings[idx]->FreeTranslation();
+	}
+}
+
+/**
+ * Hash a string name into a slot of the hash table, for finding it back quickly.
+ * @param s The NUL terminated string to hash.
+ * @return The hash, always smaller than max_strings.
+ */
+uint StringData::HashStr(const char *s) const
+{
+	uint result = 0;
+	while (*s != '\0') result = ROL(result, 3) ^ *s++;
+	return result % this->max_strings;
+}
+
+/**
+ * Register a newly created LangString in the table and at the head of its hash chain.
+ * @param s  The name of the string, used for hashing.
+ * @param ls The string to add; its index selects the table slot.
+ */
+void StringData::Add(const char *s, LangString *ls)
+{
+	uint16 &head = this->hash_heads[this->HashStr(s)];
+	ls->hash_next = head;
+	/* Heads are stored one-based, so that zero means "empty chain" for Find. */
+	head = ls->index + 1;
+	this->strings[ls->index] = ls;
+}
+
+/**
+ * Look up a LangString by name by walking the hash chain of that name.
+ * @param s The string name to search on.
+ * @return The LangString or NULL if it is not known.
+ */
+LangString *StringData::Find(const char *s)
+{
+	/* Chain links are one-based indices; zero terminates the chain. */
+	for (int idx = this->hash_heads[this->HashStr(s)]; idx > 0;) {
+		LangString *candidate = this->strings[idx - 1];
+		if (strcmp(candidate->name, s) == 0) return candidate;
+
+		idx = candidate->hash_next;
+	}
+
+	return NULL;
+}
+
+/**
+ * Fold a string into a running (compound) hash.
+ * @param hash The hash accumulated so far.
+ * @param s    The string to mix into the hash.
+ * @return The new hash.
+ */
+uint StringData::VersionHashStr(uint hash, const char *s) const
+{
+	for (const char *p = s; *p != '\0'; p++) {
+		hash = ROL(hash, 3) ^ *p;
+		hash = (hash >> 1) ^ ((hash & 1) != 0 ? 0xDEADBEEF : 0);
+	}
+	return hash;
+}
+
+/**
+ * Make a hash of all strings to get a unique "version number".
+ * The hash covers the slot, the name and the counted commands of each string.
+ * @return The version number.
+ */
+uint StringData::Version() const
+{
+	uint hash = 0;
+
+	for (size_t i = 0; i < this->max_strings; i++) {
+		const LangString *ls = this->strings[i];
+		if (ls == NULL) continue;
+
+		/* Mix in the slot of the string, then its name without the first character. */
+		hash ^= i * 0x717239;
+		hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
+		hash = this->VersionHashStr(hash, ls->name + 1);
+
+		/* Mix in every counted command of the english text, in order of appearance. */
+		char buf[MAX_COMMAND_PARAM_SIZE];
+		int argno;
+		int casei;
+		const char *s = ls->english;
+		for (;;) {
+			const CmdStruct *cs = ParseCommandString(&s, buf, &argno, &casei);
+			if (cs == NULL) break;
+			if (cs->flags & C_DONTCOUNT) continue;
+
+			hash ^= (cs - _cmd_structs) * 0x1234567;
+			hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
+		}
+	}
+
+	return hash;
+}
+
+/**
+ * Count the number of tab elements that are in use, i.e. up to the last used slot.
+ * @param tab The tab to count the elements of.
+ * @return The index of the last used slot in the tab plus one; 0 for an empty tab.
+ */
+uint StringData::CountInUse(uint tab) const
+{
+	for (int i = STRINGS_IN_TAB; --i >= 0;) if (this->strings[(tab * STRINGS_IN_TAB) + i] != NULL) return i + 1;
+	return 0;
+}
+
+static const char *_cur_ident; ///< Name of the string currently being written; used in diagnostics
+
+struct CmdPair {
+	const CmdStruct *a; ///< The command
+	const char *v;      ///< The parameter text of the command ("" when it had none)
+};
+
+struct ParsedCommandStruct {
+	uint np;                  ///< Number of used entries in pairs
+	CmdPair pairs[32];        ///< Counted commands that consume no parameters, in order of appearance
+	const CmdStruct *cmd[32]; // ordered by param #
+};
+
+/* Used when generating some advanced commands. */
+static ParsedCommandStruct _cur_pcs; ///< Commands of the english text of the string being written
+static int _cur_argidx;              ///< Argument index the next consuming command will use
+
+/** Growable byte buffer in which a single compiled string is assembled. */
+struct Buffer : SmallVector<byte, 256> {
+	/**
+	 * Convenience method for appending a single byte.
+	 * @param value The byte to append.
+	 */
+	void AppendByte(byte value)
+	{
+		*this->Append() = value;
+	}
+
+	/**
+	 * Append a Unicode character, encoded as UTF-8, to the buffer.
+	 * @param value The character to append; values from 0x110000 on only cause a warning.
+	 */
+	void AppendUtf8(uint32 value)
+	{
+		if (value < 0x80) {
+			this->AppendByte(value);
+		} else if (value < 0x800) {
+			this->AppendByte(0xC0 + GB(value,  6, 5));
+			this->AppendByte(0x80 + GB(value,  0, 6));
+		} else if (value < 0x10000) {
+			this->AppendByte(0xE0 + GB(value, 12, 4));
+			this->AppendByte(0x80 + GB(value,  6, 6));
+			this->AppendByte(0x80 + GB(value,  0, 6));
+		} else if (value < 0x110000) {
+			this->AppendByte(0xF0 + GB(value, 18, 3));
+			this->AppendByte(0x80 + GB(value, 12, 6));
+			this->AppendByte(0x80 + GB(value,  6, 6));
+			this->AppendByte(0x80 + GB(value,  0, 6));
+		} else {
+			strgen_warning("Invalid unicode value U+0x%X", value);
+		}
+	}
+};
+
+/** Measure the UTF-8 sequence starting at \a s; returns its length in bytes, or 0 when it is invalid. */
+size_t Utf8Validate(const char *s)
+{
+	/* 1 byte */
+	if (!HasBit(s[0], 7)) return 1;
+	if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
+		/* 2 bytes; values that fit in fewer bytes are rejected */
+		uint32 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
+		return c >= 0x80 ? 2 : 0;
+	}
+	if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
+		/* 3 bytes; values that fit in fewer bytes are rejected */
+		uint32 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
+		return c >= 0x800 ? 3 : 0;
+	}
+	if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
+		/* 4 bytes; values that fit in fewer bytes or lie beyond U+10FFFF are rejected */
+		uint32 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
+		return (c >= 0x10000 && c <= 0x10FFFF) ? 4 : 0;
+	}
+
+	return 0;
+}
+
+/** Emit \a value as one UTF-8 encoded character; any parameter text in \a buf only triggers a warning. */
+void EmitSingleChar(Buffer *buffer, char *buf, int value)
+{
+	if (buf[0] != '\0') strgen_warning("Ignoring trailing letters in command");
+	buffer->AppendUtf8(value);
+}
+
+
+/* The plural specifier looks like
+ * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
+
+/* This is encoded like
+ *  CommandByte <ARG#> <NUM> {Length of each string} {each string} */
+
+/** Parse "[+]N[:offset]" from *buf into *value and *offset, advancing *buf; returns false when a number is missing. */
+bool ParseRelNum(char **buf, int *value, int *offset)
+{
+	const char *s = *buf;
+	while (*s == ' ' || *s == '\t') s++;
+
+	/* An explicit '+' makes the number relative to the incoming *value. */
+	const bool rel = (*s == '+');
+	if (rel) s++;
+
+	char *end;
+	int v = strtol(s, &end, 0);
+	if (end == s) return false;
+
+	/* Negative numbers are relative as well; everything else is absolute. */
+	*value = (rel || v < 0) ? *value + v : v;
+
+	if (offset != NULL && *end == ':') {
+		/* Take the Nth within */
+		s = end + 1;
+		*offset = strtol(s, &end, 0);
+		if (end == s) return false;
+	}
+
+	*buf = end;
+	return true;
+}
+
+/**
+ * Split off the next word from a parameter string.
+ * A word is either delimited by blanks or enclosed in double quotes. The
+ * delimiter that ends the word is overwritten with NUL, so the input is modified.
+ * @param buf In: where to start parsing. Out: the position just past the parsed word.
+ * @return The word, or NULL when only whitespace was left (*buf is untouched then).
+ */
+char *ParseWord(char **buf)
+{
+	char *s = *buf;
+
+	/* Skip leading blanks. */
+	while (*s == ' ' || *s == '\t') s++;
+	if (*s == '\0') return NULL;
+
+	/* A quoted word runs up to the closing quote, a bare word up to the next blank. */
+	const bool quoted = (*s == '"');
+	if (quoted) s++;
+	char *word = s;
+
+	/* A word without closing delimiter simply runs to the end of the string. */
+	while (*s != '\0') {
+		const bool at_end = quoted ? (*s == '"') : (*s == ' ' || *s == '\t');
+		if (at_end) {
+			/* Terminate the word in place and continue after the delimiter. */
+			*s++ = '\0';
+			break;
+		}
+		s++;
+	}
+
+	*buf = s;
+	return word;
+}
+
+/* Forward declaration; callers that pass only an index get an offset of 0. */
+static int TranslateArgumentIdx(int arg, int offset = 0);
+/** Write \a nw words as: a count byte, one size byte per word (its length plus NUL), then each word with its NUL. */
+static void EmitWordList(Buffer *buffer, const char * const *words, uint nw)
+{
+	buffer->AppendByte(nw);
+	for (uint i = 0; i < nw; i++) buffer->AppendByte(strlen(words[i]) + 1);
+	for (uint i = 0; i < nw; i++) {
+		const char *p = words[i];
+		do { buffer->AppendByte(*p); } while (*p++ != '\0');
+	}
+}
+/** Emit a plural command: SCC_PLURAL_LIST, the plural form, the position of the argument it depends on and the word list. \a value is unused. */
+void EmitPlural(Buffer *buffer, char *buf, int value)
+{
+	int argidx = _cur_argidx;
+	int offset = 0;
+	const char *words[5]; // NOTE(review): assumes no plural form has a plural_count above 5 - confirm against _plural_forms
+	int nw = 0;
+
+	/* Parse out the number, if one exists. Otherwise default to prev arg. */
+	if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
+
+	/* Parse each string */
+	for (nw = 0; nw < 5; nw++) {
+		words[nw] = ParseWord(&buf);
+		if (words[nw] == NULL) break;
+	}
+
+	if (nw == 0) {
+		strgen_fatal("%s: No plural words", _cur_ident);
+	}
+
+	if (_plural_forms[_lang.plural_form].plural_count != nw) {
+		if (_translated) {
+			strgen_fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
+				_plural_forms[_lang.plural_form].plural_count, nw);
+		} else {
+			if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
+			if (nw > _plural_forms[_lang.plural_form].plural_count) {
+				nw = _plural_forms[_lang.plural_form].plural_count; // drop the surplus words
+			} else {
+				for (; nw < _plural_forms[_lang.plural_form].plural_count; nw++) {
+					words[nw] = words[nw - 1]; // repeat the last word; nw > 0 is guaranteed above
+				}
+			}
+		}
+	}
+
+	buffer->AppendUtf8(SCC_PLURAL_LIST);
+	buffer->AppendByte(_lang.plural_form);
+	buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
+	EmitWordList(buffer, words, nw);
+}
+
+/** Emit a gender command: "=NAME" writes SCC_GENDER_INDEX and the gender index, otherwise SCC_GENDER_LIST, an argument position and the words. \a value is unused. */
+void EmitGender(Buffer *buffer, char *buf, int value)
+{
+	int argidx = _cur_argidx;
+	int offset = 0;
+	uint nw;
+
+	if (buf[0] == '=') {
+		buf++;
+
+		/* This is a {G=DER} command */
+		nw = _lang.GetGenderIndex(buf);
+		if (nw >= MAX_NUM_GENDERS) strgen_fatal("G argument '%s' invalid", buf);
+
+		/* now nw contains the gender index */
+		buffer->AppendUtf8(SCC_GENDER_INDEX);
+		buffer->AppendByte(nw);
+	} else {
+		const char *words[MAX_NUM_GENDERS];
+
+		/* This is a {G 0 foo bar two} command.
+		 * If no relative number exists, default to +0 */
+		ParseRelNum(&buf, &argidx, &offset);
+		if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) strgen_fatal("invalid argidx %d", argidx); // the index comes from the input; never read outside _cur_pcs.cmd
+		const CmdStruct *cmd = _cur_pcs.cmd[argidx];
+		if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
+			strgen_fatal("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
+		}
+
+		/* Collect the words; there must be exactly one per gender of the language. */
+		for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
+			words[nw] = ParseWord(&buf);
+			if (words[nw] == NULL) break;
+		}
+		if (nw != _lang.num_genders) strgen_fatal("Bad # of arguments for gender command");
+
+		assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
+		buffer->AppendUtf8(SCC_GENDER_LIST);
+		buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
+		EmitWordList(buffer, words, nw);
+	}
+}
+/** Find the command whose name is exactly the first \a len characters of \a s; NULL when there is none. */
+static const CmdStruct *FindCmd(const char *s, int len)
+{
+	const CmdStruct *cs = _cmd_structs;
+	/* The name has to match over len characters and has to end right after them. */
+	while (cs != endof(_cmd_structs) && (strncmp(cs->cmd, s, len) != 0 || cs->cmd[len] != '\0')) cs++;
+	return cs != endof(_cmd_structs) ? cs : NULL;
+}
+/** Turn a case name of \a len characters into its case index plus one; an unknown name is fatal. */
+static uint ResolveCaseName(const char *str, uint len)
+{
+	/* Work on a NUL terminated, length limited copy of only the case name. */
+	char name[CASE_GENDER_LEN];
+	const uint copy_len = min(lengthof(name) - 1, len);
+	memcpy(name, str, copy_len);
+	name[copy_len] = '\0';
+
+	const uint8 idx = _lang.GetCaseIndex(name);
+	if (idx >= MAX_NUM_CASES) strgen_fatal("Invalid case-name '%s'", name);
+	return idx + 1;
+}
+
+
+/** Parse the next {command} in *str: fills \a param, \a argno and \a casei (both -1 when absent) and moves *str past the command.
+ * @return The command, or NULL at the end of the string or after a reported error (then *str and \a param are not finalised). */
+static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
+{
+	const char *s = *str, *start;
+	char c;
+
+	*argno = -1;
+	*casei = -1;
+
+	/* Scan to the next command, exit if there's no next command. */
+	for (; *s != '{'; s++) {
+		if (*s == '\0') return NULL;
+	}
+	s++; // Skip past the {
+
+	/* An optional "<number>:" prefix selects the argument explicitly. */
+	if (*s >= '0' && *s <= '9') {
+		char *end;
+
+		*argno = strtoul(s, &end, 0);
+		if (*end != ':') strgen_fatal("missing arg #");
+		s = end + 1;
+	}
+
+	/* parse command name */
+	start = s;
+	do {
+		c = *s++;
+	} while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
+
+	const CmdStruct *cmd = FindCmd(start, s - start - 1);
+	if (cmd == NULL) {
+		strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
+		return NULL;
+	}
+
+	/* A '.' after the name introduces a case, which only some commands support. */
+	if (c == '.') {
+		const char *casep = s;
+
+		if (!(cmd->flags & C_CASE)) {
+			strgen_fatal("Command '%s' can't have a case", cmd->cmd);
+		}
+
+		do {
+			c = *s++;
+		} while (c != '}' && c != ' ' && c != '\0');
+		*casei = ResolveCaseName(casep, s - casep - 1);
+	}
+
+	if (c == '\0') {
+		strgen_error("Missing } from command '%s'", start);
+		return NULL;
+	}
+
+	/* Anything between the name (or case) and the closing } is the parameter text. */
+	if (c != '}') {
+		if (c == '=') s--; // the '=' is part of the parameter text
+		/* copy params */
+		start = s;
+		for (;;) {
+			c = *s++;
+			if (c == '}') break;
+			if (c == '\0') {
+				strgen_error("Missing } from command '%s'", start);
+				return NULL;
+			}
+			if (s - start == MAX_COMMAND_PARAM_SIZE) strgen_fatal("param command too long"); // keeps room for the terminating NUL
+			*param++ = c;
+		}
+	}
+	*param = '\0';
+
+	*str = s;
+
+	return cmd;
+}
+
+/**
+ * Prepare reading; nothing is read until ParseFile is called.
+ * @param data        The data to fill during reading.
+ * @param file        The file we are reading; the name is copied with strdup.
+ * @param master      Are we reading the master file?
+ * @param translation Are we reading a translation?
+ */
+StringReader::StringReader(StringData &data, const char *file, bool master, bool translation) :
+		data(data), file(strdup(file)), master(master), translation(translation)
+{
+}
+
+/** Release the copy of the file name made by the constructor. */
+StringReader::~StringReader()
+{
+	free(this->file);
+}
+/** Collect the commands of \a s into \a p: consuming ones by argument index in cmd, other counted ones in pairs. \a warnings is unused. */
+static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
+{
+	char param[MAX_COMMAND_PARAM_SIZE];
+	int argno;
+	int argidx = 0;
+	int casei;
+
+	memset(p, 0, sizeof(*p));
+
+	for (;;) {
+		/* read until next command from a. */
+		const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
+
+		if (ar == NULL) break;
+
+		/* Sanity checking */
+		if (argno != -1 && ar->consumes == 0) strgen_fatal("Non consumer param can't have a paramindex");
+
+		if (ar->consumes) {
+			if (argno != -1) argidx = argno; // an explicit index restarts the numbering from there
+			if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) strgen_fatal("invalid param idx %d", argidx);
+			if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) strgen_fatal("duplicate param idx %d", argidx);
+
+			p->cmd[argidx++] = ar;
+		} else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
+			if (p->np >= lengthof(p->pairs)) strgen_fatal("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
+			p->pairs[p->np].a = ar;
+			p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : ""; // NOTE(review): no matching free is visible in this file
+			p->np++;
+		}
+	}
+}
+
+/** Map the STRING variants onto the plain STRING command for comparing; other commands (and NULL) are returned as is. */
+static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
+{
+	if (a == NULL) return NULL;
+
+	/* The commands that are treated as STRING when comparing. */
+	static const char * const string_variants[] = {
+		"STRING1", "STRING2", "STRING3", "STRING4", "STRING5", "RAW_STRING",
+	};
+
+	for (uint i = 0; i < lengthof(string_variants); i++) {
+		if (strcmp(a->cmd, string_variants[i]) == 0) return FindCmd("STRING", 6);
+	}
+
+	return a;
+}
+
+/** Check whether the commands of translated string \a a match those of template string \a b; warns per mismatch, using \a name, and returns false if any. */
+static bool CheckCommandsMatch(char *a, char *b, const char *name)
+{
+	/* If we're not translating, i.e. we're compiling the base language,
+	 * it is pointless to do all these checks as it'll always be correct.
+	 * After all, all checks are based on the base language.
+	 */
+	if (!_translation) return true;
+
+	ParsedCommandStruct templ;
+	ParsedCommandStruct lang;
+	bool result = true;
+
+	ExtractCommandString(&templ, b, true);
+	ExtractCommandString(&lang, a, true);
+
+	/* For each string in templ, see if we find it in lang */
+	if (templ.np != lang.np) {
+		strgen_warning("%s: template string and language string have a different # of commands", name);
+		result = false;
+	}
+
+	for (uint i = 0; i < templ.np; i++) {
+		/* see if we find it in lang, and zero it out */
+		bool found = false;
+		for (uint j = 0; j < lang.np; j++) {
+			if (templ.pairs[i].a == lang.pairs[j].a &&
+					strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
+				/* it was found in both. zero it out from lang so we don't find it again */
+				lang.pairs[j].a = NULL;
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
+			result = false;
+		}
+	}
+
+	/* The non consumer commands have been compared above (mismatches only clear result).
+	 * Now check whether the consumer commands match up as well, per param index. */
+	for (uint i = 0; i < lengthof(templ.cmd); i++) {
+		if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) { // only the template side is translated for the comparison
+			strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
+				lang.cmd[i]  == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
+				templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
+			result = false;
+		}
+	}
+
+	return result;
+}
+/** Handle one input line: a pragma, a comment or blank line, or a "NAME[.case] :text" string definition. \a str is modified in place. */
+void StringReader::HandleString(char *str)
+{
+	if (*str == '#') {
+		if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2); // exactly two '#' start a pragma
+		return;
+	}
+
+	/* Ignore comments & blank lines */
+	if (*str == ';' || *str == ' ' || *str == '\0') return;
+
+	char *s = strchr(str, ':');
+	if (s == NULL) {
+		strgen_error("Line has no ':' delimiter");
+		return;
+	}
+
+	char *t;
+	/* Trim spaces.
+	 * After this str points to the command name, and s points to the command contents */
+	for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
+	*t = 0;
+	s++;
+
+	/* Check string is valid UTF-8 */
+	const char *tmp;
+	for (tmp = s; *tmp != '\0';) {
+		size_t len = Utf8Validate(tmp);
+		if (len == 0) strgen_fatal("Invalid UTF-8 sequence in '%s'", s);
+
+		WChar c;
+		Utf8Decode(&c, tmp);
+		if (c <= 0x001F || // ASCII control character range
+				(c >= 0xE000 && c <= 0xF8FF) || // Private range
+				(c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
+			strgen_fatal("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
+		}
+
+		tmp += len;
+	}
+
+	/* Check if the string has a case..
+	 * The syntax for cases is IDENTNAME.case */
+	char *casep = strchr(str, '.');
+	if (casep != NULL) *casep++ = '\0'; // str now holds only the name, casep the case name
+
+	/* Check if this string already exists.. */
+	LangString *ent = this->data.Find(str);
+
+	if (this->master) {
+		if (casep != NULL) {
+			strgen_error("Cases in the base translation are not supported.");
+			return;
+		}
+
+		if (ent != NULL) {
+			strgen_error("String name '%s' is used multiple times", str);
+			return;
+		}
+
+		if (this->data.strings[this->data.next_string_id] != NULL) { // NOTE(review): next_string_id is not range checked here - confirm it is bounded elsewhere
+			strgen_error("String ID 0x%X for '%s' already in use by '%s'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
+			return;
+		}
+
+		/* Allocate a new LangString */
+		this->data.Add(str, new LangString(str, s, this->data.next_string_id++, _cur_line));
+	} else {
+		if (ent == NULL) {
+			strgen_warning("String name '%s' does not exist in master file", str);
+			return;
+		}
+
+		if (ent->translated && casep == NULL) {
+			strgen_error("String name '%s' is used multiple times", str);
+			return;
+		}
+
+		/* make sure that the commands match */
+		if (!CheckCommandsMatch(s, ent->english, str)) return;
+
+		if (casep != NULL) {
+			ent->translated_case = new Case(ResolveCaseName(casep, strlen(casep)), s, ent->translated_case); // prepend to the chain of cases
+		} else {
+			ent->translated = strdup(s);
+			/* If the string was translated, use the line from the
+			 * translated language so errors in the translated file
+			 * are properly referenced to. */
+			ent->line = _cur_line;
+		}
+	}
+}
+
+/** Strip trailing carriage returns, newlines and spaces from \a buf, in place. */
+static void rstrip(char *buf)
+{
+	char *end = buf + strlen(buf);
+	while (end > buf && (end[-1] == '\r' || end[-1] == '\n' || end[-1] == ' ')) end--;
+	*end = '\0';
+}
+/** Read the whole input line by line and handle each line, after resetting the per-file parsing state. */
+void StringReader::ParseFile()
+{
+	_errors = 0;
+	_warnings = 0;
+
+	_file = this->file;
+	_translation = this->master || this->translation;
+
+	/* For each new file we parse, reset the genders, and language codes. */
+	MemSetT(&_lang, 0);
+	strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator));
+	strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency));
+	strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator));
+
+	char buf[2048];
+	/* _cur_line holds the one-based number of the line that is being handled. */
+	for (_cur_line = 1; this->ReadLine(buf, sizeof(buf)) != NULL; _cur_line++) {
+		rstrip(buf);
+		this->HandleString(buf);
+	}
+}
+
+/**
+ * Write the ID of every known string, followed by "STR_LAST_STRINGID" with the highest of them.
+ * @param data The data about the string.
+ */
+void HeaderWriter::WriteHeader(const StringData &data)
+{
+	int highest = 0;
+	for (size_t id = 0; id < data.max_strings; id++) {
+		const LangString *ls = data.strings[id];
+		if (ls == NULL) continue;
+		this->WriteStringID(ls->name, id);
+		highest = id;
+	}
+
+	this->WriteStringID("STR_LAST_STRINGID", highest);
+}
+/** Convert a command index and an offset within that command into a parameter position; invalid input is fatal. */
+static int TranslateArgumentIdx(int argidx, int offset)
+{
+	if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
+		strgen_fatal("invalid argidx %d", argidx);
+	}
+
+	const CmdStruct *target = _cur_pcs.cmd[argidx];
+	if (target != NULL && target->consumes <= offset) {
+		strgen_fatal("invalid argidx offset %d:%d", argidx, offset);
+	}
+
+	if (target == NULL) {
+		strgen_fatal("no command for this argidx %d", argidx);
+	}
+
+	/* Every earlier command moves the position by what it consumes; an unused index counts as one. */
+	int sum = 0;
+	for (int i = 0; i < argidx; i++) {
+		const CmdStruct *cs = _cur_pcs.cmd[i];
+		sum += (cs != NULL) ? cs->consumes : 1;
+	}
+
+	return sum + offset;
+}
+/** Emit SCC_ARG_INDEX followed by the translated position of the current argument index. */
+static void PutArgidxCommand(Buffer *buffer)
+{
+	buffer->AppendUtf8(SCC_ARG_INDEX);
+	buffer->AppendByte(TranslateArgumentIdx(_cur_argidx, 0));
+}
+
+/** Compile \a str into \a buffer: plain bytes are copied, every {command} is emitted through the proc of its command. */
+static void PutCommandString(Buffer *buffer, const char *str)
+{
+	_cur_argidx = 0;
+
+	while (*str != '\0') {
+		/* Process characters as they are until we encounter a { */
+		if (*str != '{') {
+			buffer->AppendByte(*str++);
+			continue;
+		}
+
+		char param[MAX_COMMAND_PARAM_SIZE];
+		int argno;
+		int casei;
+		const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
+		if (cs == NULL) break; // the remainder of the string is dropped
+
+		if (casei != -1) {
+			buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
+			buffer->AppendByte(casei);
+		}
+
+		/* For params that consume values, we need to handle the argindex properly */
+		if (cs->consumes > 0) {
+			/* Check if we need to output a move-param command */
+			if (argno != -1 && argno != _cur_argidx) {
+				_cur_argidx = argno;
+				PutArgidxCommand(buffer);
+			}
+
+			/* Output the one from the master string... it's always accurate. */
+			cs = _cur_pcs.cmd[_cur_argidx++]; // NOTE(review): _cur_argidx is not range checked when argno equals it - confirm
+			if (cs == NULL) {
+				strgen_fatal("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
+			}
+		}
+
+		cs->proc(buffer, param, cs->value);
+	}
+}
+
+/**
+ * Write the length as a simple gamma: one byte below 0xC0, else two bytes of which the first is or-ed with 0xC0.
+ * @param length The number to write; 0x4000 and above is fatal.
+ */
+void LanguageWriter::WriteLength(uint length)
+{
+	if (length >= 0x4000) {
+		strgen_fatal("string too long");
+	}
+
+	byte encoded[2];
+	size_t used = 0;
+	/* Lengths that do not fit in the single byte form get a marked high byte first. */
+	if (length >= 0xC0) encoded[used++] = (length >> 8) | 0xC0;
+	encoded[used++] = length & 0xFF;
+
+	this->Write(encoded, used);
+}
+
+/**
+ * Actually write the language: the header, then per tab every string up to the last used slot.
+ * @param data The data about the string.
+ */
+void LanguageWriter::WriteLang(const StringData &data)
+{
+	uint *in_use = AllocaM(uint, data.tabs);
+	for (size_t tab = 0; tab < data.tabs; tab++) {
+		uint n = data.CountInUse(tab);
+
+		in_use[tab] = n;
+		_lang.offsets[tab] = TO_LE16(n);
+
+		/* Count the strings that exist but have no translation. */
+		for (uint j = 0; j != in_use[tab]; j++) {
+			const LangString *ls = data.strings[(tab * StringData::STRINGS_IN_TAB) + j];
+			if (ls != NULL && ls->translated == NULL) _lang.missing++;
+		}
+	}
+
+	_lang.ident = TO_LE32(LanguagePackHeader::IDENT);
+	_lang.version = TO_LE32(data.Version());
+	_lang.missing = TO_LE16(_lang.missing);
+	_lang.winlangid = TO_LE16(_lang.winlangid);
+
+	this->WriteHeader(&_lang);
+	Buffer buffer; // reused for every string; cleared after each write
+
+	for (size_t tab = 0; tab < data.tabs; tab++) {
+		for (uint j = 0; j != in_use[tab]; j++) {
+			const LangString *ls = data.strings[(tab * StringData::STRINGS_IN_TAB) + j];
+			const Case *casep;
+			const char *cmdp;
+
+			/* For undefined strings, just set that it's an empty string */
+			if (ls == NULL) {
+				this->WriteLength(0);
+				continue;
+			}
+
+			_cur_ident = ls->name;
+			_cur_line = ls->line;
+
+			/* Produce a message if a string doesn't have a translation. */
+			if (_show_todo > 0 && ls->translated == NULL) {
+				if ((_show_todo & 2) != 0) {
+					strgen_warning("'%s' is untranslated", ls->name);
+				}
+				if ((_show_todo & 1) != 0) {
+					const char *s = "<TODO> ";
+					while (*s != '\0') buffer.AppendByte(*s++);
+				}
+			}
+
+			/* Extract the strings and stuff from the english command string */
+			ExtractCommandString(&_cur_pcs, ls->english, false);
+
+			/* Use the translation when any part of it exists, else fall back to the english text. */
+			if (ls->translated_case != NULL || ls->translated != NULL) {
+				casep = ls->translated_case;
+				cmdp = ls->translated; // may be NULL when only cases were translated
+			} else {
+				casep = NULL;
+				cmdp = ls->english;
+			}
+
+			_translated = cmdp != ls->english;
+
+			if (casep != NULL) {
+				const Case *c;
+				uint num;
+
+				/* Need to output a case-switch.
+				 * It has this format
+				 * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
+				 * Each LEN is printed using 2 bytes in big endian order. */
+				buffer.AppendUtf8(SCC_SWITCH_CASE);
+				/* Count the number of cases */
+				for (num = 0, c = casep; c; c = c->next) num++;
+				buffer.AppendByte(num);
+
+				/* Write each case */
+				for (c = casep; c != NULL; c = c->next) {
+					buffer.AppendByte(c->caseidx);
+					/* Make some space for the 16-bit length */
+					size_t pos = buffer.Length();
+					buffer.AppendByte(0);
+					buffer.AppendByte(0);
+					/* Write string */
+					PutCommandString(&buffer, c->string);
+					buffer.AppendByte(0); // terminate with a zero
+					/* Fill in the length; only the low 16 bits of the size are stored */
+					size_t size = buffer.Length() - (pos + 2);
+					buffer[pos + 0] = GB(size, 8, 8);
+					buffer[pos + 1] = GB(size, 0, 8);
+				}
+			}
+
+			if (cmdp != NULL) PutCommandString(&buffer, cmdp);
+
+			this->WriteLength(buffer.Length());
+			this->Write(buffer.Begin(), buffer.Length());
+			buffer.Clear();
+		}
+	}
+}
author	rubidium <rubidium@openttd.org>	2011-12-17 18:34:03 +0000
committer	rubidium <rubidium@openttd.org>	2011-12-17 18:34:03 +0000
commit	1f083c3ac8961158cf7788a625e1786960d590d4 (patch)
tree	f7c33084901af698c52c51c72c9009d5f5e5711e /src/strgen
parent	c97b2a5224e350380e5f31b0612bf057d1a689ab (diff)
download	openttd-1f083c3ac8961158cf7788a625e1786960d590d4.tar.xz