Initial commit

2026-02-02 04:50:13 +01:00
commit 5b11698731
22592 changed files with 7677434 additions and 0 deletions
--- a/devtools/create_translations/po_parser.cpp
+++ b/devtools/create_translations/po_parser.cpp
@@ -0,0 +1,404 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This is a utility to create the translations.dat file from all the po files.
+ * The generated file is used by ScummVM to offer translations of its GUI.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "po_parser.h"
+
+/**
+ * Builds an empty message list: no pointer array yet, zero stored messages
+ * and zero allocated slots.
+ */
+PoMessageList::PoMessageList()
+	: _messages(nullptr),
+	  _size(0),
+	  _allocated(0) {
+}
+
+/**
+ * Releases every stored message string and then the pointer array itself.
+ */
+PoMessageList::~PoMessageList() {
+	int remaining = _size;
+	while (remaining-- > 0)
+		delete[] _messages[remaining];
+	delete[] _messages;
+}
+
+/**
+ * Stores a private copy of msg, keeping the list sorted in strcmp() order.
+ *
+ * Null or empty messages, and messages that are already in the list, are
+ * silently ignored.
+ */
+void PoMessageList::insert(const char *msg) {
+	if (!msg || msg[0] == '\0')
+		return;
+
+	// Binary search over the half-open range [low, high). When the loop ends
+	// without finding msg, low is the slot the new message belongs in.
+	int low = 0;
+	int high = _size;
+	while (low < high) {
+		const int mid = (low + high - 1) / 2;
+		const int order = strcmp(msg, _messages[mid]);
+		if (order == 0)
+			return; // already present
+		if (order < 0)
+			high = mid;
+		else
+			low = mid + 1;
+	}
+	const int pos = low;
+
+	if (_size >= _allocated) {
+		// Out of room: grow by a fixed step and leave a hole at pos while
+		// moving the existing pointers over.
+		_allocated += 100;
+		char **grown = new char*[_allocated];
+		for (int from = 0; from < _size; ++from)
+			grown[from < pos ? from : from + 1] = _messages[from];
+		delete[] _messages;
+		_messages = grown;
+	} else {
+		// Enough room: shift the tail up by one slot, last element first.
+		for (int to = _size; to > pos; --to)
+			_messages[to] = _messages[to - 1];
+	}
+
+	const size_t bytes = strlen(msg) + 1;
+	char *copy = new char[bytes];
+	memcpy(copy, msg, bytes);
+	_messages[pos] = copy;
+	++_size;
+}
+
+/**
+ * Looks msg up in the sorted list.
+ *
+ * @return the index of the matching message, or -1 when msg is null, empty
+ *         or not in the list.
+ */
+int PoMessageList::findIndex(const char *msg) {
+	if (!msg || msg[0] == '\0')
+		return -1;
+
+	// Binary search over the half-open range [low, high).
+	int low = 0;
+	int high = _size;
+	while (low < high) {
+		const int mid = (low + high - 1) / 2;
+		const int order = strcmp(msg, _messages[mid]);
+		if (order < 0)
+			high = mid;
+		else if (order > 0)
+			low = mid + 1;
+		else
+			return mid;
+	}
+
+	return -1;
+}
+
+/** Returns the number of messages currently stored in the list. */
+int PoMessageList::size() const {
+	return _size;
+}
+
+/**
+ * Returns the message stored at index, or nullptr when index is outside
+ * the range [0, size()).
+ */
+const char *PoMessageList::operator[](int index) const {
+	const bool inRange = index >= 0 && index < _size;
+	return inRange ? _messages[index] : nullptr;
+}
+
+/**
+ * Creates an empty entry list for the language code lang.
+ *
+ * The code is copied twice: once as the language identifier and once as the
+ * fallback language name, which languageName() reports as long as no
+ * explicit name has been set. UTF-8 is assumed until addMessageEntry() sees
+ * a header that says otherwise.
+ */
+PoMessageEntryList::PoMessageEntryList(const char *lang) :
+	_lang(nullptr), _langName(nullptr), _langNameAlt(nullptr), _useUTF8(true),
+	_list(nullptr), _size(0), _allocated(0)
+{
+	const size_t bytes = strlen(lang) + 1;
+
+	_lang = new char[bytes];
+	memcpy(_lang, lang, bytes);
+
+	// The fallback name starts out identical to the language code.
+	_langNameAlt = new char[bytes];
+	memcpy(_langNameAlt, lang, bytes);
+}
+
+/**
+ * Destroys all owned entries and the entry array, then the three language
+ * strings.
+ */
+PoMessageEntryList::~PoMessageEntryList() {
+	int remaining = _size;
+	while (remaining-- > 0)
+		delete _list[remaining];
+	delete[] _list;
+
+	delete[] _langNameAlt;
+	delete[] _langName;
+	delete[] _lang;
+}
+
+/**
+ * Records one translated message, or consumes the PO header.
+ *
+ * When message is the empty string the entry is the PO header: translation
+ * is then scanned for the "X-Language-name:", "Language:" and "charset="
+ * fields, which update the language names and the UTF-8 flag, and nothing is
+ * added to the list.
+ *
+ * Otherwise the entry is inserted so that the list stays sorted by msgid and,
+ * for equal msgids, by context with the context-less entry first. An entry
+ * whose (msgid, context) pair is already present is ignored. Context-specific
+ * entries whose translation equals the context-less translation of the same
+ * msgid are not stored (or are removed), see the comment in the body.
+ *
+ * @param translation  the msgstr text; must not be null
+ * @param message      the msgid text; must not be null
+ * @param context      the msgctxt text; nullptr and "" both mean "no context"
+ */
+void PoMessageEntryList::addMessageEntry(const char *translation, const char *message, const char *context) {
+	if (*message == '\0') {
+		// This is the header.
+		// We get the charset and the language name from the translation string
+		char *str = parseLine(translation, "X-Language-name:");
+		if (str != nullptr) {
+			delete[] _langName;
+			_langName = str;
+		}
+		str = parseLine(translation, "Language:");
+		if (str != nullptr) {
+			delete[] _langNameAlt;
+			_langNameAlt = str;
+		}
+		str = parseLine(translation, "charset=");
+		// parseLine() returns nullptr when the field is missing or empty, and
+		// passing a null pointer to strcmp() is undefined behaviour. Without a
+		// declared charset the current value of the flag is kept.
+		if (str != nullptr) {
+			if (strcmp(str, "utf-8") != 0 && strcmp(str, "UTF-8") != 0)
+				_useUTF8 = false;
+			delete[] str;
+		}
+		return;
+	}
+
+	// An empty context string means "no context", exactly like nullptr.
+	// Normalising it here lets the duplicate detection below recognise a
+	// context-less entry that is already stored.
+	// NOTE(review): this relies on PoMessageEntry keeping msgctxt == nullptr
+	// for context-less entries, as the checks further down already do.
+	const char *const ctx = (context != nullptr && *context != '\0') ? context : nullptr;
+
+	// binary-search for the insertion index
+	int leftIndex = 0;
+	int rightIndex = _size - 1;
+	while (rightIndex >= leftIndex) {
+		int midIndex = (leftIndex + rightIndex) / 2;
+		int compareResult = strcmp(message, _list[midIndex]->msgid);
+		if (compareResult == 0) {
+			// Same msgid: order by context, the context-less entry comes first.
+			const char *const otherCtx = _list[midIndex]->msgctxt;
+			if (ctx == nullptr) {
+				if (otherCtx == nullptr)
+					return; // duplicate
+				compareResult = -1;
+			} else if (otherCtx == nullptr) {
+				compareResult = 1;
+			} else {
+				compareResult = strcmp(ctx, otherCtx);
+				if (compareResult == 0)
+					return; // duplicate
+			}
+		}
+		if (compareResult < 0)
+			rightIndex = midIndex - 1;
+		else
+			leftIndex = midIndex + 1;
+	}
+	// We now have rightIndex = leftIndex - 1 and we need to insert the new message
+	// between the two (i.e. at leftIndex).
+	// However since the TranslationManager will pick the translation associated to no
+	// context if it is not present for a specific context, we can optimize the file
+	// size, memory used at run-time and performances (less strings to read from the file
+	// and less strings to look for) by avoiding duplicate.
+	if (ctx != nullptr) {
+		// Walk back to the first entry with this msgid; if it is the context-less
+		// one and carries the same translation, the new entry is redundant.
+		int contextIndex = leftIndex - 1;
+		while (contextIndex >= 0 && strcmp(message, _list[contextIndex]->msgid) == 0) {
+			--contextIndex;
+		}
+		++contextIndex;
+		if (contextIndex < leftIndex && _list[contextIndex]->msgctxt == nullptr && strcmp(translation, _list[contextIndex]->msgstr) == 0)
+			return;
+	}
+
+	if (_size + 1 > _allocated) {
+		// Grow the array by a fixed step, leaving a hole at leftIndex.
+		_allocated += 100;
+		PoMessageEntry **newList = new PoMessageEntry*[_allocated];
+		for (int i = 0; i < leftIndex; ++i)
+			newList[i] = _list[i];
+		for (int i = leftIndex; i < _size; ++i)
+			newList[i + 1] = _list[i];
+		delete[] _list;
+		_list = newList;
+	} else {
+		// Shift the tail up by one slot, last element first.
+		for (int i = _size - 1; i >= leftIndex; --i)
+			_list[i + 1] = _list[i];
+	}
+	_list[leftIndex] = new PoMessageEntry(translation, message, context);
+	++_size;
+
+	if (ctx == nullptr) {
+		// Remove identical translations for a specific context (see comment above).
+		// Entries that survive are compacted downwards over the removed ones.
+		int contextIndex = leftIndex + 1;
+		int removed = 0;
+		while (contextIndex < _size && strcmp(message, _list[contextIndex]->msgid) == 0) {
+			if (strcmp(translation, _list[contextIndex]->msgstr) == 0) {
+				delete _list[contextIndex];
+				++removed;
+			} else {
+				_list[contextIndex - removed] = _list[contextIndex];
+			}
+			++contextIndex;
+		}
+		if (removed > 0) {
+			// Close the gap for the entries with other msgids that follow.
+			while (contextIndex < _size) {
+				_list[contextIndex - removed] = _list[contextIndex];
+				++contextIndex;
+			}
+		}
+		_size -= removed;
+	}
+}
+
+/** Returns the language code this list was constructed with. */
+const char *PoMessageEntryList::language() const {
+	return _lang;
+}
+
+/**
+ * Returns the language name: the value of the "X-Language-name:" header
+ * field when one was seen, otherwise the fallback name (the "Language:"
+ * header field, or the language code given to the constructor).
+ */
+const char *PoMessageEntryList::languageName() const {
+	if (_langName != nullptr)
+		return _langName;
+	return _langNameAlt;
+}
+
+/**
+ * Returns the UTF-8 flag: true unless a header declaring a charset other
+ * than "utf-8"/"UTF-8" has been passed to addMessageEntry().
+ */
+bool PoMessageEntryList::useUTF8() const {
+	return _useUTF8;
+}
+
+/** Returns the number of message entries currently stored. */
+int PoMessageEntryList::size() const {
+	return _size;
+}
+
+/**
+ * Returns the entry stored at index, or nullptr when index is outside the
+ * range [0, size()). The list keeps ownership of the entry.
+ */
+const PoMessageEntry *PoMessageEntryList::entry(int index) const {
+	if (index >= 0 && index < _size)
+		return _list[index];
+	return nullptr;
+}
+
+
+// Appends src to the NUL-terminated string held in dst without ever writing
+// more than dstSize bytes (terminator included). Text that does not fit is
+// dropped and a warning is printed.
+static void appendBounded(char *dst, size_t dstSize, const char *src) {
+	const size_t used = strlen(dst);
+	const size_t room = dstSize - used - 1;
+	if (strlen(src) > room)
+		fprintf(stderr, "Warning: string too long in po file, truncated to %lu bytes.\n", static_cast<unsigned long>(dstSize - 1));
+	strncat(dst, src, room);
+}
+
+/**
+ * Parses the po file at path `file`.
+ *
+ * The language code is taken from the file name (the part after the last
+ * '/' or '\\' and before the first '.'). Every non-fuzzy entry with a
+ * non-empty msgstr has its msgid inserted into `messages` and is passed to
+ * PoMessageEntryList::addMessageEntry() on the returned list; the header
+ * entry (empty msgid) goes through the same path.
+ *
+ * Keywords are recognised by prefix only, so e.g. a line starting with
+ * "msgid_plural" is handled by the msgid branch.
+ *
+ * @param file      path of the po file to read
+ * @param messages  list collecting the msgid of every entry kept
+ * @return a newly allocated list that the caller must delete, or nullptr
+ *         when the file cannot be opened
+ */
+PoMessageEntryList *parsePoFile(const char *file, PoMessageList& messages) {
+	FILE *inFile = fopen(file, "r");
+	if (!inFile)
+		return nullptr;
+
+	char msgidBuf[20480], msgctxtBuf[20480], msgstrBuf[20480];
+	char line[20480], *currentBuf = msgstrBuf;
+	// All three message buffers have the same size.
+	const size_t bufSize = sizeof(msgstrBuf);
+
+	// Get language from file name and create PoMessageEntryList
+	int index = 0, start_index = static_cast<int>(strlen(file)) - 1;
+	while (start_index > 0 && file[start_index - 1] != '/' && file[start_index - 1] != '\\') {
+		--start_index;
+	}
+	while (index < static_cast<int>(bufSize) - 1 &&
+	       file[start_index + index] != '.' && file[start_index + index] != '\0') {
+		msgidBuf[index] = file[start_index + index];
+		++index;
+	}
+	msgidBuf[index] = '\0';
+	PoMessageEntryList *list = new PoMessageEntryList(msgidBuf);
+
+	// Initialize the message attributes.
+	// `fuzzy` applies to the entry being collected; `fuzzy_next` remembers a
+	// "#, fuzzy" flag line that belongs to the entry that follows it.
+	bool fuzzy = false;
+	bool fuzzy_next = false;
+
+	// Hands the collected entry over, unless it is untranslated or fuzzy.
+	auto commitEntry = [&]() {
+		if (*msgstrBuf != '\0' && !fuzzy) {
+			messages.insert(msgidBuf);
+			list->addMessageEntry(msgstrBuf, msgidBuf, msgctxtBuf);
+		}
+	};
+
+	// Called on msgid/msgctxt lines: if the previous line group was a msgstr,
+	// the previous entry is complete, so commit it and start a fresh one.
+	auto beginEntryIfNeeded = [&]() {
+		if (currentBuf != msgstrBuf)
+			return;
+		commitEntry();
+		msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0';
+
+		// Reset the attribute flags.
+		fuzzy = fuzzy_next;
+		fuzzy_next = false;
+	};
+
+	// Parse the file line by line.
+	// The msgstr is always the last line of an entry (i.e. msgid and msgctxt always
+	// precede the corresponding msgstr).
+	msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0';
+	// Read with the full size of `line`: a smaller limit would split long
+	// lines in two and the second half would be parsed as a separate line.
+	while (fgets(line, static_cast<int>(sizeof(line)), inFile)) {
+		if (line[0] == '#' && line[1] == ',') {
+			// Handle message attributes.
+			if (strstr(line, "fuzzy")) {
+				fuzzy_next = true;
+				continue;
+			}
+		}
+		// Skip empty and comment line
+		if (*line == '\n' || *line == '#')
+			continue;
+		if (strncmp(line, "msgid", 5) == 0) {
+			beginEntryIfNeeded();
+			msgidBuf[0] = '\0';
+			appendBounded(msgidBuf, bufSize, stripLine(line));
+			currentBuf = msgidBuf;
+		} else if (strncmp(line, "msgctxt", 7) == 0) {
+			beginEntryIfNeeded();
+			msgctxtBuf[0] = '\0';
+			appendBounded(msgctxtBuf, bufSize, stripLine(line));
+			currentBuf = msgctxtBuf;
+		} else if (strncmp(line, "msgstr", 6) == 0) {
+			msgstrBuf[0] = '\0';
+			appendBounded(msgstrBuf, bufSize, stripLine(line));
+			currentBuf = msgstrBuf;
+		} else {
+			// Continuation line: concatenate the string at the end of the
+			// current buffer, without running past its end.
+			if (currentBuf)
+				appendBounded(currentBuf, bufSize, stripLine(line));
+		}
+	}
+	if (currentBuf == msgstrBuf) {
+		// add last entry
+		commitEntry();
+	}
+
+	fclose(inFile);
+	return list;
+}
+
+/**
+ * Reduces line, in place, to the text between its first two unescaped
+ * double quotes and returns line.
+ *
+ * The escape sequences \n, \t, \", \' and \\ are replaced by the character
+ * they stand for so that strcmp() can match the strings at run time. Any
+ * other escape sequence is reported on stderr and dropped. A backslash that
+ * is the very last character of the buffer is dropped as well. A line
+ * without any quote yields the empty string.
+ *
+ * @param line  NUL-terminated, writable buffer
+ * @return line, now holding the unescaped text
+ */
+char *stripLine(char *const line) {
+	// Look for the first quote
+	char const *src = line;
+	while (*src != '\0' && *src++ != '"') {}
+	// shift characters until we reach the end of the string or an unprotected quote
+	char *dst = line;
+	while (*src != '\0' && *src != '"') {
+		char c = *src++;
+		if (c == '\\') {
+			// A backslash right before the terminator has nothing to escape.
+			// Stop here instead of stepping over the NUL and reading past the
+			// end of the buffer.
+			if (*src == '\0')
+				break;
+			switch (c = *src++) {
+			case  'n': c = '\n'; break;
+			case  't': c = '\t'; break;
+			case '\"': c = '\"'; break;
+			case '\'': c = '\''; break;
+			case '\\': c = '\\'; break;
+			default:
+				// Just skip
+				fprintf(stderr, "Unsupported special character \"\\%c\" in string. Please contact ScummVM developers.\n", c);
+				continue;
+			}
+		}
+		*dst++ = c;
+	}
+	*dst = '\0';
+	return line;
+}
+
+/**
+ * Extracts the value of a header field from the po header text.
+ *
+ * The value is the text that follows the first occurrence of field in line,
+ * with leading whitespace skipped, up to (not including) the next '\n' or
+ * the end of the string.
+ *
+ * @param line   header text to search; may contain several '\n'-separated lines
+ * @param field  field marker to look for, e.g. "Language:" or "charset="
+ * @return a new[]-allocated copy of the value that the caller must delete[],
+ *         or nullptr when the field is not found or its value is empty
+ */
+char *parseLine(const char *line, const char *field) {
+	const char *str = strstr(line, field);
+	if (str == nullptr)
+		return nullptr;
+	str += strlen(field);
+	// Skip the blanks in front of the value, but never the end of the line:
+	// otherwise an empty field would pick up the following header line as its
+	// value. The cast keeps isspace() defined for bytes above 0x7F.
+	while (*str != '\0' && *str != '\n' && isspace(static_cast<unsigned char>(*str))) {
+		++str;
+	}
+	// Find string length (stop at the first '\n')
+	size_t len = 0;
+	while (str[len] != '\0' && str[len] != '\n') {
+		++len;
+	}
+	if (len == 0)
+		return nullptr;
+	// Create result string
+	char *result = new char[len + 1];
+	memcpy(result, str, len);
+	result[len] = '\0';
+	return result;
+}