Initial commit
This commit is contained in:
638
engines/sci/parser/grammar.cpp
Normal file
638
engines/sci/parser/grammar.cpp
Normal file
@@ -0,0 +1,638 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Functionality to transform the context-free SCI grammar rules into
|
||||
* strict Greibach normal form (strict GNF), and to test SCI input against
|
||||
* that grammar, writing an appropriate node tree if successful.
|
||||
*/
|
||||
|
||||
#include "sci/parser/vocabulary.h"
|
||||
#include "sci/console.h"
|
||||
#include "common/array.h"
|
||||
#include "common/textconsole.h"
|
||||
|
||||
namespace Sci {
|
||||
|
||||
#define TOKEN_OPAREN 0xff000000
|
||||
#define TOKEN_CPAREN 0xfe000000
|
||||
#define TOKEN_TERMINAL_CLASS 0x10000
|
||||
#define TOKEN_TERMINAL_GROUP 0x20000
|
||||
#define TOKEN_STUFFING_LEAF 0x40000
|
||||
#define TOKEN_STUFFING_WORD 0x80000
|
||||
#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_LEAF | TOKEN_STUFFING_WORD)
|
||||
#define TOKEN_TERMINAL (TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP)
|
||||
|
||||
static int _allocd_rules = 0; // FIXME: Avoid non-const global vars
|
||||
|
||||
/**
 * A single grammar rule: a non-terminal ID plus a token stream in _data.
 * Tokens are either flagged values (TOKEN_* bits) or plain non-terminal IDs.
 */
struct ParseRule {
	int _id; /**< non-terminal ID */
	uint _firstSpecial; /**< first terminal or non-terminal */
	uint _numSpecials; /**< number of terminals and non-terminals */
	Common::Array<int> _data; /**< actual data */

	// Every live ParseRule is tracked in the global _allocd_rules counter
	// (used for leak diagnostics); the destructor keeps it balanced.
	~ParseRule() {
		assert(_allocd_rules > 0);
		--_allocd_rules;
	}

	// FIXME remove this one again?
	// Structural equality: all four fields must match.
	bool operator==(const ParseRule &other) const {
		return _id == other._id &&
			_firstSpecial == other._firstSpecial &&
			_numSpecials == other._numSpecials &&
			_data == other._data;
	}
};
|
||||
|
||||
|
||||
/**
 * Singly linked list node that owns one ParseRule.
 * The leading terminal of the rule is cached in 'terminal' for fast lookup.
 */
struct ParseRuleList {
	int terminal; /**< Terminal character this rule matches against or 0 for a non-terminal rule */
	ParseRule *rule;
	ParseRuleList *next;

	void print() const;

	// Takes ownership of 'r'; caches the rule's first special token if it
	// is a terminal, else 0.
	ParseRuleList(ParseRule *r) : rule(r), next(nullptr) {
		int term = rule->_data[rule->_firstSpecial];
		terminal = ((term & TOKEN_TERMINAL) ? term : 0);
	}

	// Deletes the owned rule AND the entire tail of the list.
	// NOTE: recursion depth equals the remaining list length.
	~ParseRuleList() {
		delete rule;
		delete next;
	}
};
|
||||
|
||||
|
||||
/**
 * Dumps one rule to the debug console. The first special (terminal or
 * non-terminal) position is marked with '_'. Debugging aid only.
 */
static void vocab_print_rule(ParseRule *rule) {
	int wspace = 0; // whether a separating space is pending before the next token

	if (!rule) {
		warning("NULL rule");
		return;
	}

	debugN("[%03x] -> ", rule->_id);

	if (rule->_data.empty())
		debugN("e"); // epsilon: the empty production

	for (uint i = 0; i < rule->_data.size(); i++) {
		uint token = rule->_data[i];

		if (token == TOKEN_OPAREN) {
			if (i == rule->_firstSpecial)
				debugN("_");

			debugN("(");
			wspace = 0;
		} else if (token == TOKEN_CPAREN) {
			if (i == rule->_firstSpecial)
				debugN("_");

			debugN(")");
			wspace = 0;
		} else {
			if (wspace)
				debugN(" ");

			if (i == rule->_firstSpecial)
				debugN("_");
			// Decode the token kind from its flag bits (see TOKEN_* defines).
			if (token & TOKEN_TERMINAL_CLASS)
				debugN("C(%04x)", token & 0xffff);
			else if (token & TOKEN_TERMINAL_GROUP)
				debugN("G(%04x)", token & 0xffff);
			else if (token & TOKEN_STUFFING_LEAF)
				debugN("%03x", token & 0xffff);
			else if (token & TOKEN_STUFFING_WORD)
				debugN("{%03x}", token & 0xffff);
			else
				debugN("[%03x]", token); /* non-terminal */
			wspace = 1;
		}

		if (i == rule->_firstSpecial)
			debugN("_");
	}
	debugN(" [%d specials]", rule->_numSpecials);
}
|
||||
|
||||
/**
 * Duplicates a rule, keeping the global allocation counter in sync.
 * The caller owns the returned copy.
 */
static ParseRule *_vdup(ParseRule *a) {
	ParseRule *const copy = new ParseRule(*a);
	++_allocd_rules;
	return copy;
}
|
||||
|
||||
/**
 * Performs one derivation step: substitutes the first non-terminal of
 * 'turkey' by the full body of 'stuffing'.
 * Neither input rule is modified or freed.
 * @return a freshly allocated rule, or nullptr if turkey has no
 *         non-terminal or its first non-terminal doesn't match stuffing's id
 */
static ParseRule *_vinsert(ParseRule *turkey, ParseRule *stuffing) {
	uint firstnt = turkey->_firstSpecial;

	// Skip over all TOKEN_NON_NT-flagged tokens to find the first plain
	// non-terminal in 'turkey'.
	while ((firstnt < turkey->_data.size()) && (turkey->_data[firstnt] & TOKEN_NON_NT))
		firstnt++;

	// If no non-terminal found, or if it doesn't match the id of 'stuffing', abort.
	if ((firstnt == turkey->_data.size()) || (turkey->_data[firstnt] != stuffing->_id))
		return nullptr;

	// Create a new rule as a copy of 'turkey', where the token firstnt has been substituted
	// by the rule 'stuffing'.
	++_allocd_rules;

	ParseRule *rule = new ParseRule(*turkey);
	rule->_numSpecials += stuffing->_numSpecials - 1; // the substituted special is consumed
	rule->_firstSpecial = firstnt + stuffing->_firstSpecial;
	rule->_data.resize(turkey->_data.size() - 1 + stuffing->_data.size());

	// Replace rule->_data[firstnt] by all of stuffing->_data
	Common::copy(stuffing->_data.begin(), stuffing->_data.end(), rule->_data.begin() + firstnt);

	// Shift the remainder of turkey's tokens in behind the inserted data.
	if (firstnt < turkey->_data.size() - 1)
		Common::copy(turkey->_data.begin() + firstnt + 1, turkey->_data.end(),
				rule->_data.begin() + firstnt + stuffing->_data.size());

	return rule;
}
|
||||
|
||||
/**
 * Builds a ParseRule from one raw parser branch (up to 10 ints holding
 * (type, value) pairs, 0-terminated).
 * @return the new rule, or nullptr if the branch contains an invalid type
 */
static ParseRule *_vbuild_rule(const parse_tree_branch_t *branch) {
	int tokens = 0, tokenpos = 0, i;

	// First pass: count how many rule tokens will be emitted.
	while (tokenpos < 10 && branch->data[tokenpos]) {
		int type = branch->data[tokenpos];
		tokenpos += 2;

		if ((type == VOCAB_TREE_NODE_COMPARE_TYPE) || (type == VOCAB_TREE_NODE_COMPARE_GROUP) || (type == VOCAB_TREE_NODE_FORCE_STORAGE))
			++tokens; // terminal: a single token
		else if (type > VOCAB_TREE_NODE_LAST_WORD_STORAGE)
			tokens += 5; // inductive rule: '(', two stuffing leaves, non-terminal, ')'
		else
			return nullptr; // invalid
	}

	ParseRule *rule = new ParseRule();

	++_allocd_rules;
	rule->_id = branch->id;
	rule->_numSpecials = tokenpos >> 1; // one special per (type, value) pair
	rule->_data.resize(tokens);
	rule->_firstSpecial = 0;

	// Second pass: emit the tokens.
	tokens = 0;
	for (i = 0; i < tokenpos; i += 2) {
		int type = branch->data[i];
		int value = branch->data[i + 1];

		if (type == VOCAB_TREE_NODE_COMPARE_TYPE)
			rule->_data[tokens++] = value | TOKEN_TERMINAL_CLASS;
		else if (type == VOCAB_TREE_NODE_COMPARE_GROUP)
			rule->_data[tokens++] = value | TOKEN_TERMINAL_GROUP;
		else if (type == VOCAB_TREE_NODE_FORCE_STORAGE)
			rule->_data[tokens++] = value | TOKEN_STUFFING_WORD;
		else { // normal inductive rule
			rule->_data[tokens++] = TOKEN_OPAREN;
			rule->_data[tokens++] = type | TOKEN_STUFFING_LEAF;
			rule->_data[tokens++] = value | TOKEN_STUFFING_LEAF;

			// The very first pair determines where the first special sits.
			if (i == 0)
				rule->_firstSpecial = tokens;

			rule->_data[tokens++] = value; // The non-terminal
			rule->_data[tokens++] = TOKEN_CPAREN;
		}
	}

	return rule;
}
|
||||
|
||||
/**
 * Attempts to satisfy the first special of 'rule' with any word in 'input'.
 * On success, returns a new rule where the special has been replaced by the
 * matching word group(s); on failure (or no specials left), returns nullptr.
 * 'rule' itself is not modified.
 */
static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWordList &input) {
	int dep;

	if (!rule->_numSpecials)
		return nullptr;

	dep = rule->_data[rule->_firstSpecial]; // the terminal token to satisfy

	int count = 0;
	int match = 0;
	ResultWordList::const_iterator iter;
	// TODO: Inserting an array in the middle of another array is slow
	Common::Array<int> matches;
	matches.reserve(input.size());

	// We store the first match in 'match', and any subsequent matches in
	// 'matches'. 'match' replaces the special in the rule, and 'matches' gets
	// inserted after it.
	for (iter = input.begin(); iter != input.end(); ++iter)
		if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & iter->_class)) ||
				((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & iter->_group))) {
			if (count == 0)
				match = TOKEN_STUFFING_WORD | iter->_group;
			else
				matches.push_back(TOKEN_STUFFING_WORD | iter->_group);
			count++;
		}

	if (count) {
		ParseRule *retval = new ParseRule(*rule);
		++_allocd_rules;
		retval->_data[rule->_firstSpecial] = match;
		if (count > 1)
			retval->_data.insert_at(rule->_firstSpecial+1, matches);
		retval->_numSpecials--;
		retval->_firstSpecial = 0;

		if (retval->_numSpecials) { // find first special, if it exists
			// Scan from the old first-special position; earlier tokens
			// cannot be specials any more.
			for (uint i = rule->_firstSpecial; i < retval->_data.size(); ++i) {
				int tmp = retval->_data[i];
				if (!(tmp & TOKEN_NON_NT) || (tmp & TOKEN_TERMINAL)) {
					retval->_firstSpecial = i;
					break;
				}
			}
		}

		return retval;
	} else
		return nullptr;
}
|
||||
|
||||
/**
 * Disposes of a rule list. The ParseRuleList destructor cascades down the
 * 'next' chain, so deleting the head releases every node and its rule.
 */
void Vocabulary::freeRuleList(ParseRuleList *list) {
	delete list;
}
|
||||
|
||||
/**
 * Appends 'rule' to 'list', taking ownership of 'rule'.
 * Null rules, empty rules and exact duplicates are dropped (and freed where
 * applicable).
 * @return the (possibly new) list head
 */
static ParseRuleList *_vocab_add_rule(ParseRuleList *list, ParseRule *rule) {
	if (!rule)
		return list;
	if (!rule->_data.size()) {
		// Special case for qfg2 demo
		warning("no rule contents on _vocab_add_rule()");
		return list;
	}

	ParseRuleList *new_elem = new ParseRuleList(rule);

	if (list) {
		const int term = new_elem->terminal;
/*		if (term < list->terminal) {
			new_elem->next = list;
			return new_elem;
		} else {*/
		ParseRuleList *seeker = list;

		// Walk to the tail, bailing out early if an identical rule with the
		// same leading terminal already exists.
		while (seeker->next/* && seeker->next->terminal <= term*/) {
			if (seeker->next->terminal == term) {
				if (*(seeker->next->rule) == *rule) {
					delete new_elem; // NB: This also deletes 'rule'

					return list; // No duplicate rules
				}
			}
			seeker = seeker->next;
		}

		new_elem->next = seeker->next;
		seeker->next = new_elem;
		return list;
	} else {
		return new_elem;
	}
}
|
||||
|
||||
void ParseRuleList::print() const {
|
||||
const ParseRuleList *list = this;
|
||||
int pos = 0;
|
||||
while (list) {
|
||||
debugN("R%03d: ", pos);
|
||||
vocab_print_rule(list->rule);
|
||||
debugN("\n");
|
||||
list = list->next;
|
||||
pos++;
|
||||
}
|
||||
debugN("%d rules total.\n", pos);
|
||||
}
|
||||
|
||||
/**
 * Splits the list in two: the leading run of non-terminal rules stays in
 * 'list', and the remainder (starting at the first terminal-led rule) is
 * detached and returned (may be nullptr).
 */
static ParseRuleList *_vocab_split_rule_list(ParseRuleList *list) {
	assert(list);

	// Advance while the successor is still a non-terminal rule.
	ParseRuleList *node = list;
	while (node->next && !node->next->terminal)
		node = node->next;

	// Detach and hand back the terminal-led tail.
	ParseRuleList *tail = node->next;
	node->next = nullptr;
	return tail;
}
|
||||
|
||||
/**
 * Frees the list nodes WITHOUT freeing the rules they point to (the rules
 * are assumed to have been handed off elsewhere). Each node's 'rule' and
 * 'next' are nulled before deletion so the cascading destructor is inert.
 */
static void _vocab_free_empty_rule_list(ParseRuleList *list) {
	assert(list);
	while (list) {
		ParseRuleList *tail = list->next;
		list->next = nullptr;
		list->rule = nullptr;
		delete list;
		list = tail;
	}
}
|
||||
|
||||
/**
 * Merges l2 into l1 (transferring rule ownership via _vocab_add_rule) and
 * disposes of l2's now-empty node shells.
 * @return the merged list head
 */
static ParseRuleList *_vocab_merge_rule_lists(ParseRuleList *l1, ParseRuleList *l2) {
	ParseRuleList *merged = l1;

	for (ParseRuleList *node = l2; node; node = node->next)
		merged = _vocab_add_rule(merged, node->rule);

	// The rules now live in 'merged'; free only l2's bare nodes.
	_vocab_free_empty_rule_list(l2);

	return merged;
}
|
||||
|
||||
/**
 * Counts the nodes in a rule list (0 for nullptr).
 */
static int _vocab_rule_list_length(ParseRuleList *list) {
	int length = 0;
	for (; list; list = list->next)
		++length;
	return length;
}
|
||||
|
||||
/**
 * Builds a new list containing duplicates of every rule in 'list' whose
 * non-terminal ID equals 'id'. The input list is left untouched.
 * @return the new list, or nullptr if no rule matched
 */
static ParseRuleList *_vocab_clone_rule_list_by_id(ParseRuleList *list, int id) {
	ParseRuleList *clones = nullptr;

	for (ParseRuleList *node = list; node; node = node->next) {
		if (node->rule->_id == id)
			clones = _vocab_add_rule(clones, _vdup(node->rule));
	}

	return clones;
}
|
||||
|
||||
ParseRuleList *Vocabulary::buildGNF(bool verbose) {
	int iterations = 0;
	int termrules = 0;
	int ntrules_nr;
	ParseRuleList *ntlist = nullptr; // rules still starting with a non-terminal
	ParseRuleList *tlist, *new_tlist; // terminal-led (GNF) rules
	Console *con = g_sci->getSciDebugger();

	// Build the initial rule set from the raw parser branches.
	for (uint i = 1; i < _parserBranches.size(); i++) { // branch rule 0 is treated specially
		ParseRule *rule = _vbuild_rule(&_parserBranches[i]);
		if (!rule) {
			freeRuleList(ntlist);
			return nullptr;
		}
		ntlist = _vocab_add_rule(ntlist, rule);
	}

	// Split off the rules that already start with a terminal.
	tlist = _vocab_split_rule_list(ntlist);
	ntrules_nr = _vocab_rule_list_length(ntlist);

	if (verbose)
		con->debugPrintf("Starting with %d rules\n", ntrules_nr);

	new_tlist = tlist;
	tlist = nullptr;

	// Fixed-point iteration: substitute each terminal-led rule into each
	// non-terminal rule; repeat until no new terminal-led rules appear,
	// capped at 30 iterations.
	do {
		ParseRuleList *new_new_tlist = nullptr;
		ParseRuleList *ntseeker, *tseeker;

		ntseeker = ntlist;
		while (ntseeker) {
			tseeker = new_tlist;

			while (tseeker) {
				ParseRule *newrule = _vinsert(ntseeker->rule, tseeker->rule);
				if (newrule)
					new_new_tlist = _vocab_add_rule(new_new_tlist, newrule);
				tseeker = tseeker->next;
			}

			ntseeker = ntseeker->next;
		}

		// Fold last round's new rules into the result, then iterate on the
		// rules produced this round.
		tlist = _vocab_merge_rule_lists(tlist, new_tlist);
		new_tlist = new_new_tlist;
		termrules = _vocab_rule_list_length(new_new_tlist);

		if (verbose)
			con->debugPrintf("After iteration #%d: %d new term rules\n", ++iterations, termrules);

	} while (termrules && (iterations < 30));

	freeRuleList(ntlist);

	// In verbose mode the list is only printed and then freed; the caller
	// receives nullptr (documented behavior of this debug path).
	if (verbose) {
		con->debugPrintf("\nGNF rules:\n");
		tlist->print();
		con->debugPrintf("%d allocd rules\n", _allocd_rules);
		con->debugPrintf("Freeing rule list...\n");
		freeRuleList(tlist);
		return nullptr;
	}

	return tlist;
}
|
||||
|
||||
/**
 * Opens a parenthesized subtree: allocates the next node as an empty branch
 * node, attaches it as the left child of 'base', and returns its index.
 */
static int _vbpt_pareno(ParseTreeNode *nodes, int *pos, int base) {
	const int newnode = ++(*pos);
	nodes[base].left = &nodes[newnode];
	nodes[newnode].type = kParseTreeBranchNode;
	nodes[newnode].left = nullptr;
	nodes[newnode].right = nullptr;
	return newnode;
}
|
||||
|
||||
/**
 * Closes a parenthesized subtree for appending: allocates a fresh empty
 * branch node as the right child of 'paren' and returns its index.
 */
static int _vbpt_parenc(ParseTreeNode *nodes, int *pos, int paren) {
	const int newnode = ++(*pos);
	nodes[paren].right = &nodes[newnode];
	nodes[newnode].type = kParseTreeBranchNode;
	nodes[newnode].left = nullptr;
	nodes[newnode].right = nullptr;
	return newnode;
}
|
||||
|
||||
/**
 * Writes one leaf value under 'base' and creates a successor branch node
 * for subsequent writes.
 * @return index of the new successor (next write position)
 */
static int _vbpt_append(ParseTreeNode *nodes, int *pos, int base, int value) {
	// Leaf holding the value, as the left child.
	const int leaf = ++(*pos);
	nodes[base].left = &nodes[leaf];
	nodes[leaf].type = kParseTreeLeafNode;
	nodes[leaf].value = value;
	nodes[leaf].right = nullptr;

	// Empty fork node for the next write, as the right child.
	const int fork = ++(*pos);
	nodes[base].right = &nodes[fork];
	nodes[fork].type = kParseTreeBranchNode;
	nodes[fork].left = nullptr;
	nodes[fork].right = nullptr;
	return fork;
}
|
||||
|
||||
/**
 * Terminates a chain by overwriting the pending fork node at 'base' with a
 * leaf carrying 'value'. No new nodes are allocated.
 */
static int _vbpt_terminate(ParseTreeNode *nodes, int *pos, int base, int value) {
	ParseTreeNode &node = nodes[base];
	node.type = kParseTreeLeafNode;
	node.value = value;
	node.right = nullptr;
	return *pos;
}
|
||||
/**
 * Writes a word value into the node at 'base' and creates a sibling branch
 * node for subsequent writes.
 * @return index of the new sibling (next write position)
 */
static int _vbpt_append_word(ParseTreeNode *nodes, int *pos, int base, int value) {
	nodes[base].type = kParseTreeWordNode;
	nodes[base].value = value;

	const int sibling = ++(*pos);
	nodes[base].right = &nodes[sibling];
	nodes[sibling].type = kParseTreeBranchNode;
	nodes[sibling].left = nullptr;
	nodes[sibling].right = nullptr;
	return sibling;
}
|
||||
|
||||
/**
 * Terminates a chain by overwriting the pending fork node at 'base' with a
 * word node carrying 'value'. No new nodes are allocated.
 */
static int _vbpt_terminate_word(ParseTreeNode *nodes, int *pos, int base, int value) {
	ParseTreeNode &node = nodes[base];
	node.type = kParseTreeWordNode;
	node.value = value;
	node.right = nullptr;
	return *pos;
}
|
||||
|
||||
|
||||
/**
 * Recursively writes one subexpression of 'rule' (starting at 'rulepos')
 * into the node array, until the matching TOKEN_CPAREN (running off the end
 * of the rule data is treated the same as a closing paren).
 * @return the rule position just past the written subexpression
 */
static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) {
	uint token;

	while ((token = ((rulepos < rule->_data.size()) ? rule->_data[rulepos++] : TOKEN_CPAREN)) != TOKEN_CPAREN) {
		// Look ahead one token to decide between append (more to come) and
		// terminate (last token of the subtree).
		uint nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN;
		if (token == TOKEN_OPAREN) {
			int writepos2 = _vbpt_pareno(nodes, pos, writepos);
			rulepos = _vbpt_write_subexpression(nodes, pos, rule, rulepos, writepos2);
			// Re-evaluate the lookahead: the recursion advanced rulepos.
			nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN;
			if (nexttoken != TOKEN_CPAREN)
				writepos = _vbpt_parenc(nodes, pos, writepos);
		} else if (token & TOKEN_STUFFING_LEAF) {
			if (nexttoken == TOKEN_CPAREN)
				writepos = _vbpt_terminate(nodes, pos, writepos, token & 0xffff);
			else
				writepos = _vbpt_append(nodes, pos, writepos, token & 0xffff);
		} else if (token & TOKEN_STUFFING_WORD) {
			if (nexttoken == TOKEN_CPAREN)
				writepos = _vbpt_terminate_word(nodes, pos, writepos, token & 0xffff);
			else
				writepos = _vbpt_append_word(nodes, pos, writepos, token & 0xffff);
		} else {
			// Anything else (e.g. a bare non-terminal) means the rule was
			// not fully reduced to GNF - report and bail out.
			warning("\nError in parser (grammar.cpp, _vbpt_write_subexpression()): Rule data broken in rule ");
			vocab_print_rule(rule);
			debugN(", at token position %d\n", *pos);
			return rulepos;
		}
	}

	return rulepos;
}
|
||||
|
||||
int Vocabulary::parseGNF(const ResultWordListList &words, bool verbose) {
	Console *con = g_sci->getSciDebugger();
	// Get the start rules:
	ParseRuleList *work = _vocab_clone_rule_list_by_id(_parserRules, _parserBranches[0].data[1]);
	ParseRuleList *results = nullptr;
	uint word = 0;
	const uint words_nr = words.size();
	ResultWordListList::const_iterator words_iter;

	// Consume the input one word at a time, narrowing the candidate rule set.
	for (words_iter = words.begin(); words_iter != words.end(); ++words_iter, ++word) {
		ParseRuleList *new_work = nullptr;
		ParseRuleList *reduced_rules = nullptr;
		ParseRuleList *seeker, *subseeker;

		if (verbose)
			con->debugPrintf("Adding word %d...\n", word);

		seeker = work;
		while (seeker) {
			// Skip rules that need more specials than there are words left.
			if (seeker->rule->_numSpecials <= (words_nr - word)) {
				reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *words_iter));
			}

			seeker = seeker->next;
		}

		// No rule matched this word: the sentence is not in the language.
		if (reduced_rules == nullptr) {
			freeRuleList(work);
			if (verbose)
				con->debugPrintf("No results.\n");
			return 1;
		}

		freeRuleList(work);

		if (word + 1 < words_nr) {
			// More words to come: expand each reduced rule's next
			// non-terminal with every matching grammar rule.
			seeker = reduced_rules;

			while (seeker) {
				if (seeker->rule->_numSpecials) {
					int my_id = seeker->rule->_data[seeker->rule->_firstSpecial];

					subseeker = _parserRules;
					while (subseeker) {
						if (subseeker->rule->_id == my_id)
							new_work = _vocab_add_rule(new_work, _vinsert(seeker->rule, subseeker->rule));

						subseeker = subseeker->next;
					}
				}

				seeker = seeker->next;
			}
			freeRuleList(reduced_rules);
		} else // last word
			new_work = reduced_rules;

		work = new_work;
		if (verbose)
			con->debugPrintf("Now at %d candidates\n", _vocab_rule_list_length(work));
		if (work == nullptr) {
			if (verbose)
				con->debugPrintf("No results.\n");
			return 1;
		}
	}

	results = work;

	if (verbose) {
		con->debugPrintf("All results (excluding the surrounding '(141 %03x' and ')'):\n", _parserBranches[0].id);
		results->print();
		con->debugPrintf("\n");
	}

	// now use the first result
	{
		int temp, pos;

		// Root: branch with the 0x141 marker leaf on the left and the
		// actual parse on the right.
		_parserNodes[0].type = kParseTreeBranchNode;
		_parserNodes[0].left = &_parserNodes[1];
		_parserNodes[0].right = &_parserNodes[2];

		_parserNodes[1].type = kParseTreeLeafNode;
		_parserNodes[1].value = 0x141;
		_parserNodes[1].right = nullptr;

		_parserNodes[2].type = kParseTreeBranchNode;
		_parserNodes[2].left = nullptr;
		_parserNodes[2].right = nullptr;

		pos = 2;

		temp = _vbpt_append(_parserNodes, &pos, 2, _parserBranches[0].id);
		//_vbpt_write_subexpression(nodes, &pos, results[_vocab_rule_list_length(results)].rule, 0, temp);
		_vbpt_write_subexpression(_parserNodes, &pos, results->rule, 0, temp);
	}

	freeRuleList(results);

	return 0;
}
|
||||
|
||||
} // End of namespace Sci
|
||||
1134
engines/sci/parser/said.cpp
Normal file
1134
engines/sci/parser/said.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1021
engines/sci/parser/vocabulary.cpp
Normal file
1021
engines/sci/parser/vocabulary.cpp
Normal file
File diff suppressed because it is too large
Load Diff
450
engines/sci/parser/vocabulary.h
Normal file
450
engines/sci/parser/vocabulary.h
Normal file
@@ -0,0 +1,450 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SCI_PARSER_VOCABULARY_H
|
||||
#define SCI_PARSER_VOCABULARY_H
|
||||
|
||||
#include "common/str.h"
|
||||
#include "common/hashmap.h"
|
||||
#include "common/hash-str.h"
|
||||
#include "common/list.h"
|
||||
|
||||
#include "sci/sci.h"
|
||||
#include "sci/engine/vm_types.h"
|
||||
#include "sci/util.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
class Serializer;
|
||||
|
||||
}
|
||||
|
||||
namespace Sci {
|
||||
|
||||
class ResourceManager;
|
||||
|
||||
/*#define VOCABULARY_DEBUG */
|
||||
|
||||
// Resource numbers of the vocabulary-related resources, per SCI version.
enum {
	VOCAB_RESOURCE_SELECTORS = 997,

	VOCAB_RESOURCE_SCI0_MAIN_VOCAB = 0,
	VOCAB_RESOURCE_SCUMM_LOC_VOCAB = 1, // Special fanmade format for vocab translate
	VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES = 900,
	VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB = 901,

	VOCAB_RESOURCE_SCI1_MAIN_VOCAB = 900,
	VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES = 901,
	VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB = 902,

	VOCAB_RESOURCE_ALT_INPUTS = 913
};
|
||||
|
||||
|
||||
// Word class bit flags; a word may belong to several classes at once
// (the grammar matches them with bitwise AND, see grammar.cpp).
enum {
	VOCAB_CLASS_PREPOSITION = 0x01,
	VOCAB_CLASS_ARTICLE = 0x02,
	VOCAB_CLASS_ADJECTIVE = 0x04,
	VOCAB_CLASS_PRONOUN = 0x08,
	VOCAB_CLASS_NOUN = 0x10,
	VOCAB_CLASS_INDICATIVE_VERB = 0x20,
	VOCAB_CLASS_ADVERB = 0x40,
	VOCAB_CLASS_IMPERATIVE_VERB = 0x80
};
|
||||
|
||||
// Token kinds used when parsing; kParseNil/kParseNumber denote special
// word values, the others structure the token stream.
enum {
	kParseEndOfInput = 0,
	kParseOpeningParenthesis = 1,
	kParseClosingParenthesis = 2,
	kParseNil = 3,
	kParseNumber = 4
};
|
||||
|
||||
#define VOCAB_MAX_WORDLENGTH 256
|
||||
|
||||
/* Anywords are ignored by the parser */
|
||||
#define VOCAB_CLASS_ANYWORD 0xff
|
||||
|
||||
/* This word class is used for numbers */
|
||||
#define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
|
||||
#define VOCAB_MAGIC_NOTHING_GROUP 0xffe
|
||||
|
||||
/* Number of nodes for each ParseTreeNode structure */
|
||||
#define VOCAB_TREE_NODES 500
|
||||
|
||||
#define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
|
||||
#define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
|
||||
#define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
|
||||
#define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
|
||||
|
||||
#define SAID_COMMA 0xf0
|
||||
#define SAID_AMP 0xf1
|
||||
#define SAID_SLASH 0xf2
|
||||
#define SAID_PARENO 0xf3
|
||||
#define SAID_PARENC 0xf4
|
||||
#define SAID_BRACKO 0xf5
|
||||
#define SAID_BRACKC 0xf6
|
||||
#define SAID_HASH 0xf7
|
||||
#define SAID_LT 0xf8
|
||||
#define SAID_GT 0xf9
|
||||
#define SAID_TERM 0xff
|
||||
|
||||
#define SAID_FIRST SAID_COMMA
|
||||
|
||||
/* There was no 'last matching word': */
|
||||
#define SAID_FULL_MATCH 0xffff
|
||||
#define SAID_NO_MATCH 0xfffe
|
||||
#define SAID_PARTIAL_MATCH 0xfffd
|
||||
|
||||
#define SAID_LONG(x) ((x) << 8)
|
||||
|
||||
// One classified word as produced by the vocabulary's word lookup
// (see Vocabulary::lookupWord).
struct ResultWord {
	int _class; /**< Word class (VOCAB_CLASS_* bit flags) */
	int _group; /**< Word group */
};
|
||||
|
||||
typedef Common::List<ResultWord> ResultWordList;
|
||||
typedef Common::List<ResultWordList> ResultWordListList;
|
||||
|
||||
typedef Common::HashMap<Common::String, ResultWordList, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap;
|
||||
|
||||
|
||||
struct ParseRuleList;
|
||||
|
||||
// A suffix-substitution entry from the suffix vocabulary: if a word ends in
// alt_suffix and its class matches class_mask, the suffix may be swapped
// for word_suffix to find the base word.
struct suffix_t {

	int class_mask; /**< the word class this suffix applies to */
	int result_class; /**< the word class a word is morphed to if it doesn't fail this check */

	int alt_suffix_length; /**< String length of the suffix */
	int word_suffix_length; /**< String length of the other suffix */

	const char *alt_suffix; /**< The alternative suffix */
	const char *word_suffix; /**< The suffix as used in the word vocabulary */

};
|
||||
|
||||
typedef Common::List<suffix_t> SuffixList;
|
||||
|
||||
|
||||
// One synonym mapping: occurrences of 'replaceant' are rewritten to
// 'replacement' during tokenization (see Vocabulary::synonymizeTokens).
struct synonym_t {
	uint16 replaceant; /**< The word group to replace */
	uint16 replacement; /**< The replacement word group for this one */
};
|
||||
|
||||
typedef Common::Array<synonym_t> SynonymList;
|
||||
|
||||
|
||||
// An alternative-input substitution (VOCAB_RESOURCE_ALT_INPUTS): maps an
// input string (or prefix, if _prefix is set) to a replacement string.
// NOTE(review): pointers presumably reference the loaded resource data -
// confirm lifetime against the loader.
struct AltInput {
	const char *_input;
	const char *_replacement;
	uint32 _inputLength;
	uint32 _replacementLength;
	bool _prefix; /**< match at the start of the input only */
};
|
||||
|
||||
|
||||
// One raw grammar branch as read from the parse-tree-branches resource:
// an id plus up to five (type, value) int pairs, 0-terminated
// (consumed by _vbuild_rule in grammar.cpp).
struct parse_tree_branch_t {
	int id;
	int data[10];
};
|
||||
|
||||
// Node kinds in the parse tree built by Vocabulary::parseGNF.
enum ParseTypes {
	kParseTreeWordNode = 4,
	kParseTreeLeafNode = 5,
	kParseTreeBranchNode = 6
};
|
||||
|
||||
// A node in the parser's output tree. Branch nodes use left/right; leaf and
// word nodes carry 'value' and may still use 'right' as a sibling link.
struct ParseTreeNode {
	ParseTypes type; /**< leaf or branch */
	int value; /**< For leaves */
	ParseTreeNode* left; /**< Left child, for branches */
	ParseTreeNode* right; /**< Right child, for branches (and word leaves) */
};
|
||||
|
||||
// Which main-vocab format variant is in use (SCI0 vs SCI1 resources).
enum VocabularyVersions {
	kVocabularySCI0 = 0,
	kVocabularySCI1 = 1
};
|
||||
|
||||
class Vocabulary {
|
||||
public:
|
||||
Vocabulary(ResourceManager *resMan, bool foreign);
|
||||
~Vocabulary();
|
||||
|
||||
// reset parser status
|
||||
void reset();
|
||||
|
||||
/**
|
||||
* Gets any word from the specified group. For debugging only.
|
||||
* @param group Group number
|
||||
*/
|
||||
const char *getAnyWordFromGroup(int group);
|
||||
|
||||
|
||||
/**
|
||||
* Looks up a single word in the words and suffixes list.
|
||||
* @param retval the list of matches
|
||||
* @param word pointer to the word to look up
|
||||
* @param word_len length of the word to look up
|
||||
*/
|
||||
void lookupWord(ResultWordList &retval, const char *word, int word_len);
|
||||
|
||||
/**
|
||||
* Looks up a single word in the words list, taking into account suffixes, and updating parent_retval if a matching prefix is found
|
||||
* Note: there is no equivalent in Sierra SCI, added to support specific languages translations
|
||||
* For other languages, it does nothing
|
||||
* @param parent_retval parent's function list of matches
|
||||
* @param retval the list of matches
|
||||
* @param word pointer to the word to look up
|
||||
* @param word_len length of the word to look up
|
||||
*/
|
||||
void lookupWordPrefix(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len);
|
||||
|
||||
/**
|
||||
* Helper function for lookupWordPrefix, checking specific prefix for match
|
||||
* Intended for nouns and prepositions, and the prefix has meaning as another word
|
||||
* @param parent_retval lookupWordPrefix's parent's function list of matches
|
||||
* @param retval lookupWordPrefix's list of matches
|
||||
* @param word pointer to the word to look up
|
||||
* @param word_len length of the word to look up
|
||||
* @param prefix the prefix to look for in the word
|
||||
* @param meaning the meaning of that prefix
|
||||
* @return true on prefix match, false on prefix not matching
|
||||
*/
|
||||
bool lookupSpecificPrefixWithMeaning(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len, unsigned char prefix, const char *meaning);
|
||||
|
||||
/**
|
||||
* Helper function for lookupWordPrefix, checking specific prefix for match
|
||||
* Intended for verbs, and the prefix doesn't have any meaning
|
||||
* @param parent_retval lookupWordPrefix's parent's function list of matches
|
||||
* @param retval lookupWordPrefix's list of matches
|
||||
* @param word pointer to the word to look up
|
||||
* @param word_len length of the word to look up
|
||||
* @param prefix the prefix to look for in the word
|
||||
* @return true on prefix match, false on prefix not matching
|
||||
*/
|
||||
bool lookupVerbPrefix(ResultWordListList &parent_retval, ResultWordList &retval, Common::String word, int word_len, Common::String prefix);
|
||||
|
||||
/**
|
||||
* Tokenizes a string and compiles it into word_ts.
|
||||
* @param[in] retval A list of words which will be set to the result
|
||||
* @param[out] sentence The sentence to examine
|
||||
* @param[out] error Points to a malloc'd copy of the offending text or to NULL on error
|
||||
* @return true on success, false on failure
|
||||
*
|
||||
* On error, false is returned. If *error is NULL, the sentence did not
|
||||
* contain any useful words; if not, *error points to a malloc'd copy of
|
||||
* the offending word. The returned list may contain anywords.
|
||||
*/
|
||||
bool tokenizeString(ResultWordListList &retval, const char *sentence, char **error);
|
||||
|
||||
/**
|
||||
* Builds a parse tree from a list of words, using a set of Greibach Normal
|
||||
* Form rules.
|
||||
* @param words The words to build the tree from
|
||||
* @param verbose Set to true for debugging
|
||||
* @return 0 on success, 1 if the tree couldn't be built in VOCAB_TREE_NODES
|
||||
* nodes or if the sentence structure in 'words' is not part of the
|
||||
* language described by the grammar passed in 'rules'.
|
||||
*/
|
||||
int parseGNF(const ResultWordListList &words, bool verbose = false);
|
||||
|
||||
/**
|
||||
* Find and store reference for future pronouns
|
||||
*/
|
||||
bool storePronounReference();
|
||||
|
||||
/**
|
||||
* Replace pronouns by stored reference
|
||||
*/
|
||||
void replacePronouns(ResultWordListList &words);
|
||||
|
||||
/**
|
||||
* Constructs the Greibach Normal Form of the grammar supplied in 'branches'.
|
||||
* @param verbose Set to true for debugging. If true, the list is
|
||||
* freed before the function ends
|
||||
* @return Pointer to a list of singly linked GNF rules describing the same
|
||||
* language that was described by 'branches'
|
||||
*
|
||||
* The original SCI rules are in almost-CNF (Chomsky Normal Form). Note that
|
||||
* branch[0] is used only for a few magical incantations, as it is treated
|
||||
* specially by the SCI parser.
|
||||
*/
|
||||
ParseRuleList *buildGNF(bool verbose = false);
|
||||
|
||||
/**
|
||||
* Deciphers a said block and dumps its content via debugN.
|
||||
* For debugging only.
|
||||
* @param data pointer to the data to dump
|
||||
*/
|
||||
void debugDecipherSaidBlock(const SciSpan<const byte> &data);
|
||||
|
||||
/**
|
||||
* Prints the parser suffixes to the debug console.
|
||||
*/
|
||||
void printSuffixes() const;
|
||||
|
||||
/**
|
||||
* Prints the parser words to the debug console.
|
||||
*/
|
||||
void printParserWords() const;
|
||||
|
||||
/**
 * Returns the number of parse-tree branches currently loaded.
 */
uint getParserBranchesSize() const {
	return _parserBranches.size();
}
|
||||
/**
 * Returns the parse-tree branch at index 'number'.
 * NOTE(review): no bounds check is performed — callers are presumably
 * expected to stay below getParserBranchesSize().
 */
const parse_tree_branch_t &getParseTreeBranch(int number) const {
	return _parserBranches[number];
}
|
||||
|
||||
/**
|
||||
* Adds a new synonym to the list
|
||||
*/
|
||||
/**
 * Appends a synonym to the list; duplicates are not filtered out.
 */
void addSynonym(synonym_t syn) {
	_synonyms.push_back(syn);
}
|
||||
|
||||
/**
|
||||
* Clears the list of synonyms
|
||||
*/
|
||||
/**
 * Removes every stored synonym.
 */
void clearSynonyms() {
	_synonyms.clear();
}
|
||||
|
||||
/**
|
||||
* Synonymizes a token list
|
||||
* Parameters: (ResultWordListList &) words: The word list to synonymize
|
||||
*/
|
||||
void synonymizeTokens(ResultWordListList &words);
|
||||
|
||||
void printParserNodes(int num);
|
||||
|
||||
void dumpParseTree();
|
||||
|
||||
int parseNodes(int *i, int *pos, int type, int nr, int argc, const char **argv);
|
||||
|
||||
/**
|
||||
* Check text input against alternative inputs.
|
||||
* @param text The text to process. It will be modified in-place
|
||||
* @param cursorPos The cursor position
|
||||
* @return true if anything changed
|
||||
*/
|
||||
bool checkAltInput(Common::String& text, uint16& cursorPos);
|
||||
|
||||
/**
|
||||
* Save/load vocabulary data
|
||||
*/
|
||||
void saveLoadWithSerializer(Common::Serializer &ser);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Loads all words from the main vocabulary.
|
||||
* @return true on success, false on failure
|
||||
*/
|
||||
bool loadParserWords();
|
||||
|
||||
/**
|
||||
* Loads additional translated words from special format vocabulary.
|
||||
*
|
||||
*/
|
||||
void loadTranslatedWords();
|
||||
|
||||
/**
|
||||
* Loads all suffixes from the suffix vocabulary.
|
||||
* @return true on success, false on failure
|
||||
*/
|
||||
bool loadSuffixes();
|
||||
|
||||
/**
|
||||
* Frees all suffixes in the given list.
|
||||
*
|
||||
*/
|
||||
void freeSuffixes();
|
||||
|
||||
/**
|
||||
* Retrieves all grammar rules from the resource data.
|
||||
* The rules are stored into the _parserBranches member array.
|
||||
* @return true on success, false on error
|
||||
*/
|
||||
bool loadBranches();
|
||||
|
||||
/**
|
||||
* Frees a parser rule list as returned by buildGNF().
|
||||
* @param rule_list the rule list to free
|
||||
*/
|
||||
void freeRuleList(ParseRuleList *rule_list);
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves all alternative input combinations from vocab 913.
|
||||
* @return true on success, false on error
|
||||
*/
|
||||
bool loadAltInputs();
|
||||
|
||||
/**
|
||||
* Frees all alternative input combinations.
|
||||
*/
|
||||
void freeAltInputs();
|
||||
|
||||
ResourceManager *_resMan;
|
||||
VocabularyVersions _vocabVersion;
|
||||
|
||||
bool _foreign;
|
||||
uint16 _resourceIdWords;
|
||||
uint16 _resourceIdSuffixes;
|
||||
uint16 _resourceIdBranches;
|
||||
|
||||
// Parser-related lists
|
||||
SuffixList _parserSuffixes;
|
||||
ParseRuleList *_parserRules; /**< GNF rules used in the parser algorithm */
|
||||
Common::Array<parse_tree_branch_t> _parserBranches;
|
||||
WordMap _parserWords;
|
||||
SynonymList _synonyms; /**< The list of synonyms */
|
||||
Common::Array<Common::List<AltInput> > _altInputs;
|
||||
|
||||
struct PrefixMeaning {
|
||||
unsigned char prefix;
|
||||
const char *meaning;
|
||||
};
|
||||
|
||||
int _pronounReference;
|
||||
|
||||
public:
|
||||
// Accessed by said()
|
||||
ParseTreeNode _parserNodes[VOCAB_TREE_NODES]; /**< The parse tree */
|
||||
|
||||
// Parser data:
|
||||
reg_t parser_event; /**< The event passed to Parse() and later used by Said() */
|
||||
bool parserIsValid; /**< If something has been correctly parsed */
|
||||
};
|
||||
|
||||
/**
|
||||
* Prints a parse tree.
|
||||
* @param tree_name Name of the tree to dump (free-form)
|
||||
* @param nodes The nodes containing the parse tree
|
||||
*/
|
||||
void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Builds a parse tree from a spec and compares it to a parse tree.
|
||||
* @param spec Pointer to the spec to build
|
||||
* @param verbose Whether to display the parse tree after building it
|
||||
* @return 1 on a match, 0 otherwise
|
||||
*/
|
||||
int said(const byte *spec, bool verbose);
|
||||
|
||||
} // End of namespace Sci
|
||||
|
||||
#endif // SCI_PARSER_VOCABULARY_H
|
||||
Reference in New Issue
Block a user