Initial commit
This commit is contained in:
638
engines/sci/parser/grammar.cpp
Normal file
638
engines/sci/parser/grammar.cpp
Normal file
@@ -0,0 +1,638 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Functionality to transform the context-free SCI grammar rules into
|
||||
* strict Greibach normal form (strict GNF), and to test SCI input against
|
||||
* that grammar, writing an appropriate node tree if successful.
|
||||
*/
|
||||
|
||||
#include "sci/parser/vocabulary.h"
|
||||
#include "sci/console.h"
|
||||
#include "common/array.h"
|
||||
#include "common/textconsole.h"
|
||||
|
||||
namespace Sci {
|
||||
|
||||
#define TOKEN_OPAREN 0xff000000
|
||||
#define TOKEN_CPAREN 0xfe000000
|
||||
#define TOKEN_TERMINAL_CLASS 0x10000
|
||||
#define TOKEN_TERMINAL_GROUP 0x20000
|
||||
#define TOKEN_STUFFING_LEAF 0x40000
|
||||
#define TOKEN_STUFFING_WORD 0x80000
|
||||
#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_LEAF | TOKEN_STUFFING_WORD)
|
||||
#define TOKEN_TERMINAL (TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP)
|
||||
|
||||
static int _allocd_rules = 0; // FIXME: Avoid non-const global vars
|
||||
|
||||
/**
 * A single grammar rule: a non-terminal ID plus a token stream in _data.
 * Tokens are either flagged values (TOKEN_* bits) or plain non-terminal IDs.
 */
struct ParseRule {
	int _id; /**< non-terminal ID */
	uint _firstSpecial; /**< first terminal or non-terminal */
	uint _numSpecials; /**< number of terminals and non-terminals */
	Common::Array<int> _data; /**< actual data */

	// Every live ParseRule is tracked in the global _allocd_rules counter
	// (used for leak diagnostics); the destructor keeps it balanced.
	~ParseRule() {
		assert(_allocd_rules > 0);
		--_allocd_rules;
	}

	// FIXME remove this one again?
	// Structural equality: all four fields must match.
	bool operator==(const ParseRule &other) const {
		return _id == other._id &&
			_firstSpecial == other._firstSpecial &&
			_numSpecials == other._numSpecials &&
			_data == other._data;
	}
};
|
||||
|
||||
|
||||
/**
 * Singly linked list node that owns one ParseRule.
 * The leading terminal of the rule is cached in 'terminal' for fast lookup.
 */
struct ParseRuleList {
	int terminal; /**< Terminal character this rule matches against or 0 for a non-terminal rule */
	ParseRule *rule;
	ParseRuleList *next;

	void print() const;

	// Takes ownership of 'r'; caches the rule's first special token if it
	// is a terminal, else 0.
	ParseRuleList(ParseRule *r) : rule(r), next(nullptr) {
		int term = rule->_data[rule->_firstSpecial];
		terminal = ((term & TOKEN_TERMINAL) ? term : 0);
	}

	// Deletes the owned rule AND the entire tail of the list.
	// NOTE: recursion depth equals the remaining list length.
	~ParseRuleList() {
		delete rule;
		delete next;
	}
};
|
||||
|
||||
|
||||
/**
 * Dumps one rule to the debug console. The first special (terminal or
 * non-terminal) position is marked with '_'. Debugging aid only.
 */
static void vocab_print_rule(ParseRule *rule) {
	int wspace = 0; // whether a separating space is pending before the next token

	if (!rule) {
		warning("NULL rule");
		return;
	}

	debugN("[%03x] -> ", rule->_id);

	if (rule->_data.empty())
		debugN("e"); // epsilon: the empty production

	for (uint i = 0; i < rule->_data.size(); i++) {
		uint token = rule->_data[i];

		if (token == TOKEN_OPAREN) {
			if (i == rule->_firstSpecial)
				debugN("_");

			debugN("(");
			wspace = 0;
		} else if (token == TOKEN_CPAREN) {
			if (i == rule->_firstSpecial)
				debugN("_");

			debugN(")");
			wspace = 0;
		} else {
			if (wspace)
				debugN(" ");

			if (i == rule->_firstSpecial)
				debugN("_");
			// Decode the token kind from its flag bits (see TOKEN_* defines).
			if (token & TOKEN_TERMINAL_CLASS)
				debugN("C(%04x)", token & 0xffff);
			else if (token & TOKEN_TERMINAL_GROUP)
				debugN("G(%04x)", token & 0xffff);
			else if (token & TOKEN_STUFFING_LEAF)
				debugN("%03x", token & 0xffff);
			else if (token & TOKEN_STUFFING_WORD)
				debugN("{%03x}", token & 0xffff);
			else
				debugN("[%03x]", token); /* non-terminal */
			wspace = 1;
		}

		if (i == rule->_firstSpecial)
			debugN("_");
	}
	debugN(" [%d specials]", rule->_numSpecials);
}
|
||||
|
||||
/**
 * Duplicates a rule, keeping the global allocation counter in sync.
 * The caller owns the returned copy.
 */
static ParseRule *_vdup(ParseRule *a) {
	ParseRule *const copy = new ParseRule(*a);
	++_allocd_rules;
	return copy;
}
|
||||
|
||||
/**
 * Performs one derivation step: substitutes the first non-terminal of
 * 'turkey' by the full body of 'stuffing'.
 * Neither input rule is modified or freed.
 * @return a freshly allocated rule, or nullptr if turkey has no
 *         non-terminal or its first non-terminal doesn't match stuffing's id
 */
static ParseRule *_vinsert(ParseRule *turkey, ParseRule *stuffing) {
	uint firstnt = turkey->_firstSpecial;

	// Skip over all TOKEN_NON_NT-flagged tokens to find the first plain
	// non-terminal in 'turkey'.
	while ((firstnt < turkey->_data.size()) && (turkey->_data[firstnt] & TOKEN_NON_NT))
		firstnt++;

	// If no non-terminal found, or if it doesn't match the id of 'stuffing', abort.
	if ((firstnt == turkey->_data.size()) || (turkey->_data[firstnt] != stuffing->_id))
		return nullptr;

	// Create a new rule as a copy of 'turkey', where the token firstnt has been substituted
	// by the rule 'stuffing'.
	++_allocd_rules;

	ParseRule *rule = new ParseRule(*turkey);
	rule->_numSpecials += stuffing->_numSpecials - 1; // the substituted special is consumed
	rule->_firstSpecial = firstnt + stuffing->_firstSpecial;
	rule->_data.resize(turkey->_data.size() - 1 + stuffing->_data.size());

	// Replace rule->_data[firstnt] by all of stuffing->_data
	Common::copy(stuffing->_data.begin(), stuffing->_data.end(), rule->_data.begin() + firstnt);

	// Shift the remainder of turkey's tokens in behind the inserted data.
	if (firstnt < turkey->_data.size() - 1)
		Common::copy(turkey->_data.begin() + firstnt + 1, turkey->_data.end(),
				rule->_data.begin() + firstnt + stuffing->_data.size());

	return rule;
}
|
||||
|
||||
/**
 * Builds a ParseRule from one raw parser branch (up to 10 ints holding
 * (type, value) pairs, 0-terminated).
 * @return the new rule, or nullptr if the branch contains an invalid type
 */
static ParseRule *_vbuild_rule(const parse_tree_branch_t *branch) {
	int tokens = 0, tokenpos = 0, i;

	// First pass: count how many rule tokens will be emitted.
	while (tokenpos < 10 && branch->data[tokenpos]) {
		int type = branch->data[tokenpos];
		tokenpos += 2;

		if ((type == VOCAB_TREE_NODE_COMPARE_TYPE) || (type == VOCAB_TREE_NODE_COMPARE_GROUP) || (type == VOCAB_TREE_NODE_FORCE_STORAGE))
			++tokens; // terminal: a single token
		else if (type > VOCAB_TREE_NODE_LAST_WORD_STORAGE)
			tokens += 5; // inductive rule: '(', two stuffing leaves, non-terminal, ')'
		else
			return nullptr; // invalid
	}

	ParseRule *rule = new ParseRule();

	++_allocd_rules;
	rule->_id = branch->id;
	rule->_numSpecials = tokenpos >> 1; // one special per (type, value) pair
	rule->_data.resize(tokens);
	rule->_firstSpecial = 0;

	// Second pass: emit the tokens.
	tokens = 0;
	for (i = 0; i < tokenpos; i += 2) {
		int type = branch->data[i];
		int value = branch->data[i + 1];

		if (type == VOCAB_TREE_NODE_COMPARE_TYPE)
			rule->_data[tokens++] = value | TOKEN_TERMINAL_CLASS;
		else if (type == VOCAB_TREE_NODE_COMPARE_GROUP)
			rule->_data[tokens++] = value | TOKEN_TERMINAL_GROUP;
		else if (type == VOCAB_TREE_NODE_FORCE_STORAGE)
			rule->_data[tokens++] = value | TOKEN_STUFFING_WORD;
		else { // normal inductive rule
			rule->_data[tokens++] = TOKEN_OPAREN;
			rule->_data[tokens++] = type | TOKEN_STUFFING_LEAF;
			rule->_data[tokens++] = value | TOKEN_STUFFING_LEAF;

			// The very first pair determines where the first special sits.
			if (i == 0)
				rule->_firstSpecial = tokens;

			rule->_data[tokens++] = value; // The non-terminal
			rule->_data[tokens++] = TOKEN_CPAREN;
		}
	}

	return rule;
}
|
||||
|
||||
/**
 * Attempts to satisfy the first special of 'rule' with any word in 'input'.
 * On success, returns a new rule where the special has been replaced by the
 * matching word group(s); on failure (or no specials left), returns nullptr.
 * 'rule' itself is not modified.
 */
static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWordList &input) {
	int dep;

	if (!rule->_numSpecials)
		return nullptr;

	dep = rule->_data[rule->_firstSpecial]; // the terminal token to satisfy

	int count = 0;
	int match = 0;
	ResultWordList::const_iterator iter;
	// TODO: Inserting an array in the middle of another array is slow
	Common::Array<int> matches;
	matches.reserve(input.size());

	// We store the first match in 'match', and any subsequent matches in
	// 'matches'. 'match' replaces the special in the rule, and 'matches' gets
	// inserted after it.
	for (iter = input.begin(); iter != input.end(); ++iter)
		if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & iter->_class)) ||
				((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & iter->_group))) {
			if (count == 0)
				match = TOKEN_STUFFING_WORD | iter->_group;
			else
				matches.push_back(TOKEN_STUFFING_WORD | iter->_group);
			count++;
		}

	if (count) {
		ParseRule *retval = new ParseRule(*rule);
		++_allocd_rules;
		retval->_data[rule->_firstSpecial] = match;
		if (count > 1)
			retval->_data.insert_at(rule->_firstSpecial+1, matches);
		retval->_numSpecials--;
		retval->_firstSpecial = 0;

		if (retval->_numSpecials) { // find first special, if it exists
			// Scan from the old first-special position; earlier tokens
			// cannot be specials any more.
			for (uint i = rule->_firstSpecial; i < retval->_data.size(); ++i) {
				int tmp = retval->_data[i];
				if (!(tmp & TOKEN_NON_NT) || (tmp & TOKEN_TERMINAL)) {
					retval->_firstSpecial = i;
					break;
				}
			}
		}

		return retval;
	} else
		return nullptr;
}
|
||||
|
||||
/**
 * Disposes of a rule list. The ParseRuleList destructor cascades down the
 * 'next' chain, so deleting the head releases every node and its rule.
 */
void Vocabulary::freeRuleList(ParseRuleList *list) {
	delete list;
}
|
||||
|
||||
/**
 * Appends 'rule' to 'list', taking ownership of 'rule'.
 * Null rules, empty rules and exact duplicates are dropped (and freed where
 * applicable).
 * @return the (possibly new) list head
 */
static ParseRuleList *_vocab_add_rule(ParseRuleList *list, ParseRule *rule) {
	if (!rule)
		return list;
	if (!rule->_data.size()) {
		// Special case for qfg2 demo
		warning("no rule contents on _vocab_add_rule()");
		return list;
	}

	ParseRuleList *new_elem = new ParseRuleList(rule);

	if (list) {
		const int term = new_elem->terminal;
/*		if (term < list->terminal) {
			new_elem->next = list;
			return new_elem;
		} else {*/
		ParseRuleList *seeker = list;

		// Walk to the tail, bailing out early if an identical rule with the
		// same leading terminal already exists.
		while (seeker->next/* && seeker->next->terminal <= term*/) {
			if (seeker->next->terminal == term) {
				if (*(seeker->next->rule) == *rule) {
					delete new_elem; // NB: This also deletes 'rule'

					return list; // No duplicate rules
				}
			}
			seeker = seeker->next;
		}

		new_elem->next = seeker->next;
		seeker->next = new_elem;
		return list;
	} else {
		return new_elem;
	}
}
|
||||
|
||||
void ParseRuleList::print() const {
|
||||
const ParseRuleList *list = this;
|
||||
int pos = 0;
|
||||
while (list) {
|
||||
debugN("R%03d: ", pos);
|
||||
vocab_print_rule(list->rule);
|
||||
debugN("\n");
|
||||
list = list->next;
|
||||
pos++;
|
||||
}
|
||||
debugN("%d rules total.\n", pos);
|
||||
}
|
||||
|
||||
/**
 * Splits the list in two: the leading run of non-terminal rules stays in
 * 'list', and the remainder (starting at the first terminal-led rule) is
 * detached and returned (may be nullptr).
 */
static ParseRuleList *_vocab_split_rule_list(ParseRuleList *list) {
	assert(list);

	// Advance while the successor is still a non-terminal rule.
	ParseRuleList *node = list;
	while (node->next && !node->next->terminal)
		node = node->next;

	// Detach and hand back the terminal-led tail.
	ParseRuleList *tail = node->next;
	node->next = nullptr;
	return tail;
}
|
||||
|
||||
/**
 * Frees the list nodes WITHOUT freeing the rules they point to (the rules
 * are assumed to have been handed off elsewhere). Each node's 'rule' and
 * 'next' are nulled before deletion so the cascading destructor is inert.
 */
static void _vocab_free_empty_rule_list(ParseRuleList *list) {
	assert(list);
	while (list) {
		ParseRuleList *tail = list->next;
		list->next = nullptr;
		list->rule = nullptr;
		delete list;
		list = tail;
	}
}
|
||||
|
||||
/**
 * Merges l2 into l1 (transferring rule ownership via _vocab_add_rule) and
 * disposes of l2's now-empty node shells.
 * @return the merged list head
 */
static ParseRuleList *_vocab_merge_rule_lists(ParseRuleList *l1, ParseRuleList *l2) {
	ParseRuleList *merged = l1;

	for (ParseRuleList *node = l2; node; node = node->next)
		merged = _vocab_add_rule(merged, node->rule);

	// The rules now live in 'merged'; free only l2's bare nodes.
	_vocab_free_empty_rule_list(l2);

	return merged;
}
|
||||
|
||||
/**
 * Counts the nodes in a rule list (0 for nullptr).
 */
static int _vocab_rule_list_length(ParseRuleList *list) {
	int length = 0;
	for (; list; list = list->next)
		++length;
	return length;
}
|
||||
|
||||
/**
 * Builds a new list containing duplicates of every rule in 'list' whose
 * non-terminal ID equals 'id'. The input list is left untouched.
 * @return the new list, or nullptr if no rule matched
 */
static ParseRuleList *_vocab_clone_rule_list_by_id(ParseRuleList *list, int id) {
	ParseRuleList *clones = nullptr;

	for (ParseRuleList *node = list; node; node = node->next) {
		if (node->rule->_id == id)
			clones = _vocab_add_rule(clones, _vdup(node->rule));
	}

	return clones;
}
|
||||
|
||||
ParseRuleList *Vocabulary::buildGNF(bool verbose) {
	int iterations = 0;
	int termrules = 0;
	int ntrules_nr;
	ParseRuleList *ntlist = nullptr; // rules still starting with a non-terminal
	ParseRuleList *tlist, *new_tlist; // terminal-led (GNF) rules
	Console *con = g_sci->getSciDebugger();

	// Build the initial rule set from the raw parser branches.
	for (uint i = 1; i < _parserBranches.size(); i++) { // branch rule 0 is treated specially
		ParseRule *rule = _vbuild_rule(&_parserBranches[i]);
		if (!rule) {
			freeRuleList(ntlist);
			return nullptr;
		}
		ntlist = _vocab_add_rule(ntlist, rule);
	}

	// Split off the rules that already start with a terminal.
	tlist = _vocab_split_rule_list(ntlist);
	ntrules_nr = _vocab_rule_list_length(ntlist);

	if (verbose)
		con->debugPrintf("Starting with %d rules\n", ntrules_nr);

	new_tlist = tlist;
	tlist = nullptr;

	// Fixed-point iteration: substitute each terminal-led rule into each
	// non-terminal rule; repeat until no new terminal-led rules appear,
	// capped at 30 iterations.
	do {
		ParseRuleList *new_new_tlist = nullptr;
		ParseRuleList *ntseeker, *tseeker;

		ntseeker = ntlist;
		while (ntseeker) {
			tseeker = new_tlist;

			while (tseeker) {
				ParseRule *newrule = _vinsert(ntseeker->rule, tseeker->rule);
				if (newrule)
					new_new_tlist = _vocab_add_rule(new_new_tlist, newrule);
				tseeker = tseeker->next;
			}

			ntseeker = ntseeker->next;
		}

		// Fold last round's new rules into the result, then iterate on the
		// rules produced this round.
		tlist = _vocab_merge_rule_lists(tlist, new_tlist);
		new_tlist = new_new_tlist;
		termrules = _vocab_rule_list_length(new_new_tlist);

		if (verbose)
			con->debugPrintf("After iteration #%d: %d new term rules\n", ++iterations, termrules);

	} while (termrules && (iterations < 30));

	freeRuleList(ntlist);

	// In verbose mode the list is only printed and then freed; the caller
	// receives nullptr (documented behavior of this debug path).
	if (verbose) {
		con->debugPrintf("\nGNF rules:\n");
		tlist->print();
		con->debugPrintf("%d allocd rules\n", _allocd_rules);
		con->debugPrintf("Freeing rule list...\n");
		freeRuleList(tlist);
		return nullptr;
	}

	return tlist;
}
|
||||
|
||||
/**
 * Opens a parenthesized subtree: allocates the next node as an empty branch
 * node, attaches it as the left child of 'base', and returns its index.
 */
static int _vbpt_pareno(ParseTreeNode *nodes, int *pos, int base) {
	const int newnode = ++(*pos);
	nodes[base].left = &nodes[newnode];
	nodes[newnode].type = kParseTreeBranchNode;
	nodes[newnode].left = nullptr;
	nodes[newnode].right = nullptr;
	return newnode;
}
|
||||
|
||||
/**
 * Closes a parenthesized subtree for appending: allocates a fresh empty
 * branch node as the right child of 'paren' and returns its index.
 */
static int _vbpt_parenc(ParseTreeNode *nodes, int *pos, int paren) {
	const int newnode = ++(*pos);
	nodes[paren].right = &nodes[newnode];
	nodes[newnode].type = kParseTreeBranchNode;
	nodes[newnode].left = nullptr;
	nodes[newnode].right = nullptr;
	return newnode;
}
|
||||
|
||||
/**
 * Writes one leaf value under 'base' and creates a successor branch node
 * for subsequent writes.
 * @return index of the new successor (next write position)
 */
static int _vbpt_append(ParseTreeNode *nodes, int *pos, int base, int value) {
	// Leaf holding the value, as the left child.
	const int leaf = ++(*pos);
	nodes[base].left = &nodes[leaf];
	nodes[leaf].type = kParseTreeLeafNode;
	nodes[leaf].value = value;
	nodes[leaf].right = nullptr;

	// Empty fork node for the next write, as the right child.
	const int fork = ++(*pos);
	nodes[base].right = &nodes[fork];
	nodes[fork].type = kParseTreeBranchNode;
	nodes[fork].left = nullptr;
	nodes[fork].right = nullptr;
	return fork;
}
|
||||
|
||||
/**
 * Terminates a chain by overwriting the pending fork node at 'base' with a
 * leaf carrying 'value'. No new nodes are allocated.
 */
static int _vbpt_terminate(ParseTreeNode *nodes, int *pos, int base, int value) {
	ParseTreeNode &node = nodes[base];
	node.type = kParseTreeLeafNode;
	node.value = value;
	node.right = nullptr;
	return *pos;
}
|
||||
/**
 * Writes a word value into the node at 'base' and creates a sibling branch
 * node for subsequent writes.
 * @return index of the new sibling (next write position)
 */
static int _vbpt_append_word(ParseTreeNode *nodes, int *pos, int base, int value) {
	nodes[base].type = kParseTreeWordNode;
	nodes[base].value = value;

	const int sibling = ++(*pos);
	nodes[base].right = &nodes[sibling];
	nodes[sibling].type = kParseTreeBranchNode;
	nodes[sibling].left = nullptr;
	nodes[sibling].right = nullptr;
	return sibling;
}
|
||||
|
||||
/**
 * Terminates a chain by overwriting the pending fork node at 'base' with a
 * word node carrying 'value'. No new nodes are allocated.
 */
static int _vbpt_terminate_word(ParseTreeNode *nodes, int *pos, int base, int value) {
	ParseTreeNode &node = nodes[base];
	node.type = kParseTreeWordNode;
	node.value = value;
	node.right = nullptr;
	return *pos;
}
|
||||
|
||||
|
||||
/**
 * Recursively writes one subexpression of 'rule' (starting at 'rulepos')
 * into the node array, until the matching TOKEN_CPAREN (running off the end
 * of the rule data is treated the same as a closing paren).
 * @return the rule position just past the written subexpression
 */
static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) {
	uint token;

	while ((token = ((rulepos < rule->_data.size()) ? rule->_data[rulepos++] : TOKEN_CPAREN)) != TOKEN_CPAREN) {
		// Look ahead one token to decide between append (more to come) and
		// terminate (last token of the subtree).
		uint nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN;
		if (token == TOKEN_OPAREN) {
			int writepos2 = _vbpt_pareno(nodes, pos, writepos);
			rulepos = _vbpt_write_subexpression(nodes, pos, rule, rulepos, writepos2);
			// Re-evaluate the lookahead: the recursion advanced rulepos.
			nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN;
			if (nexttoken != TOKEN_CPAREN)
				writepos = _vbpt_parenc(nodes, pos, writepos);
		} else if (token & TOKEN_STUFFING_LEAF) {
			if (nexttoken == TOKEN_CPAREN)
				writepos = _vbpt_terminate(nodes, pos, writepos, token & 0xffff);
			else
				writepos = _vbpt_append(nodes, pos, writepos, token & 0xffff);
		} else if (token & TOKEN_STUFFING_WORD) {
			if (nexttoken == TOKEN_CPAREN)
				writepos = _vbpt_terminate_word(nodes, pos, writepos, token & 0xffff);
			else
				writepos = _vbpt_append_word(nodes, pos, writepos, token & 0xffff);
		} else {
			// Anything else (e.g. a bare non-terminal) means the rule was
			// not fully reduced to GNF - report and bail out.
			warning("\nError in parser (grammar.cpp, _vbpt_write_subexpression()): Rule data broken in rule ");
			vocab_print_rule(rule);
			debugN(", at token position %d\n", *pos);
			return rulepos;
		}
	}

	return rulepos;
}
|
||||
|
||||
int Vocabulary::parseGNF(const ResultWordListList &words, bool verbose) {
	Console *con = g_sci->getSciDebugger();
	// Get the start rules:
	ParseRuleList *work = _vocab_clone_rule_list_by_id(_parserRules, _parserBranches[0].data[1]);
	ParseRuleList *results = nullptr;
	uint word = 0;
	const uint words_nr = words.size();
	ResultWordListList::const_iterator words_iter;

	// Consume the input one word at a time, narrowing the candidate rule set.
	for (words_iter = words.begin(); words_iter != words.end(); ++words_iter, ++word) {
		ParseRuleList *new_work = nullptr;
		ParseRuleList *reduced_rules = nullptr;
		ParseRuleList *seeker, *subseeker;

		if (verbose)
			con->debugPrintf("Adding word %d...\n", word);

		seeker = work;
		while (seeker) {
			// Skip rules that need more specials than there are words left.
			if (seeker->rule->_numSpecials <= (words_nr - word)) {
				reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *words_iter));
			}

			seeker = seeker->next;
		}

		// No rule matched this word: the sentence is not in the language.
		if (reduced_rules == nullptr) {
			freeRuleList(work);
			if (verbose)
				con->debugPrintf("No results.\n");
			return 1;
		}

		freeRuleList(work);

		if (word + 1 < words_nr) {
			// More words to come: expand each reduced rule's next
			// non-terminal with every matching grammar rule.
			seeker = reduced_rules;

			while (seeker) {
				if (seeker->rule->_numSpecials) {
					int my_id = seeker->rule->_data[seeker->rule->_firstSpecial];

					subseeker = _parserRules;
					while (subseeker) {
						if (subseeker->rule->_id == my_id)
							new_work = _vocab_add_rule(new_work, _vinsert(seeker->rule, subseeker->rule));

						subseeker = subseeker->next;
					}
				}

				seeker = seeker->next;
			}
			freeRuleList(reduced_rules);
		} else // last word
			new_work = reduced_rules;

		work = new_work;
		if (verbose)
			con->debugPrintf("Now at %d candidates\n", _vocab_rule_list_length(work));
		if (work == nullptr) {
			if (verbose)
				con->debugPrintf("No results.\n");
			return 1;
		}
	}

	results = work;

	if (verbose) {
		con->debugPrintf("All results (excluding the surrounding '(141 %03x' and ')'):\n", _parserBranches[0].id);
		results->print();
		con->debugPrintf("\n");
	}

	// now use the first result
	{
		int temp, pos;

		// Root: branch with the 0x141 marker leaf on the left and the
		// actual parse on the right.
		_parserNodes[0].type = kParseTreeBranchNode;
		_parserNodes[0].left = &_parserNodes[1];
		_parserNodes[0].right = &_parserNodes[2];

		_parserNodes[1].type = kParseTreeLeafNode;
		_parserNodes[1].value = 0x141;
		_parserNodes[1].right = nullptr;

		_parserNodes[2].type = kParseTreeBranchNode;
		_parserNodes[2].left = nullptr;
		_parserNodes[2].right = nullptr;

		pos = 2;

		temp = _vbpt_append(_parserNodes, &pos, 2, _parserBranches[0].id);
		//_vbpt_write_subexpression(nodes, &pos, results[_vocab_rule_list_length(results)].rule, 0, temp);
		_vbpt_write_subexpression(_parserNodes, &pos, results->rule, 0, temp);
	}

	freeRuleList(results);

	return 0;
}
|
||||
|
||||
} // End of namespace Sci
|
||||
1134
engines/sci/parser/said.cpp
Normal file
1134
engines/sci/parser/said.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1021
engines/sci/parser/vocabulary.cpp
Normal file
1021
engines/sci/parser/vocabulary.cpp
Normal file
File diff suppressed because it is too large
Load Diff
450
engines/sci/parser/vocabulary.h
Normal file
450
engines/sci/parser/vocabulary.h
Normal file
@@ -0,0 +1,450 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SCI_PARSER_VOCABULARY_H
|
||||
#define SCI_PARSER_VOCABULARY_H
|
||||
|
||||
#include "common/str.h"
|
||||
#include "common/hashmap.h"
|
||||
#include "common/hash-str.h"
|
||||
#include "common/list.h"
|
||||
|
||||
#include "sci/sci.h"
|
||||
#include "sci/engine/vm_types.h"
|
||||
#include "sci/util.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
class Serializer;
|
||||
|
||||
}
|
||||
|
||||
namespace Sci {
|
||||
|
||||
class ResourceManager;
|
||||
|
||||
/*#define VOCABULARY_DEBUG */
|
||||
|
||||
// Resource numbers of the vocabulary-related resources, per SCI version.
enum {
	VOCAB_RESOURCE_SELECTORS = 997,

	VOCAB_RESOURCE_SCI0_MAIN_VOCAB = 0,
	VOCAB_RESOURCE_SCUMM_LOC_VOCAB = 1, // Special fanmade format for vocab translate
	VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES = 900,
	VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB = 901,

	VOCAB_RESOURCE_SCI1_MAIN_VOCAB = 900,
	VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES = 901,
	VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB = 902,

	VOCAB_RESOURCE_ALT_INPUTS = 913
};
|
||||
|
||||
|
||||
// Word class bit flags; a word may belong to several classes at once
// (the grammar matches them with bitwise AND, see grammar.cpp).
enum {
	VOCAB_CLASS_PREPOSITION = 0x01,
	VOCAB_CLASS_ARTICLE = 0x02,
	VOCAB_CLASS_ADJECTIVE = 0x04,
	VOCAB_CLASS_PRONOUN = 0x08,
	VOCAB_CLASS_NOUN = 0x10,
	VOCAB_CLASS_INDICATIVE_VERB = 0x20,
	VOCAB_CLASS_ADVERB = 0x40,
	VOCAB_CLASS_IMPERATIVE_VERB = 0x80
};
|
||||
|
||||
// Token kinds used when parsing; kParseNil/kParseNumber denote special
// word values, the others structure the token stream.
enum {
	kParseEndOfInput = 0,
	kParseOpeningParenthesis = 1,
	kParseClosingParenthesis = 2,
	kParseNil = 3,
	kParseNumber = 4
};
|
||||
|
||||
#define VOCAB_MAX_WORDLENGTH 256
|
||||
|
||||
/* Anywords are ignored by the parser */
|
||||
#define VOCAB_CLASS_ANYWORD 0xff
|
||||
|
||||
/* This word class is used for numbers */
|
||||
#define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
|
||||
#define VOCAB_MAGIC_NOTHING_GROUP 0xffe
|
||||
|
||||
/* Number of nodes for each ParseTreeNode structure */
|
||||
#define VOCAB_TREE_NODES 500
|
||||
|
||||
#define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
|
||||
#define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
|
||||
#define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
|
||||
#define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
|
||||
|
||||
#define SAID_COMMA 0xf0
|
||||
#define SAID_AMP 0xf1
|
||||
#define SAID_SLASH 0xf2
|
||||
#define SAID_PARENO 0xf3
|
||||
#define SAID_PARENC 0xf4
|
||||
#define SAID_BRACKO 0xf5
|
||||
#define SAID_BRACKC 0xf6
|
||||
#define SAID_HASH 0xf7
|
||||
#define SAID_LT 0xf8
|
||||
#define SAID_GT 0xf9
|
||||
#define SAID_TERM 0xff
|
||||
|
||||
#define SAID_FIRST SAID_COMMA
|
||||
|
||||
/* There was no 'last matching word': */
|
||||
#define SAID_FULL_MATCH 0xffff
|
||||
#define SAID_NO_MATCH 0xfffe
|
||||
#define SAID_PARTIAL_MATCH 0xfffd
|
||||
|
||||
#define SAID_LONG(x) ((x) << 8)
|
||||
|
||||
// One classified word as produced by the vocabulary's word lookup
// (see Vocabulary::lookupWord).
struct ResultWord {
	int _class; /**< Word class (VOCAB_CLASS_* bit flags) */
	int _group; /**< Word group */
};
|
||||
|
||||
typedef Common::List<ResultWord> ResultWordList;
|
||||
typedef Common::List<ResultWordList> ResultWordListList;
|
||||
|
||||
typedef Common::HashMap<Common::String, ResultWordList, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap;
|
||||
|
||||
|
||||
struct ParseRuleList;
|
||||
|
||||
// A suffix-substitution entry from the suffix vocabulary: if a word ends in
// alt_suffix and its class matches class_mask, the suffix may be swapped
// for word_suffix to find the base word.
struct suffix_t {

	int class_mask; /**< the word class this suffix applies to */
	int result_class; /**< the word class a word is morphed to if it doesn't fail this check */

	int alt_suffix_length; /**< String length of the suffix */
	int word_suffix_length; /**< String length of the other suffix */

	const char *alt_suffix; /**< The alternative suffix */
	const char *word_suffix; /**< The suffix as used in the word vocabulary */

};
|
||||
|
||||
typedef Common::List<suffix_t> SuffixList;
|
||||
|
||||
|
||||
// One synonym mapping: occurrences of 'replaceant' are rewritten to
// 'replacement' during tokenization (see Vocabulary::synonymizeTokens).
struct synonym_t {
	uint16 replaceant; /**< The word group to replace */
	uint16 replacement; /**< The replacement word group for this one */
};
|
||||
|
||||
typedef Common::Array<synonym_t> SynonymList;
|
||||
|
||||
|
||||
// An alternative-input substitution (VOCAB_RESOURCE_ALT_INPUTS): maps an
// input string (or prefix, if _prefix is set) to a replacement string.
// NOTE(review): pointers presumably reference the loaded resource data -
// confirm lifetime against the loader.
struct AltInput {
	const char *_input;
	const char *_replacement;
	uint32 _inputLength;
	uint32 _replacementLength;
	bool _prefix; /**< match at the start of the input only */
};
|
||||
|
||||
|
||||
// One raw grammar branch as read from the parse-tree-branches resource:
// an id plus up to five (type, value) int pairs, 0-terminated
// (consumed by _vbuild_rule in grammar.cpp).
struct parse_tree_branch_t {
	int id;
	int data[10];
};
|
||||
|
||||
// Node kinds in the parse tree built by Vocabulary::parseGNF.
enum ParseTypes {
	kParseTreeWordNode = 4,
	kParseTreeLeafNode = 5,
	kParseTreeBranchNode = 6
};
|
||||
|
||||
// A node in the parser's output tree. Branch nodes use left/right; leaf and
// word nodes carry 'value' and may still use 'right' as a sibling link.
struct ParseTreeNode {
	ParseTypes type; /**< leaf or branch */
	int value; /**< For leaves */
	ParseTreeNode* left; /**< Left child, for branches */
	ParseTreeNode* right; /**< Right child, for branches (and word leaves) */
};
|
||||
|
||||
// Which main-vocab format variant is in use (SCI0 vs SCI1 resources).
enum VocabularyVersions {
	kVocabularySCI0 = 0,
	kVocabularySCI1 = 1
};
|
||||
|
||||
class Vocabulary {
|
||||
public:
|
||||
Vocabulary(ResourceManager *resMan, bool foreign);
|
||||
~Vocabulary();
|
||||
|
||||
// reset parser status
|
||||
void reset();
|
||||
|
||||
/**
|
||||
* Gets any word from the specified group. For debugging only.
|
||||
* @param group Group number
|
||||
*/
|
||||
const char *getAnyWordFromGroup(int group);
|
||||
|
||||
|
||||
/**
|
||||
* Looks up a single word in the words and suffixes list.
|
||||
* @param retval the list of matches
|
||||
* @param word pointer to the word to look up
|
||||
* @param word_len length of the word to look up
|
||||
*/
|
||||
void lookupWord(ResultWordList &retval, const char *word, int word_len);
|
||||
|
||||
/**
|
||||
* Looks up a single word in the words list, taking into account suffixes, and updating parent_retval if a matching prefix is found
|
||||
* Note: there is no equivalent in Sierra SCI, added to support specific languages translations
|
||||
* For other languages, it does nothing
|
||||
* @param parent_retval parent's function list of matches
|
||||
* @param retval the list of matches
|
||||
* @param word pointer to the word to look up
|
||||
* @param word_len length of the word to look up
|
||||
*/
|
||||
void lookupWordPrefix(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len);
|
||||
|
||||
/**
|
||||
* Helper function for lookupWordPrefix, checking specific prefix for match
|
||||
* Intended for nouns and prepositions, and the prefix has meaning as another word
|
||||
* @param parent_retval lookupWordPrefix's parent's function list of matches
|
||||
* @param retval lookupWordPrefix's list of matches
|
||||
* @param word pointer to the word to look up
|
||||
* @param word_len length of the word to look up
|
||||
* @param prefix the prefix to look for in the word
|
||||
* @param meaning the meaning of that prefix
|
||||
* @return true on prefix match, false on prefix not matching
|
||||
*/
|
||||
bool lookupSpecificPrefixWithMeaning(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len, unsigned char prefix, const char *meaning);
|
||||
|
||||
/**
|
||||
* Helper function for lookupWordPrefix, checking specific prefix for match
|
||||
* Intended for verbs, and the prefix doesn't have any meaning
|
||||
* @param parent_retval lookupWordPrefix's parent's function list of matches
|
||||
* @param retval lookupWordPrefix's list of matches
|
||||
* @param word pointer to the word to look up
|
||||
* @param word_len length of the word to look up
|
||||
* @param prefix the prefix to look for in the word
|
||||
* @return true on prefix match, false on prefix not matching
|
||||
*/
|
||||
bool lookupVerbPrefix(ResultWordListList &parent_retval, ResultWordList &retval, Common::String word, int word_len, Common::String prefix);
|
||||
|
||||
/**
|
||||
* Tokenizes a string and compiles it into word_ts.
|
||||
* @param[in] retval A list of words which will be set to the result
|
||||
* @param[out] sentence The sentence to examine
|
||||
* @param[out] error Points to a malloc'd copy of the offending text or to NULL on error
|
||||
* @return true on success, false on failure
|
||||
*
|
||||
* On error, false is returned. If *error is NULL, the sentence did not
|
||||
* contain any useful words; if not, *error points to a malloc'd copy of
|
||||
* the offending word. The returned list may contain anywords.
|
||||
*/
|
||||
bool tokenizeString(ResultWordListList &retval, const char *sentence, char **error);
|
||||
|
||||
/**
|
||||
* Builds a parse tree from a list of words, using a set of Greibach Normal
|
||||
* Form rules.
|
||||
* @param words The words to build the tree from
|
||||
* @param verbose Set to true for debugging
|
||||
* @return 0 on success, 1 if the tree couldn't be built in VOCAB_TREE_NODES
|
||||
* nodes or if the sentence structure in 'words' is not part of the
|
||||
* language described by the grammar passed in 'rules'.
|
||||
*/
|
||||
int parseGNF(const ResultWordListList &words, bool verbose = false);
|
||||
|
||||
/**
|
||||
* Find and store reference for future pronouns
|
||||
*/
|
||||
bool storePronounReference();
|
||||
|
||||
/**
|
||||
* Replace pronouns by stored reference
|
||||
*/
|
||||
void replacePronouns(ResultWordListList &words);
|
||||
|
||||
/**
|
||||
* Constructs the Greibach Normal Form of the grammar supplied in 'branches'.
|
||||
* @param verbose Set to true for debugging. If true, the list is
|
||||
* freed before the function ends
|
||||
* @return Pointer to a list of singly linked GNF rules describing the same
|
||||
* language that was described by 'branches'
|
||||
*
|
||||
* The original SCI rules are in almost-CNF (Chomsky Normal Form). Note that
|
||||
* branch[0] is used only for a few magical incantations, as it is treated
|
||||
* specially by the SCI parser.
|
||||
*/
|
||||
ParseRuleList *buildGNF(bool verbose = false);
|
||||
|
||||
/**
|
||||
* Deciphers a said block and dumps its content via debugN.
|
||||
* For debugging only.
|
||||
* @param data pointer to the data to dump
|
||||
*/
|
||||
void debugDecipherSaidBlock(const SciSpan<const byte> &data);
|
||||
|
||||
/**
|
||||
* Prints the parser suffixes to the debug console.
|
||||
*/
|
||||
void printSuffixes() const;
|
||||
|
||||
/**
|
||||
* Prints the parser words to the debug console.
|
||||
*/
|
||||
void printParserWords() const;
|
||||
|
||||
/**
 * Returns the number of parse-tree branches currently loaded.
 */
uint getParserBranchesSize() const {
	return _parserBranches.size();
}
|
||||
/**
 * Returns the parse-tree branch at index 'number'.
 * NOTE(review): no bounds check is performed — callers are presumably
 * expected to stay below getParserBranchesSize().
 */
const parse_tree_branch_t &getParseTreeBranch(int number) const {
	return _parserBranches[number];
}
|
||||
|
||||
/**
|
||||
* Adds a new synonym to the list
|
||||
*/
|
||||
/**
 * Appends a synonym to the list; duplicates are not filtered out.
 */
void addSynonym(synonym_t syn) {
	_synonyms.push_back(syn);
}
|
||||
|
||||
/**
|
||||
* Clears the list of synonyms
|
||||
*/
|
||||
/**
 * Removes every stored synonym.
 */
void clearSynonyms() {
	_synonyms.clear();
}
|
||||
|
||||
/**
|
||||
* Synonymizes a token list
|
||||
* Parameters: (ResultWordListList &) words: The word list to synonymize
|
||||
*/
|
||||
void synonymizeTokens(ResultWordListList &words);
|
||||
|
||||
void printParserNodes(int num);
|
||||
|
||||
void dumpParseTree();
|
||||
|
||||
int parseNodes(int *i, int *pos, int type, int nr, int argc, const char **argv);
|
||||
|
||||
/**
|
||||
* Check text input against alternative inputs.
|
||||
* @param text The text to process. It will be modified in-place
|
||||
* @param cursorPos The cursor position
|
||||
* @return true if anything changed
|
||||
*/
|
||||
bool checkAltInput(Common::String& text, uint16& cursorPos);
|
||||
|
||||
/**
|
||||
* Save/load vocabulary data
|
||||
*/
|
||||
void saveLoadWithSerializer(Common::Serializer &ser);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Loads all words from the main vocabulary.
|
||||
* @return true on success, false on failure
|
||||
*/
|
||||
bool loadParserWords();
|
||||
|
||||
/**
|
||||
* Loads additional translated words from special format vocabulary.
|
||||
*
|
||||
*/
|
||||
void loadTranslatedWords();
|
||||
|
||||
/**
|
||||
* Loads all suffixes from the suffix vocabulary.
|
||||
* @return true on success, false on failure
|
||||
*/
|
||||
bool loadSuffixes();
|
||||
|
||||
/**
|
||||
* Frees all suffixes in the given list.
|
||||
*
|
||||
*/
|
||||
void freeSuffixes();
|
||||
|
||||
/**
|
||||
* Retrieves all grammar rules from the resource data.
|
||||
* The rules are stored into the _parserBranches member array.
|
||||
* @return true on success, false on error
|
||||
*/
|
||||
bool loadBranches();
|
||||
|
||||
/**
|
||||
* Frees a parser rule list as returned by buildGNF().
|
||||
* @param rule_list the rule list to free
|
||||
*/
|
||||
void freeRuleList(ParseRuleList *rule_list);
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves all alternative input combinations from vocab 913.
|
||||
* @return true on success, false on error
|
||||
*/
|
||||
bool loadAltInputs();
|
||||
|
||||
/**
|
||||
* Frees all alternative input combinations.
|
||||
*/
|
||||
void freeAltInputs();
|
||||
|
||||
ResourceManager *_resMan;
|
||||
VocabularyVersions _vocabVersion;
|
||||
|
||||
bool _foreign;
|
||||
uint16 _resourceIdWords;
|
||||
uint16 _resourceIdSuffixes;
|
||||
uint16 _resourceIdBranches;
|
||||
|
||||
// Parser-related lists
|
||||
SuffixList _parserSuffixes;
|
||||
ParseRuleList *_parserRules; /**< GNF rules used in the parser algorithm */
|
||||
Common::Array<parse_tree_branch_t> _parserBranches;
|
||||
WordMap _parserWords;
|
||||
SynonymList _synonyms; /**< The list of synonyms */
|
||||
Common::Array<Common::List<AltInput> > _altInputs;
|
||||
|
||||
struct PrefixMeaning {
|
||||
unsigned char prefix;
|
||||
const char *meaning;
|
||||
};
|
||||
|
||||
int _pronounReference;
|
||||
|
||||
public:
|
||||
// Accessed by said()
|
||||
ParseTreeNode _parserNodes[VOCAB_TREE_NODES]; /**< The parse tree */
|
||||
|
||||
// Parser data:
|
||||
reg_t parser_event; /**< The event passed to Parse() and later used by Said() */
|
||||
bool parserIsValid; /**< If something has been correctly parsed */
|
||||
};
|
||||
|
||||
/**
|
||||
* Prints a parse tree.
|
||||
* @param tree_name Name of the tree to dump (free-form)
|
||||
* @param nodes The nodes containing the parse tree
|
||||
*/
|
||||
void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Builds a parse tree from a spec and compares it to a parse tree.
|
||||
* @param spec Pointer to the spec to build
|
||||
* @param verbose Whether to display the parse tree after building it
|
||||
* @return 1 on a match, 0 otherwise
|
||||
*/
|
||||
int said(const byte *spec, bool verbose);
|
||||
|
||||
} // End of namespace Sci
|
||||
|
||||
#endif // SCI_PARSER_VOCABULARY_H
|
||||
Reference in New Issue
Block a user