Files
2026-02-02 04:50:13 +01:00

474 lines
20 KiB
C++

/* ScummVM - Graphic Adventure Engine
*
* ScummVM is the legal property of its developers, whose names
* are too numerous to list here. Please refer to the COPYRIGHT
* file distributed with this source distribution.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef GLK_TADS_TADS2_TOKENIZER
#define GLK_TADS_TADS2_TOKENIZER
#include "glk/tads/tads2/lib.h"
#include "glk/tads/tads2/error_handling.h"
#include "glk/tads/tads2/line_source.h"
#include "glk/tads/tads2/line_source_file.h"
#include "glk/tads/tads2/memory_cache.h"
namespace Glk {
namespace TADS {
namespace TADS2 {
/*
 * number of entries in hash table - must be power of 2.  This is the
 * dimension of the tokthhsh[] bucket array in tokthdef.
 */
#define TOKHASHSIZE 256
/*
 * symbol types.  NOTE(review): these look like the values carried in the
 * tokstyp field of toksdef - confirm against the symbol table code.
 */
#define TOKSTUNK 0 /* unknown symbol, not yet defined */
#define TOKSTFUNC 1 /* function; value is object number */
#define TOKSTOBJ 2 /* object; value is object number */
#define TOKSTPROP 3 /* property; value is property number */
#define TOKSTLOCAL 4 /* a local variable or formal parameter */
#define TOKSTSELF 5 /* the pseudo-object "self" */
#define TOKSTBIFN 6 /* a built-in function */
#define TOKSTFWDOBJ 7 /* forward-referenced object */
#define TOKSTFWDFN 8 /* forward-referenced function */
#define TOKSTINHERIT 9 /* the pseudo-object "inherited" */
#define TOKSTEXTERN 10 /* an external function */
#define TOKSTKW 11 /* keyword; value is token number */
#define TOKSTLABEL 12 /* statement label */
#define TOKSTARGC 13 /* 'argcount' pseudo-variable */
#define TOKSTPROPSPEC 14 /* speculative evaluation property */
/* token types */
#define TOKTEOF 1
/* binary operators - keep these together (see prsbopl[] in prs.c) */
#define TOKTPLUS 2
#define TOKTMINUS 3
#define TOKTDIV 4
#define TOKTTIMES 5
#define TOKTNOT 6 /* ! or "not" */
#define TOKTEQ 7
#define TOKTNE 8
#define TOKTGT 9
#define TOKTGE 10
#define TOKTLT 11
#define TOKTLE 12
#define TOKTMOD 13
#define TOKTBAND 14
#define TOKTBOR 15
#define TOKTXOR 16
#define TOKTSHL 17
#define TOKTSHR 18
#define TOKTTILDE 30
/*
* special 'dot' replacement for speculative evaluation mode -- this is
* strictly for marking parse tree nodes, and has the same meaning in a
* parse tree node as a regular TOKTDOT, but generates code that can't
* call methods
*/
#define TOKTDOTSPEC 31
/* special node marker for explicit superclass inheritance nodes */
#define TOKTEXPINH 32
/* punctuation, literal and keyword token types (values 50-95) */
#define TOKTLPAR 50 /* ( */
#define TOKTRPAR 51 /* ) */
#define TOKTCOLON 52
#define TOKTDSTRING 53 /* string in double quotes */
#define TOKTSSTRING 54 /* string in single quotes */
#define TOKTNUMBER 55
#define TOKTSYMBOL 56
#define TOKTINVALID 57 /* invalid lexical token */
#define TOKTLBRACK 58 /* [ */
#define TOKTRBRACK 59 /* ] */
#define TOKTLBRACE 60 /* { */
#define TOKTRBRACE 61 /* } */
#define TOKTSEM 62 /* ; */
#define TOKTCOMMA 63
#define TOKTDOT 64 /* . */
#define TOKTOR 65 /* | or "or" */
#define TOKTAND 66 /* & or "and" */
#define TOKTIF 67 /* keywords */
#define TOKTELSE 68
#define TOKTWHILE 69
#define TOKTFUNCTION 70
#define TOKTRETURN 71
#define TOKTLOCAL 72
#define TOKTOBJECT 73
#define TOKTBREAK 74
#define TOKTCONTINUE 75
#define TOKTLIST 76 /* a list */
#define TOKTNIL 77
#define TOKTTRUE 78
#define TOKTPASS 79
#define TOKTCLASS 80
#define TOKTEXIT 81
#define TOKTABORT 82
#define TOKTASKDO 83
#define TOKTASKIO 84
#define TOKTPOUND 85 /* # */
#define TOKTQUESTION 86 /* ? */
#define TOKTCOMPOUND 87
#define TOKTIOSYN 88
#define TOKTDOSYN 89
#define TOKTEXTERN 90
#define TOKTFORMAT 91
#define TOKTDO 92
#define TOKTFOR 93
#define TOKTNEW 94
#define TOKTDELETE 95
/*
 * assignment operators - keep these together.  The post-increment and
 * post-decrement codes are required to be exactly one greater than the
 * corresponding TOKTINC / TOKTDEC codes, so do not renumber one without
 * the other.
 */
#define TOKTINC 150 /* ++ */
#define TOKTPOSTINC 151 /* MUST BE TOKTINC + 1 */
#define TOKTDEC 152 /* -- */
#define TOKTPOSTDEC 153 /* MUST BE TOKTDEC + 1 */
#define TOKTPLEQ 154 /* += */
#define TOKTMINEQ 155 /* -= */
#define TOKTDIVEQ 156 /* /= */
#define TOKTTIMEQ 157 /* *= */
#define TOKTASSIGN 158 /* simple assignment */
#define TOKTMODEQ 159 /* %= (mod and assign) operator */
#define TOKTBANDEQ 160 /* &= */
#define TOKTBOREQ 161 /* |= */
#define TOKTXOREQ 162 /* ^= (xor and assign) */
#define TOKTSHLEQ 163 /* <<= (shift left and assign) */
#define TOKTSHREQ 164 /* >>= (shift right and assign) */
/* remaining keyword and operator token types (values 200-209) */
#define TOKTSWITCH 200
#define TOKTCASE 201
#define TOKTDEFAULT 202
#define TOKTGOTO 203
#define TOKTELLIPSIS 204 /* ... */
#define TOKTSPECIAL 205 /* "specialWords" */
#define TOKTREPLACE 206 /* replace */
#define TOKTMODIFY 207 /* modify */
#define TOKTEQEQ 208 /* the '==' operator */
#define TOKTPOINTER 209 /* the -> operator */
/*
 * the longest a symbol name can be.  This dimensions toksdef.toksnam;
 * tokdef.toknam is declared one byte larger (TOKNAMMAX+1).
 */
#define TOKNAMMAX 39
/*
 * symbol table entry.  This is the record passed to and filled in by the
 * toktdef callbacks (search/set/each); it is also embedded in tokshdef
 * (hashed table entry) and in tokdef (as the symbol matching a token).
 */
struct toksdef {
uchar tokstyp; /* type of the symbol */
uchar tokshsh; /* hash value of symbol */
ushort toksval; /* value of the symbol (depends on type) */
ushort toksfr; /* frame offset of symbol (for debugger) */
uchar tokslen; /* length of the symbol's name */
char toksnam[TOKNAMMAX]; /* name of symbol */
};
/*
 * symbol table entry without 'name' portion - for allocation purposes.
 * The fields mirror toksdef, in the same order and with the same types;
 * only the name array differs (one element instead of TOKNAMMAX).
 * NOTE(review): presumably used to compute the size of an entry that
 * stores only as many name bytes as it needs - confirm against the
 * table allocation code.
 */
struct toks1def {
uchar tokstyp; /* type of the symbol (as in toksdef) */
uchar tokshsh; /* hash value of symbol (as in toksdef) */
ushort toksval; /* value of the symbol (as in toksdef) */
ushort toksfr; /* frame offset of symbol (as in toksdef) */
uchar tokslen; /* length of the symbol's name (as in toksdef) */
char toksnam[1]; /* first byte of the name */
};
/*
 * generic symbol table object - other symbol tables are subclasses.
 * "Subclassing" is done C-style: tokthdef and toktldef each embed a
 * toktdef as their first member.  The four function pointers have the
 * same signatures as the tokth*() / toktl*() add, search, set and each
 * routines declared later in this header.
 */
struct toktdef {
void (*toktfadd)(toktdef *tab, char *name, int namel, int typ,
int val, int hash); /* add symbol */
int (*toktfsea)(toktdef *tab, char *name, int namel, int hash,
toksdef *ret); /* search symbol table */
void (*toktfset)(toktdef *tab, toksdef *sym);
/* update val & typ of symbol to those in *sym */
void (*toktfeach)(toktdef *tab,
void (*fn)(void *ctx, toksdef *sym),
void *fnctx); /* call fn for each sym */
toktdef *toktnxt; /* next symbol table to be searched */
errcxdef *tokterr; /* error handling context */
};
/*
 * maximum number of pools (TOKTHSIZE bytes each) for symbols.  This is
 * the dimension of the tokthpool[] and tokthfinal[] arrays in tokthdef.
 */
#define TOKPOOLMAX 128
/*
 * pointer to a symbol in a hashed symbol table.  A symbol is addressed
 * as a (cache object, offset) pair rather than by a raw memory pointer.
 * Used both for the hash bucket heads (tokthdef.tokthhsh) and for the
 * chain link in each entry (tokshdef.tokshnxt).
 */
struct tokthpdef {
mcmon tokthpobj; /* cache manager object number of page */
uint tokthpofs; /* offset within page of this symbol */
};
/*
 * extended symbol entry in a hashed symbol table: an ordinary toksdef
 * preceded by the link to the next entry.
 */
struct tokshdef {
tokthpdef tokshnxt; /* pointer to next symbol in the table */
toksdef tokshsc; /* superclass - normal symbol entry */
};
/*
 * hashing symbol table (subclass of generic symbol table).  The generic
 * toktdef is the first member.  Symbols live in up to TOKPOOLMAX pools
 * that are cache manager objects (mcmon), reached through the
 * TOKHASHSIZE-entry hash table.
 */
struct tokthdef {
toktdef tokthsc; /* generic symbol table superclass data */
mcmcxdef *tokthmem; /* memory manager context */
tokthpdef tokthhsh[TOKHASHSIZE]; /* hash table */
uint tokthpcnt; /* number of memory pools for toksdef's */
mcmon tokthpool[TOKPOOLMAX]; /* memory pools for toksdef's */
uint tokthfinal[TOKPOOLMAX]; /* actual sizes of these pools */
uchar *tokthcpool; /* current pool pointer */
ushort tokthsize; /* remaining size of top memory pool */
ushort tokthofs; /* allocation offset in top memory pool */
};
/*
 * size of toksdef pools to allocate for hashed symbol tables.  The value
 * fits in the ushort counters tokthsize / tokthofs kept in tokthdef.
 */
#define TOKTHSIZE 4096
/*
* Linear cache-object-embedded symbol table. This type of symbol
* table is used for frame parameter/local variable lists. It is best
* for small tables, because it isn't broken up into hash buckets, so it
* is searched linearly. As a result, it's small enough to be embedded
* in code.
*
* The generic toktdef is the first member.  The storage for the table is
* supplied by the caller of toktlini() (its mem/siz arguments).
*/
struct toktldef {
toktdef toktlsc; /* generic symbol table superclass data */
uchar *toktlptr; /* base of linear symbol table */
uchar *toktlnxt; /* next free byte in table */
uint toktlcnt; /* number of objects in the table */
uint toktlsiz; /* bytes remaining in the table */
};
/*
 * token description.  The tokenizer context keeps the current token in
 * this form (tokcxdef.tokcxcur), and tokget1() / tok_case_fold() operate
 * on a caller-supplied tokdef.
 */
struct tokdef {
int toktyp; /* type of the token */
int toklen; /* length of token text, if a symbolic token */
long tokval; /* numeric value, if applicable */
/* NOTE(review): tokofs is undocumented here - confirm its meaning */
ushort tokofs;
uint tokhash; /* token hash value, if a symbolic token */
char toknam[TOKNAMMAX+1]; /* text of token, if a symbolic token */
toksdef toksym; /* symbol from table matching token */
};
/*
 * special character sequence.  Entries are linked into lists of
 * sequences sharing the same first character (tokscnxt); the text buffer
 * holds up to TOKSCMAX characters plus one extra byte.
 */
#define TOKSCMAX 3 /* maximum length of a special char sequence */
struct tokscdef {
tokscdef *tokscnxt; /* next sequence with same first character */
int toksctyp; /* token type corresponding to sequence */
int toksclen; /* length of the sequence */
char tokscstr[TOKSCMAX+1]; /* the sequence itself */
};
/*
 * Compare a special character sequence - for efficiency, define
 * something special for the maximum length available (TOKSCMAX).
 * Note that the first character will always be equal, or the
 * string wouldn't even get to the point of being tested by this
 * macro.
 *
 *   str1, len1 - the candidate sequence and its length
 *   str2, len2 - the text being tested and the number of characters
 *                available in it
 *
 * Evaluates to non-zero when at least len1 characters are available
 * (len2 >= len1) and the first len1 characters of the two strings match.
 * The arguments are evaluated more than once, so they must not have
 * side effects.
 *
 * Both definitions take the same four arguments, so changing TOKSCMAX
 * only swaps the implementation and never the calling convention.
 */
#if TOKSCMAX == 3
# define toksceq(str1, str2, len1, len2) \
	((len2) >= (len1) \
	 && ((len1) == 1 \
	     || ((str1)[1] == (str2)[1] \
	         && ((len1) == 2 \
	             || (str1)[2] == (str2)[2]))))
#endif /* TOKSCMAX == 3 */
#ifndef toksceq
/* generic version for any other TOKSCMAX: same contract, via memcmp */
# define toksceq(str1, str2, len1, len2) \
	((len2) >= (len1) && !memcmp((str1), (str2), (size_t)(len1)))
#endif /* toksceq */
/*
 * special character sequence list table entry.  A table of these is
 * what tokcxini() receives as its sctab argument.
 */
struct tokldef {
int tokltyp; /* token type corresponding to sequence */
char toklstr[TOKSCMAX+1]; /* the text of the sequence */
};
/*
 * include path structure - one node of the singly linked include path
 * list whose head is tokcxdef.tokcxinc.
 * NOTE(review): tokpdir is declared with one element; presumably each
 * node is allocated large enough to hold tokplen bytes - confirm in
 * tokaddinc().
 */
struct tokpdef {
tokpdef *tokpnxt; /* next path in list */
int tokplen; /* length of directory name */
char tokpdir[1]; /* directory to search */
};
/*
 * #define symbol structure - one entry in a hash chain of the #define
 * table (tokcxdef.tokcxdf).
 * NOTE(review): expan is declared with one element; presumably the
 * entry is allocated with room for explen bytes of expansion text -
 * confirm in tok_add_define().
 */
struct tokdfdef {
tokdfdef *nxt; /* next symbol in the same hash chain */
char *nm; /* name of the symbol */
int len; /* length of the symbol */
int explen; /* length of the expansion */
char expan[1]; /* expansion buffer */
};
/*
 * #define hash table information.  TOKDFHSHSIZ is the dimension of
 * tokcxdef.tokcxdf; TOKDFHSHMASK is TOKDFHSHSIZ - 1 and must be kept in
 * step with it.  NOTE(review): the mask is presumably ANDed with a hash
 * value to pick a bucket, which requires the size to be a power of 2.
 */
#define TOKDFHSHSIZ 64
#define TOKDFHSHMASK 63
/* maximum #if nesting (dimension of tokcxdef.tokcxif) */
#define TOKIFNEST 64
/* #if state */
#define TOKIF_IF_YES 1 /* processing a true #if/#ifdef block */
#define TOKIF_IF_NO 2 /* processing a false #if/#ifdef block */
#define TOKIF_ELSE_YES 3 /* processing a true #else part */
#define TOKIF_ELSE_NO 4 /* processing a false #else part */
/*
 * maximum macro expansion nesting (dimension of tokcxdef.tokcxmsav and
 * tokcxdef.tokcxmsvl)
 */
#define TOKMACNEST 20
/*
 * lexical analysis context.  Created by tokcxini() and passed as the
 * first argument to the tokenizer and #define routines declared below.
 * The TOKCX* flag bits defined inside the structure are the values
 * stored in tokcxflg.
 */
struct tokcxdef {
errcxdef *tokcxerr; /* error handling context */
mcmcxdef *tokcxmem; /* cache manager context */
struct dbgcxdef *tokcxdbg; /* debugger context */
lindef *tokcxlin; /* line source */
tokpdef *tokcxinc; /* head of include path list */
toktdef *tokcxstab; /* current head of symbol table chain */
void *tokcxscx; /* context for string storage callback functions */
ushort (*tokcxsst)(void *ctx);
/* start storing a string; return offset of string's storage */
void (*tokcxsad)(void *ctx, const char *str, ushort len);
/* add characters to a string */
void (*tokcxsend)(void *ctx); /* finish storing string */
const char *tokcxmsav[TOKMACNEST]; /* saved positions for macro expansion */
ushort tokcxmsvl[TOKMACNEST]; /* saved lengths for macro expansion */
int tokcxmlvl; /* macro nesting level */
int tokcxflg; /* flags */
# define TOKCXFINMAC 0x01 /* doing <<expr>> macro expansion */
# define TOKCXCASEFOLD 0x02 /* fold upper and lower case */
# define TOKCXFCMODE 0x04 /* parse using C operators */
# define TOKCXF_EMBED_PAREN_PRE 0x08 /* embedded expr - did '(' */
# define TOKCXF_EMBED_PAREN_AFT 0x10 /* embedded expr - must do ')' */
# define TOKCXFLIN2 0x20 /* new-style line records */
tokdef tokcxcur; /* current token */
char *tokcxbuf; /* buffer for long lines */
ushort tokcxbsz; /* size of long line buffer */
const char *tokcxptr; /* pointer into line source */
ushort tokcxlen; /* length of text in buffer */
uchar tokcxinx[256]; /* special character indices */
tokdfdef *tokcxdf[TOKDFHSHSIZ]; /* hash table for #define symbols */
int tokcxifcnt; /* number of #endif's we expect to find */
char tokcxif[TOKIFNEST]; /* #if state for each nesting level */
int tokcxifcur; /* current #if state, obeying nesting */
linfdef *tokcxhdr; /* list of previously included headers */
/*
 * NOTE(review): declared with one element and placed last; presumably
 * the context is allocated with extra room for more entries - confirm
 * in tokcxini().
 */
tokscdef *tokcxsc[1]; /* special character table */
};
/*
 * allocate and initialize a lexical analysis context.  errctx and mctx
 * are the error handling and cache manager contexts; sctab is a table of
 * special character sequence entries (tokldef).  Returns the new context.
 */
tokcxdef *tokcxini(errcxdef *errctx, mcmcxdef *mctx, tokldef *sctab);
/*
 * add an include path to a token handling context; path is given with
 * an explicit length (pathlen)
 */
void tokaddinc(tokcxdef *ctx, char *path, int pathlen);
/* compute the hash value of a string */
uint tokhsh(char *nam);
/*
* Fold case of a token if we're in case-insensitive mode. This should
* be called any time a token is constructed artificially; it need not
* be used when the token is read through the tokenizer, because the
* tokenizer will always adjust a token as needed before returning it.
*/
void tok_case_fold(tokcxdef *ctx, tokdef *tok);
/*
 * Hashed symbol table routines.  The add/search/set/each routines have
 * the same signatures as the corresponding toktdef function pointers;
 * they take the table through its generic toktdef header.
 */
/* initialize a hashed symbol table */
void tokthini(errcxdef *errctx, mcmcxdef *memctx, toktdef *toktab1);
/*
 * add a symbol to a hashed symbol table; name/namel give the symbol
 * text and its length, typ and val its type and value, hash its hash
 * value
 */
void tokthadd(toktdef *toktab, char *name, int namel,
int typ, int val, int hash);
/* update a symbol in a hashed symbol table */
void tokthset(toktdef *toktab, toksdef *sym);
/*
 * search a hashed symbol table for a symbol; the entry is delivered
 * through *ret.  NOTE(review): the int result is presumably a found /
 * not-found flag - confirm in the implementation.
 */
int tokthsea(toktdef *tab, char *name, int namel, int hash,
toksdef *ret);
/* call a function for each symbol in a hashed symbol table */
void toktheach(toktdef *tab, void (*cb)(void *ctx, toksdef *sym),
void *ctx);
/* find a symbol given type and value; the entry is delivered via *sym */
int tokthfind(toktdef *tab, int typ, uint val, toksdef *sym);
/*
 * Linear symbol table routines.  The add/search/set/each routines have
 * the same signatures as the corresponding toktdef function pointers.
 */
/*
 * initialize a linear symbol table; mem and siz describe the
 * caller-supplied storage for the table
 */
void toktlini(errcxdef *errctx, toktldef *toktab,
uchar *mem, uint siz);
/* add a symbol to a linear symbol table */
void toktladd(toktdef *toktab, char *name, int namel,
int typ, int val, int hash);
/* search a linear symbol table; the entry is delivered through *ret */
int toktlsea(toktdef *tab, char *name, int namel, int hash,
toksdef *ret);
/* update a symbol in a linear symbol table */
void toktlset(toktdef *toktab, toksdef *sym);
/* call a function for each symbol in a local symbol table */
void toktleach(toktdef *tab, void (*cb)(void *ctx, toksdef *sym),
void *ctx);
/* delete all symbols from a linear table */
void toktldel(toktldef *tab);
/* get next token, removing it from input stream */
int toknext(tokcxdef *ctx);
/*
 * general function to get/peek at next token; the token is described in
 * *tok.  NOTE(review): consume presumably selects between removing the
 * token from the input (get) and leaving it there (peek) - confirm in
 * the implementation.
 */
int tokget1(tokcxdef *ctx, tokdef *tok, int consume);
/*
 * add a symbol to the #define symbol table; sym/len give the symbol
 * name and expan/explen its expansion text, both with explicit lengths
 */
void tok_add_define(tokcxdef *ctx, const char *sym, int len,
const char *expan, int explen);
/*
* add a symbol to the #define symbol table, folding case if we're
* operating in case-insensitive mode
*/
void tok_add_define_cvtcase(tokcxdef *ctx, const char *sym, int len,
const char *expan, int explen);
/* add a symbol to the #define symbol table as a number */
void tok_add_define_num_cvtcase(tokcxdef *ctx, const char *sym, int len, int num);
/* undefine a #define symbol */
void tok_del_define(tokcxdef *ctx, char *sym, int len);
/* read preprocessor symbols from a file */
void tok_read_defines(tokcxdef *ctx, osfildef *fp, errcxdef *ec);
/* write preprocessor state to a file */
void tok_write_defines(tokcxdef *ctx, osfildef *fp, errcxdef *ec);
/*
 * Character classification and digit conversion macros.
 *
 * All of these evaluate their argument more than once, so the argument
 * must not have side effects.  The argument is always parenthesized
 * before being cast to uchar, so that an expression argument is cast
 * as a whole.
 */
/* determine if a char is a valid non-initial character in a symbol name */
#define TOKISSYM(c) \
	(Common::isAlpha((uchar)(c)) || Common::isDigit((uchar)(c)) \
	 || (c) == '_' || (c) == '$')
/* numeric conversion and checking macros */
/* true if c is a hexadecimal digit (0-9, a-f, A-F) */
#define TOKISHEX(c) \
	(Common::isDigit((uchar)(c)) \
	 || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
/* true if c is an octal digit: a decimal digit other than 8 or 9 */
#define TOKISOCT(c) \
	(Common::isDigit((uchar)(c)) && !((c) == '8' || (c) == '9'))
/*
 * value of the hex digit c; c must already satisfy TOKISHEX, since any
 * non-digit is treated as a letter without further checking
 */
#define TOKHEX2INT(c) \
	(Common::isDigit((uchar)(c)) \
	 ? (c) - '0' \
	 : ((c) >= 'a' ? (c) - 'a' + 10 : (c) - 'A' + 10))
/* value of the octal / decimal digit c; no range checking is done */
#define TOKOCT2INT(c) ((c) - '0')
#define TOKDEC2INT(c) ((c) - '0')
} // End of namespace TADS2
} // End of namespace TADS
} // End of namespace Glk
#endif