From b49d8ebefe9b10c53a6a09ad564e22111b7b25c6 Mon Sep 17 00:00:00 2001 From: Stef Walter Date: Sat, 20 Sep 2003 07:12:49 +0000 Subject: Initial Import --- lib/compile.c | 2337 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2337 insertions(+) create mode 100644 lib/compile.c (limited to 'lib/compile.c') diff --git a/lib/compile.c b/lib/compile.c new file mode 100644 index 0000000..e92a2f6 --- /dev/null +++ b/lib/compile.c @@ -0,0 +1,2337 @@ +/* + * AUTHOR + * N. Nielsen + * + * LICENSE + * This software is in the public domain. + * + * The software is provided "as is", without warranty of any kind, + * express or implied, including but not limited to the warranties + * of merchantability, fitness for a particular purpose, and + * noninfringement. In no event shall the author(s) be liable for any + * claim, damages, or other liability, whether in an action of + * contract, tort, or otherwise, arising from, out of, or in connection + * with the software or the use or other dealings in the software. + * + * SUPPORT + * Send bug reports to: + */ + +/* ---------------------------------------------------------------------- +// Recipe Compiler +// 2000-2002 Copyright, Nate Nielsen +*/ + + +#include +#include "common/usuals.h" +#include "common/compat.h" +#include "lib/rlib.h" +#include "priv.h" +#include "execute.h" +#include "ops.h" + +const byte kEncUTF8[] = { 0xEF, 0xBB, 0xBF }; +const byte kEncUCS2_L[] = { 0xFF, 0xFE }; +const byte kEncUCS2_R[] = { 0xFE, 0xFF }; +const byte kEncUCS4_L[] = { 0xFF, 0xFE, 0x00, 0x00 }; +const byte kEncUCS4_R[] = { 0x00, 0x00, 0xFE, 0xFF }; + +/* Flags Syntax: ----------------------------------------------------------------- + */ + +/* All flags for available to statements */ +typedef enum _syn_flags +{ + f_not = 0x0001, /* match: Reverse effect */ + f_once = 0x0002, /* match: Only execute once */ + f_case = 0x0004, /* options: Case sensitive */ + f_line = 0x0010, /* options: Limit to line matches */ + f_find = 0x0020, /* match: match but don't move limits */ + f_tag = 0x0040, /* tag: use special tag matching */ + + /* Special cases */ + f_num = 0x0400 /* represents a number */ +} +syn_flags; + + +/* Listing of all flags and their text representations */ +typedef struct _flagmap +{ + syn_flags flag; + const char* text; +} +flagmap; + +flagmap kAllFlags[] = { + { f_not, "not" }, + { f_once, "once" }, + { f_case, "case" }, + { f_line, "line" }, + { f_find, "find" }, + { f_tag, "tag" }, + + /* Special cases */ + { f_num, "" } +}; + + +/* Statement Syntax: -------------------------------------------------------------- + */ + +typedef enum _syn_statements +{ + s_none, + s_function, + s_loop, + s_once, + s_options, + s_call, + s_return, + s_end, + s_stop, + s_match, + s_replace, + s_lock, + s_setvar, + s_addvar, + s_clrvar, + s_message, + s_else, + s_opbrace, + s_clbrace, + s_implied +} +syn_statements; + +typedef struct _syntaxmap +{ + syn_statements syntax; /* id */ + const char* text; /* text representation */ + uint flags; /* which flags are valid */ + uint args; /* Does it have arguments (either a name or a data block) */ + uint context; /* In which context valid */ +} +syntaxmap; + +/* Context values */ +#define SYNTAX_INROOT 0x00000001 /* Only allowed in the root script */ +#define SYNTAX_INBRACE 0x00000004 /* Only allowed inside braces */ +#define SYNTAX_INFUNCTION 0x00000008 /* Only allowed in a function */ +#define SYNTAX_BLOCK 0x00000010 /* The statement is start of a block */ +#define SYNTAX_IMPLIED 0x00000020 /* We're in an implied block */ +#define SYNTAX_SPECIAL 0x00000100 /* Syntax not determined by parser */ +#define SYNTAX_ANYWHERE (SYNTAX_INROOT | SYNTAX_INBRACE | SYNTAX_INFUNCTION) + +/* Argument values */ +#define ARGUMENT_NAME 0x00000001 +#define ARGUMENT_DATA 0x00000002 + +syntaxmap kAllStatements[] = { + { s_none, "", 0, 0, SYNTAX_SPECIAL }, + { s_function, "function", 0, ARGUMENT_NAME, SYNTAX_INROOT }, + { s_loop, "loop", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK }, + { s_once, "once", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK }, + { s_options, "options", f_case | f_line, 0, SYNTAX_ANYWHERE }, + { s_call, "call", 0, ARGUMENT_NAME, SYNTAX_ANYWHERE }, + { s_return, "return", f_num, 0, SYNTAX_INFUNCTION }, + { s_end, "end", 0, 0, SYNTAX_ANYWHERE }, + { s_stop, "stop", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE }, + { s_match, "match", f_not|f_once|f_num|f_find|f_tag,ARGUMENT_DATA, SYNTAX_ANYWHERE | SYNTAX_BLOCK }, + { s_replace, "replace", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE }, + { s_lock, "lock", 0, 0, SYNTAX_ANYWHERE }, + { s_setvar, "set", 0, ARGUMENT_DATA | ARGUMENT_NAME, SYNTAX_ANYWHERE }, + { s_addvar, "add", 0, ARGUMENT_DATA | ARGUMENT_NAME, SYNTAX_ANYWHERE }, + { s_clrvar, "clr", 0, ARGUMENT_NAME, SYNTAX_ANYWHERE }, + { s_message, "message", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE }, + { s_else, "else", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK }, + + /* special cases */ + { s_opbrace, "{", 0, 0, SYNTAX_SPECIAL }, + { s_clbrace, "}", 0, 0, SYNTAX_SPECIAL }, + { s_implied, "", 0, 0, SYNTAX_SPECIAL } +}; + + + + +/* Syntax Constants: ------------------------------------------------------------- + */ + +const char* kValidNum = "0123465798"; +const char* kValidDelim = "\"~`!@#$%^&*[]|'<>./?+=-;:"; +const char* kValidBrace = "{}"; +const char kFlagsStart = '('; +const char kFlagsEnd = ')'; +const char kFlagDelim = ','; +const char kTagDelim = '='; +const char kEscapeChar = '\\'; + +/* Maximum size of an identifier */ +#define kMaxIdentifier 40 + + +#define INVALID_PTR 0xFFFFFFFF + + + +/* ---------------------------------------------------------------------- +// CODE and COMPILE FUNCTIONALITY +*/ + +const size_t OPS_BUFFER_SIZE = 0x1000; + + +/* codestack: We use one of these for each level of braces. Helps +// us maintain context. */ + +typedef struct _codestack +{ + uint curContext; /* Current parse context */ + uint curOptions; /* Current options in use */ + uint numStatements; /* Number of statements in this block */ + + size_t insPos; /* The position to insert code */ + size_t endPos; /* End of code owned by current codestack */ + + struct _codestack* pPrev; /* previous stack (if in list) */ +} +codestack; + +/* Add a new post op buffer and stash away current */ +#define PUSH_CODESTACK(c) \ +do { \ + (c)->code = pushCodeStack((c)->code); \ + if(!(c)->code) RETURN(R_NOMEM); \ +} while(0) + +/* Dump current post op buffer and get previous */ +#define POP_CODESTACK(c) \ +do { \ + commitCodeStack((c)->code); \ + (c)->code = popCodeStack((c)->code); \ +} while (0) \ + + +/* Initialize a new post ops buffer and hook into previous */ +static codestack* pushCodeStack(codestack* prev) +{ + codestack* code = (codestack*)malloc(sizeof(codestack)); + if(!code) + return NULL; + + memset(code, 0, sizeof(codestack)); + + if(prev) + { + /* Things to carry over from previous */ + code->insPos = code->endPos = prev->insPos; + code->curOptions = prev->curOptions; + code->curContext = prev->curContext; + } + else + { + code->curContext = SYNTAX_INROOT; + } + + /* Init the context stuff */ + + + code->pPrev = prev; + return code; +} + +/* Free a post op buffer and return previous */ +static codestack* popCodeStack(codestack* code) +{ + + codestack* prev = code->pPrev; + + if(code->pPrev) + { + + /* The insertion position needs fixing ... */ + /* calc offset */ + size_t offset = code->insPos - prev->insPos; + prev->insPos += offset; + prev->endPos += offset; + } + + free(code); + + return prev; +} + +#define commitCodeStack(code) ((code)->insPos = (code)->endPos) + + +/* compilecontext: The main compile state. Only one used throughout +// compilation +*/ +typedef struct _compilecontext +{ + syn_statements lastStatement; /* Last statement we had */ + syn_statements nextStatement; /* Next statement we're expecting */ + + syn_statements curStatement; /* Current statement: */ + const char* curName; /* - The Name */ + size_t lenName; /* - Length of the name */ + uint curFlags; /* - Flags */ + const char* curFlagData; /* - Extra flag (f_num) */ + size_t lenFlagData; /* - Length of extra flag */ + const char* curData; /* - Data */ + size_t lenData; /* - Length of data */ + + + const char* in; /* Next location to compile in script */ + long curKey; /* a unique key (id) which gets incremented */ + /* for various uses such as jumps and calls */ + + vmop_t* beg; /* start of block of output code */ + size_t cbops; /* number of bytes of output code */ + size_t alloc; /* number of bytes allocated for output */ + + codestack* code; /* Current codestack */ + bool failed : 1; /* did the last memory allocation fail? */ +} +compilecontext; + + + +/* ------------------------------------------------------------------- +// POS OP +// +// the 'pos' op is a temporary place holder while compiling +// it takes one value parameter and it's value is the same as is +// temporarily placed in jmp, je, jne or call +// it's removed and jumps are patched in the optimization stage +*/ + +#define o_pos ((vmop_t)(0xFF)) +/* const vmop_t pos = 0xFF; */ + + + +/* ------------------------------------------------------------------- +// OPS CODING FUNCTIONALITY +*/ + +/* These macros can only be used within repCompile */ + +#define PUSH_OP(op) \ + pushValues(&ctx, true, sizeof(byte), op, 0) + +#define PUSH_OP_1(op, arg) \ + pushValues(&ctx, true, sizeof(byte), op, sizeof(arg), arg, 0) + +#define PUSH_OP_2(op, arg, arg2) \ + pushValues(&ctx, true, sizeof(byte), op, sizeof(arg), arg, sizeof(arg2), arg2, 0) + +/* Add ops in reverse */ +#define PUSH_ROP(op) \ + pushValues(&ctx, false, sizeof(byte), op, 0) + +#define PUSH_ROP_1(op, arg) \ + pushValues(&ctx, false, sizeof(byte), op, sizeof(arg), arg, 0) + +#define PUSH_ROP_2(op, arg, arg2) \ + pushValues(&ctx, false, sizeof(byte), op, sizeof(arg), arg, sizeof(arg2), arg2, 0) + + +/* Get more stack space for ops */ +static void moreOutput(compilecontext* ctx) +{ + /* Reallocate */ + ctx->beg = (byte*)reallocf(ctx->beg, ctx->alloc + OPS_BUFFER_SIZE); + ctx->alloc += OPS_BUFFER_SIZE; + + /* Set flag if failed */ + if(!ctx->beg) + ctx->failed = true; +} + +/* Allocate a specific amount from the stack */ +#define allocOutput(ctx, len) pushData(ctx, NULL, len, true) + + + + +/* Push any amount of data on ops buffer */ +static void* pushData(compilecontext* ctx, const void* data, size_t len, bool forward) +{ + void* pIns; + + if(ctx->cbops + len >= ctx->alloc) + moreOutput(ctx); + + if(ctx->failed || !len) + return NULL; + + pIns = ctx->beg + ctx->code->insPos; + + /* Make space at insertion point */ + memmove(ctx->beg + ctx->code->insPos + len, ctx->beg + ctx->code->insPos, + ctx->cbops - ctx->code->insPos); + + /* If we have data copy it in */ + if(data) + memcpy(pIns, data, len); + + /* In debug mode clear it */ +#ifdef _DEBUG + else + memset(pIns, 0xCC, len); +#endif + + ctx->cbops += len; + ctx->code->endPos += len; + + if(forward) + ctx->code->insPos += len; + + /* Only return value if no input data */ + return data ? NULL : pIns; +} + + +static void pushValues(compilecontext* ctx, bool forward, ...) +{ + va_list ap; + size_t len; + + #define VAL_BUF 20 + byte buff[VAL_BUF]; + size_t cur = 0; + + va_start(ap, forward); + while(len = va_arg(ap, size_t)) + { + if(cur + len > VAL_BUF) + { + pushData(ctx, buff, cur, forward); + cur = 0; + } + + switch(len) + { + case 1: + buff[cur] = va_arg(ap, byte); + break; + case 2: + *((unsigned short*)(buff + cur)) = va_arg(ap, unsigned short); + break; + case 4: + *((unsigned int*)(buff + cur)) = va_arg(ap, unsigned int); + break; + default: + ASSERT(false); + break; + } + + cur += len; + } + + pushData(ctx, buff, cur, forward); +} + +static int testRegexp(r_script* script, const char* regexp, short options) +{ + int erroroffset; + const char* error = NULL; + pcre* re = pcre_compile(regexp, options, &error, &erroroffset, NULL); + + if(!re) + { + if(error) + scriptSetError(script, error); + return R_REGEXP; + } + else + { + free(re); + return R_OK; + } +} + +/* ---------------------------------------------------------------------- +// Functions for pushing specific types of ops on the stack +*/ +static int pushMatch(r_script* script, compilecontext* ctx, const char* regexp) +{ + int ret; + + /* Allocate */ + size_t len = sizeof(match_op_pcre) + (sizeof(char) * strlen(regexp)); + match_op_pcre* op = (match_op_pcre*)allocOutput(ctx, len); + + /* Setup op */ + if(!op) + return R_NOMEM; + + memset(op, 0, len); + op->header.len = len; + op->header.type = kMatchPcre; + + op->options = PCRE_DOLLAR_ENDONLY; + + /* TODO: Do we need to make an option for PCRE_MULTILINE? */ + if(!(ctx->code->curOptions & f_line)) + op->options |= PCRE_DOTALL; + if(!(ctx->code->curOptions & f_case)) + op->options |= PCRE_CASELESS; + + ret = testRegexp(script, regexp, op->options); + if(ret < 0) + return ret; + + /* Copy the uncompiled regular expression onto the ops stack */ + strcpy(op->pattern, regexp); + + return R_OK; +} + +static void pushText(compilecontext* ctx, const char* string, size_t len) +{ + /* Allocate */ + text_op* op = (text_op*)allocOutput(ctx, sizeof(text_op) + (sizeof(char) * (len))); + + /* Setup op */ + if(op) + { + op->len = len; + + /* Copy the replacement string onto the ops stack */ + /* TODO: Get this ready for binary replacements */ + strncpy((char*)op->string, string, len); + op->string[len] = 0; + } +} + +static void pushVar(compilecontext* ctx, const char* name, size_t lenName) +{ + /* Allocate */ + size_t len = lenName + 1; + var_op* op = (var_op*)allocOutput(ctx, sizeof(var_op) + (sizeof(char) * len)); + + /* Setup op */ + if(op) + { + op->len = len; + + /* Copy the variable name onto the ops stack */ + /* TODO: Get this ready for binary replacements */ + memcpy(op->name, name, lenName); + op->name[lenName] = 0; + } +} + + +/* ---------------------------------------------------------------------- +// FUNCTIONS +*/ + +/* Keeps track of current functions seen */ +typedef struct funcdef +{ + char name[kMaxIdentifier + 1]; + uint key; +} +funcdef; + +typedef struct _funcdefs +{ + uint alloc; + uint cur; + funcdef defs[1]; +} +funcdefs; + +/* Add a function to the stack */ +static bool addFunction(funcdefs** ppdefs, const char* name, size_t len, uint key) +{ + /* Do allocation if necessary */ + if(!*ppdefs || (*ppdefs)->cur >= (*ppdefs)->alloc) + { + uint alloc = *ppdefs ? (*ppdefs)->alloc : 0; + uint cur = *ppdefs ? (*ppdefs)->cur : 0; + + alloc += 0x10; + + *ppdefs = (funcdefs*)reallocf(*ppdefs, sizeof(funcdefs) + (sizeof(funcdef) * alloc)); + if(!*ppdefs) return false; + + (*ppdefs)->alloc = alloc; + (*ppdefs)->cur = cur; + } + + /* Push it on the back */ + strlcpy((*ppdefs)->defs[(*ppdefs)->cur].name, name, min(kMaxIdentifier, len) + 1); + (*ppdefs)->defs[(*ppdefs)->cur].key = key; + (*ppdefs)->cur++; + + return true; +} + +/* Check if a function exists */ +static uint findFunction(funcdefs* pdefs, const char* name, size_t len) +{ + if(pdefs) + { + char funcname[kMaxIdentifier + 1]; + size_t i; + + strlcpy(funcname, name, min(len, kMaxIdentifier) + 1); + + /* Just loop through and compare names */ + for(i = 0; i < pdefs->cur; i++) + { + if(!strcasecmp(pdefs->defs[i].name, funcname)) + return pdefs->defs[i].key; + } + } + + return INVALID_PTR; +} + +/* ---------------------------------------------------------------------- +// SYNTAX FUNCTIONS +*/ + +/* Is a character escaped or not? */ +bool isEscaped(const char* str, const char* posi) +{ + /* + Checks for a backslash before + but note that backslash can be escaped to so... + */ + bool bEscaped = false; + while(posi > str && posi[-1] == kEscapeChar) + { + bEscaped = !bEscaped; + posi--; + } + + return bEscaped; +} + + +/* Split a tag match into two for later use */ +char* splitTagMatch(r_script* script, char* regexp) +{ + char* second = regexp; + while(second = strchr(second, kTagDelim)) + { + uint escs = 0; + if(!isEscaped(regexp, second)) + { + second[0] = '\0'; + return second + 1; + } + + second++; + } + + scriptSetError(script, "Couldn't find tags in match (separate with '%c').", kTagDelim); + return NULL; +} + + +/* Eat spaces and comments */ +static bool compileSpace(compilecontext* ctx) +{ + /* Eat white space and comments here */ + while(isspace(ctx->in[0]) || ctx->in[0] == '#') + { + /* Comments ... */ + if(ctx->in[0] == '#') + { + /* Eat rest off line */ + while(ctx->in[0] != '\n' && ctx->in[0] != '\0') + ctx->in++; + } + + ctx->in++; + } + + /* Return true if not end of file */ + return ctx->in[0] != '\0'; +} + +int compileEncoding(r_script* script, compilecontext* ctx) +{ + if(!memcmp(ctx->in, kEncUTF8, countof(kEncUTF8))) + { + ctx->in += countof(kEncUTF8); + } + else if(!memcmp(ctx->in, kEncUCS2_L, countof(kEncUCS2_L)) || + !memcmp(ctx->in, kEncUCS2_R, countof(kEncUCS4_R)) || + !memcmp(ctx->in, kEncUCS4_L, countof(kEncUCS4_L)) || + !memcmp(ctx->in, kEncUCS4_R, countof(kEncUCS4_R))) + { + scriptSetError(script, "unsupported unicode encoding"); + return R_SYNTAX; + } + + return R_OK; +} + +/* Compile a single statement */ +int compileStatement(r_script* script, compilecontext* ctx) +{ + /* Some MACROS */ + #define SYNTAX_ERROR(s) \ + do{ \ + scriptSetError(script, s); \ + RETURN(R_SYNTAX); \ + } while(0) + + #define SYNTAX_ERROR_1(s, a1) \ + do{ \ + scriptSetError(script, s, a1); \ + RETURN(R_SYNTAX); \ + } while(0) + + /* Jump to cleanup label instead of return */ + #define RETURN(r) \ + do { \ + retv = r; \ + goto cleanup; \ + } while (0) + + const char* end; + int retv = R_OK; + + { + int i; + + if(ctx->curStatement != s_opbrace && ctx->curStatement != s_clbrace) + ctx->lastStatement = ctx->curStatement; + + ctx->curStatement = s_none; + ctx->curName = NULL; + ctx->lenName = 0; + ctx->curFlags = 0; + ctx->curFlagData = NULL; + ctx->lenFlagData = 0; + ctx->curData = NULL; + ctx->lenData = 0; + + + do + { + /* Eat all whitespace and comments */ + compileSpace(ctx); + + /* Check for end of input */ + if(ctx->in[0] == 0) + RETURN(R_OK); + + /* Check for Statement */ + if(!strcspn(ctx->in, kValidIdentifier)) + end = ctx->in + strspn(ctx->in, kValidIdentifier); + + /* Check for braces */ + else if(!strcspn(ctx->in, kValidBrace)) + end = ctx->in + 1; /* Brace always just one character */ + + /* Anything else is bad */ + else + SYNTAX_ERROR_1("Unexpected character \'%c\'.", ctx->in[0]); + + /* Okay now determine which statement we got */ + for(i = 0; i < countof(kAllStatements); i++) + { + if(!strncasecmp(kAllStatements[i].text, ctx->in, end - ctx->in)) + { + /* Check the context */ + if(kAllStatements[i].context != SYNTAX_SPECIAL && + !(ctx->code->curContext & kAllStatements[i].context)) + SYNTAX_ERROR_1("\'%s\' not allowed here.", kAllStatements[i].text); + + ctx->curStatement = kAllStatements[i].syntax; + break; + } + } + + + /* Check that we got a statement + if not then it should be a function call */ + if(ctx->curStatement == s_none) + { + ctx->curStatement = s_call; + ctx->curName = ctx->in; + ctx->lenName = end - ctx->in; + } + + + /* Make sure it's what we were expecting */ + if(ctx->nextStatement != s_none && + ctx->nextStatement != ctx->curStatement) + { + /* If braces were required but not found, then do an impliedBlock */ + if(ctx->nextStatement == s_opbrace && + kAllStatements[ctx->lastStatement].context & SYNTAX_BLOCK) + { + ctx->nextStatement = ctx->curStatement; + ctx->curStatement = s_implied; + end = ctx->in; + } + + /* Otherwise it's an error */ + else + { + SYNTAX_ERROR_1("Expecting \'%s\' here.", kAllStatements[ctx->nextStatement].text); + } + } + + + ctx->in = end; + ctx->nextStatement = s_none; + + /* We process braces here */ + if(ctx->curStatement == s_opbrace || + ctx->curStatement == s_implied) + { + PUSH_CODESTACK(ctx); + ctx->code->curContext |= SYNTAX_INBRACE; + + if(ctx->lastStatement == s_function) + ctx->code->curContext |= SYNTAX_INFUNCTION; + + if(ctx->curStatement == s_implied) + ctx->code->curContext |= SYNTAX_IMPLIED; + + ctx->curStatement = s_none; + continue; + } + + /* See if we need to pop any codestacks for implied blocks */ + while(ctx->code->curContext & SYNTAX_IMPLIED && + ctx->code->numStatements > 0) + { + /* Only should be one statement on an implied block */ + ASSERT(ctx->code->numStatements == 1); + POP_CODESTACK(ctx); + } + + /* Closing braces here */ + if(ctx->curStatement == s_clbrace) + { + POP_CODESTACK(ctx); + ctx->curStatement = s_none; + continue; + } + + } + while(ctx->curStatement == s_none); + + + /* Okay now do name if we have one */ + if(kAllStatements[ctx->curStatement].args & ARGUMENT_NAME && !ctx->curName) + { + compileSpace(ctx); + + /* Now look for a normal or name flag */ + if(!strcspn(ctx->in, kValidIdentifier)) + { + end = ctx->in + strspn(ctx->in, kValidIdentifier); + + /* Any flags not found assume it's an identifier */ + ctx->curName = ctx->in; + ctx->lenName = end - ctx->in; + + if(ctx->lenName > kMaxIdentifier) + SYNTAX_ERROR("Maximum length for an identifier is 40 characters."); + + ctx->in = end; + } + } + + + /* Eat the next little bit of whitespace */ + compileSpace(ctx); + + /* Okay now look for flags start */ + if(ctx->in[0] == kFlagsStart) + { + ctx->in++; + + while(1) + { + compileSpace(ctx); + + + /* Check for a number flag */ + if(!strcspn(ctx->in, kValidNum)) + { + if(ctx->curFlagData) + SYNTAX_ERROR("Invalid flags."); + + /* If found then just grab and go */ + ctx->curFlags |= f_num; + ctx->curFlagData = ctx->in; + ctx->lenFlagData = strspn(ctx->in, kValidNum); + + ctx->in += ctx->lenFlagData; + } + + /* Now look for a normal flag */ + else if(!strcspn(ctx->in, kValidIdentifier)) + { + bool found = false; + end = ctx->in + strspn(ctx->in, kValidIdentifier); + + /* Okay now try and map out that flag */ + for(i = 0; i < countof(kAllFlags); i++) + { + if(!strncasecmp(kAllFlags[i].text, ctx->in, end - ctx->in)) + { + found = true; + ctx->curFlags |= kAllFlags[i].flag; + break; + } + } + + /* Any flags not found assume it's an identifier */ + if(!found) + { + if(ctx->curName != NULL) + SYNTAX_ERROR("Invalid flags."); + + ctx->curName = ctx->in; + ctx->lenName = end - ctx->in; + } + + ctx->in = end; + } + + /* End ) of flags */ + else if(ctx->in[0] == kFlagsEnd) + { + ctx->in++; + break; + } + + /* Separator , between flags */ + else if(ctx->in[0] == kFlagDelim) + ctx->in++; + + else + SYNTAX_ERROR("Expected a flag."); + } + + /* Now check the flags */ + if((kAllStatements[ctx->curStatement].flags | ctx->curFlags) + != kAllStatements[ctx->curStatement].flags) + SYNTAX_ERROR("Invalid flags for this statement."); + + } + + + /* Okay now do data processing */ + if(kAllStatements[ctx->curStatement].args & ARGUMENT_DATA) + { + const char* delim; + compileSpace(ctx); + + /* Check for a delimiter */ + if(delim = strchr(kValidDelim, *(ctx->in))) + { + ctx->in++; + end = ctx->in; + + /* Find end of data but checking for escaped delimeters */ + while(1) + { + end = strchr(end, *delim); + + if(!end) + SYNTAX_ERROR("Couldn't find end of data for this statement"); + + if(!isEscaped(ctx->in, end)) + break; + + end++; + } + + + ctx->curData = ctx->in; + ctx->lenData = (end - ctx->in); + + ctx->in = end + 1; + } + else + { + SYNTAX_ERROR("Expected data for this statement."); + } + } + + if(ctx->lenName > kMaxIdentifier) + SYNTAX_ERROR("Maximum length for an identifier is 40 characters."); + + ctx->code->numStatements++; + } + + /* done! */ + +cleanup: + return retv; +} + +static uint getLine(const char* beg, const char* cur) +{ + size_t ret = 1; + while(beg <= cur) + { + if(*beg == '\n') + ret++; + + beg++; + } + + return ret; +} + +int compilerRun(r_script* script, const char* data) +{ + + int retv = R_OK; /* used by RETURN macro */ + funcdefs* pFunctions = NULL; + char* regexp = NULL; + + /* We allocate the main instruction buffer */ + compilecontext ctx; + memset(&ctx, 0, sizeof(ctx)); + + ctx.in = data; + ctx.curKey = 0x00000100; + ctx.beg = (vmop_t*)malloc(OPS_BUFFER_SIZE); + ctx.alloc = OPS_BUFFER_SIZE; + ctx.code = pushCodeStack(NULL); + + if(!ctx.beg || !ctx.code) + RETURN(R_NOMEM); + +#ifdef _DEBUG + { + int i; + /* Do a check here! Index should be equal to */ + /* command name for each statement */ + for(i = 0; i < countof(kAllStatements); i++) + ASSERT(i == kAllStatements[i].syntax); + } +#endif + + + /* + Push a first empty pops stack + We have to have one to pop below + */ + PUSH_CODESTACK(&ctx); + ctx.code->curContext = SYNTAX_INROOT; + + /* Push our signature of 4 nop bytes */ + PUSH_OP(o_nop); + PUSH_OP(o_nop); + PUSH_OP(o_nop); + PUSH_OP(o_nop); + + /* Set up initial stack frame */ + PUSH_OP_2(o_mov, r_bp, r_sp); + /* Push the beginning of the main loop here */ + PUSH_OP_1(o_pos, ARG_MAKE_VALUE(0L)); + PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(0L)); + + retv = compileEncoding(script, &ctx); + if(retv < 0) + RETURN(retv); + + while(ctx.in[0] != '\0') + { + if(ctx.cbops + 0x200 >= ctx.alloc) + moreOutput(&ctx); + + /* Here we check if we have enough memory */ + if(ctx.failed) + RETURN(R_NOMEM); + + /* Get and parse the current statement */ + retv = compileStatement(script, &ctx); + if(retv < 0) + RETURN(retv); + + if(ctx.curStatement == s_none) + continue; + + + + + /* Now we pop the commit end code from previous statement */ + commitCodeStack(ctx.code); + + /* Okay now do the rest of the statements */ + switch(ctx.curStatement) + { + + + /*////////////////////////////////////////////////////// + // function + */ + case s_function: + { + /* SYNTAX ---------------------------------------- + // + // function(name) + // { + // ..... + // } + */ + + uint key, keyJmp; + + /* Check that we got a name */ + if(!(ctx.curName && ctx.lenName)) + SYNTAX_ERROR("'function' needs a name"); + + /* We need a opening brace next */ + ctx.nextStatement = s_opbrace; + + + key = ARG_MAKE_VALUE(ctx.curKey++); + + /* Put the function name at current op pos */ + if(!addFunction(&pFunctions, ctx.curName, + ctx.lenName, key)) + RETURN(R_NOMEM); + + + + /* OPS -------------------------------------------- + // + // jmp <1> + // pos + // push bp + // mov bp, r_sp + // etc. + // ...... + // etc. + // mov fe, 1 + // mov sp, bp + // pop bp + // ret + // <1> + */ + + keyJmp = ARG_MAKE_VALUE(ctx.curKey++); + + PUSH_OP_1(o_jmp, keyJmp); + PUSH_OP_1(o_pos, key); + PUSH_OP_1(o_push, r_bp); + PUSH_OP_2(o_mov, r_bp, r_sp); + + + PUSH_ROP_1(o_pos, keyJmp); + PUSH_ROP(o_ret); + PUSH_ROP_1(o_pop, r_bp); + PUSH_ROP_2(o_mov, r_sp, r_bp); + PUSH_ROP_2(o_mov, r_fe, ARG_MAKE_VALUE(1)); + + } + break; + + + /*////////////////////////////////////////////////////// + // loop + */ + case s_loop: + { + + uint key, key2; + + /* SYNTAX ---------------------------------------- + // + // loop + // { + // ..... + // } + */ + + /* We need a opening brace next */ + ctx.nextStatement = s_opbrace; + + + + /* OPS -------------------------------------------- + // + // push ac + // <1> + // mov ac, 0 + // ..... + // pop x2 pop the previously pushed action value + // test ac test current action + // jne <2> if action + // mov x2, ac then overide previous action value + // <2> + // push x2 push back changed action value + // je <1> + // pop ac pop out the + */ + + key = ARG_MAKE_VALUE(ctx.curKey++); + key2 = ARG_MAKE_VALUE(ctx.curKey++); + + PUSH_OP_1(o_push, r_ac); + PUSH_OP_1(o_pos, key); + PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(0L)); + + PUSH_ROP_1(o_pop, r_ac); + PUSH_ROP_1(o_je, key); + PUSH_ROP_1(o_push, r_x2); + PUSH_ROP_1(o_pos, key2); + PUSH_ROP_2(o_mov, r_x2, r_ac); + PUSH_ROP_1(o_jne, key2); + PUSH_ROP_1(o_test, r_ac); + PUSH_ROP_1(o_pop, r_x2); + } + break; + + + /*////////////////////////////////////////////////////// + // once + */ + case s_once: + { + uint keyOnce, key1, key5; + + /* SYNTAX ---------------------------------------- + // + // once + // { + // ..... + // } + */ + + /* We need a opening brace next */ + ctx.nextStatement = s_opbrace; + + + + /* OPS -------------------------------------------- + // + // test mem(value) Pull in the flag from memory + // jne <1> If not present then jump to containing code + // mov fe, 0 Otherwise set fail flag + // jmp <5> And skip the match + // <1> + // ..... + // + // mov mem(value), 1 + // <5> + */ + + + keyOnce = ARG_MAKE_MEMORY(ctx.curKey++); + key1 = ARG_MAKE_VALUE(ctx.curKey++); + key5 = ARG_MAKE_VALUE(ctx.curKey++); + + + /* Once code */ + PUSH_OP_1(o_test, keyOnce); + PUSH_OP_1(o_jne, key1); + PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0)); + PUSH_OP_1(o_jmp, key5); + PUSH_OP_1(o_pos, key1); + + + PUSH_ROP_1(o_pos, key5); + PUSH_ROP_2(o_mov, keyOnce, ARG_MAKE_VALUE(1)); + } + break; + + + + /*///////////////////////////////////////////////////// + // call + */ + case s_call: + { + uint key; + + /* SYNTAX ---------------------------------------- + // + // call(name) + */ + + /* Check that we got a name */ + if(!(ctx.curName && ctx.lenName)) + SYNTAX_ERROR("'call' needs a function name"); + + /* Find the function */ + key = findFunction(pFunctions, ctx.curName, + ctx.lenName); + + if(key == INVALID_PTR) + SYNTAX_ERROR("function does not exist"); + + + /* OPS -------------------------------------------- + // + // call + */ + + PUSH_OP_1(o_call, key); + } + break; + + + /*/////////////////////////////////////////////////////// + // return + */ + case s_return: + { + /* SYNTAX ---------------------------------------- + // + // return(0) + */ + + uint code = ARG_MAKE_VALUE(1); + + /* Check for the number */ + if(ctx.curFlags & f_num) + { + if(ctx.lenFlagData > 1 || (ctx.curFlagData[0] != '0' && ctx.curFlagData[0] != '1')) + SYNTAX_ERROR("Return code must be 0 or 1"); + + if(ctx.curFlagData[0] == '0') + code = ARG_MAKE_VALUE(0); + } + + /* OPS -------------------------------------------- + // + // mov fe, 0 (or 1 depending on code) + // mov sp, r_bp + // pop bp + // ret + */ + + PUSH_OP_2(o_mov, r_fe, code); + PUSH_OP_2(o_mov, r_sp, r_bp); + PUSH_OP_1(o_pop, r_bp); + PUSH_OP(o_ret); + } + break; + + + + /*////////////////////////////////////////////////////// + // end + */ + case s_end: + { + + /* SYNTAX ---------------------------------------- + // + // end + */ + + + /* OPS -------------------------------------------- + // + // stop + */ + + PUSH_OP(o_text); + pushText(&ctx, NULL, 0); + PUSH_OP_1(o_stop, ARG_MAKE_VALUE(0)); + } + break; + + + + /*////////////////////////////////////////////////////// + // end + */ + case s_stop: + { + + /* SYNTAX ---------------------------------------- + // + // stop "message" + */ + + /* OPS -------------------------------------------- + // + // text "message" + // stop + */ + + PUSH_OP(o_text); + pushText(&ctx, ctx.curData, ctx.lenData); + PUSH_OP_1(o_stop, ARG_MAKE_VALUE(1)); + } + break; + + + /*///////////////////////////////////////////////////// + // match + */ + case s_match: + { + + /* SYNTAX ---------------------------------------- + // + // match(not, once) "regexp" + */ + + uint keyOnce, keyWatermark, keyJmp1, keyJmp2, keyJmp3, + keyJmp4, keyJmp5, keyJmp6, keyJmp7, groupNum, + key1, key4, key5, key9; + + /* Get the flags */ + bool bNot = ctx.curFlags & f_not ? true : false; + bool bOnce = ctx.curFlags & f_once ? true : false; + bool bHas = ctx.curFlags & f_find ? true : false; + bool bTag = ctx.curFlags & f_tag ? true : false; + + /* Check that we got data */ + if(!ctx.curData || !ctx.lenData) + SYNTAX_ERROR("'match' needs a regular expression"); + + /* We need a opening brace next */ + ctx.nextStatement = s_opbrace; + + regexp = strndup(ctx.curData, ctx.lenData); + if(!regexp) + RETURN(R_NOMEM); + + groupNum = 0; + + if(ctx.curFlagData && ctx.lenFlagData) + { + if(ctx.lenFlagData > 1) + SYNTAX_ERROR("Group specifier must be between 0 and 9."); + + if(bHas) + SYNTAX_ERROR("Group specifier invalid with 'has' flag."); + + + /* Get the number to use */ + groupNum = ctx.curFlagData[0] - '0'; + } + + + /* OPS -------------------------------------------- + // + // push x1 Save limits + // push y1 " " + + // test mem(value) (once) Pull in the flag from memory + // jne <1> (once) If not present then jump to match code + // mov fe, 0 (once) Otherwise set fail flag + // jmp <5> (once) And skip the match + // <1> (once) + + // mov x6, mem(key_value) (watermark) Get the watermark + // cmp x1, r_x6 (watermark) If watermark higher than match area + // mov fe, r_fg (watermark) + // jne <2> (watermark) skip + // mov x6, r_x1 (watermark) Otherwise bring up watermark to match area + // <2> (watermark) + + // match x6, r_y1 Do match + // mov x4, fe + + // <5> (once) + + // jne <3> If match failed skip set below + + // mov ac, 1 Set action flag + + // cmp cg, 2 See if we have enough groups + // mov fe, r_fg "" + // jne <7> + // mov fe, 0 If not then set failed + // jmp <3> And skip to failed part + // <7> + // mov x1, r_b2 Set new limit for inside + // mov y1, e2 different registers (depending on number parameter) + + // mov x2, r_b0 (watermark) Get the start of batch + // add x2, 1 (watermark) Add one to it + // mov mem(value), x2 (watermark) Stash it away in memory + + // <3> + + // cmp x4, 1 (not) Compare success against 0 or 1 depending on not + // push fe + // jne <4> Skip if no match + + // mov mem(value), 1 (once) + + // ..... + + // <4> + // pop fe + + // pop y1 + // pop x1 + */ + + keyOnce = ARG_MAKE_MEMORY(ctx.curKey++); + keyWatermark = ARG_MAKE_STACK(ctx.curKey++); + keyJmp1 = ARG_MAKE_VALUE(ctx.curKey++); + keyJmp2 = ARG_MAKE_VALUE(ctx.curKey++); + keyJmp3 = ARG_MAKE_VALUE(ctx.curKey++); + keyJmp4 = ARG_MAKE_VALUE(ctx.curKey++); + keyJmp5 = ARG_MAKE_VALUE(ctx.curKey++); + keyJmp6 = ARG_MAKE_VALUE(ctx.curKey++); + keyJmp7 = ARG_MAKE_VALUE(ctx.curKey++); + + /* Save for later */ + PUSH_OP_1(o_push, r_x1); + PUSH_OP_1(o_push, r_y1); + + if(bOnce) + { + /* Once code */ + PUSH_OP_1(o_test, keyOnce); + PUSH_OP_1(o_jne, keyJmp1); + PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0)); + PUSH_OP_1(o_jmp, keyJmp5); + PUSH_OP_1(o_pos, keyJmp1); + } + + /* Watermark code */ + PUSH_OP_2(o_mov, r_x6, keyWatermark); + PUSH_OP_2(o_cmp, r_x1, r_x6); + PUSH_OP_2(o_mov, r_fe, r_fg); + PUSH_OP_1(o_jne, keyJmp2); + PUSH_OP_2(o_mov, r_x6, r_x1); + PUSH_OP_1(o_pos, keyJmp2); + + + /* Is it a simple match? */ + if(!bTag) + { + /* Actual match */ + PUSH_OP_2(o_match, r_x6, r_y1); + retv = pushMatch(script, &ctx, regexp); + if(retv < 0) RETURN(retv); + } + + /* Or the very complicated tag statement */ + else + { + /* Split the regular expression */ + char* second = splitTagMatch(script, regexp); + if(!second) RETURN(R_SYNTAX); + + + /* (See code docs in tag.txt file) */ + key1 = ARG_MAKE_VALUE(ctx.curKey++); + key4 = ARG_MAKE_VALUE(ctx.curKey++); + key5 = ARG_MAKE_VALUE(ctx.curKey++); + key9 = ARG_MAKE_VALUE(ctx.curKey++); + + + /* Setup */ + PUSH_OP_2(o_mov, r_x2, ARG_MAKE_VALUE(0)); + PUSH_OP_2(o_mov, r_x0, r_x6); + PUSH_OP_2(o_mov, r_y0, r_y1); + + /* Top of loop */ + PUSH_OP_1(o_pos, key1); + + /* Start code */ + PUSH_OP_2(o_match, r_x0, r_y1); + + retv = pushMatch(script, &ctx, regexp); + if(retv < 0) RETURN(retv); + + PUSH_OP_1(o_jne, key5); + PUSH_OP_2(o_cmp, r_b0, r_y0); + PUSH_OP_1(o_je, key5); + PUSH_OP_2(o_mov, r_fe, r_fg); + PUSH_OP_1(o_je, key5); + + PUSH_OP_2(o_cmp, r_x2, ARG_MAKE_VALUE(0)); + PUSH_OP_1(o_jne, key4); + PUSH_OP_2(o_mov, r_x5, r_b0); + PUSH_OP_2(o_mov, r_y5, r_e0); + PUSH_OP_2(o_mov, r_y0, r_e0); + PUSH_OP_1(o_pos, key4); + + PUSH_OP_2(o_mov, r_x2, ARG_MAKE_VALUE(0)); + PUSH_OP_2(o_mov, r_x0, r_e0); + + /* Endless loop check */ + PUSH_OP_2(o_cmp, r_y2, ARG_MAKE_VALUE(0x00100000)); + PUSH_OP_1(o_je, key5); + + /* End code */ + PUSH_OP_2(o_match, r_y0, r_y1); + retv = pushMatch(script, &ctx, second); + if(retv < 0) RETURN(retv); + + PUSH_OP_1(o_jne, key5); + PUSH_OP_2(o_mov, r_y0, r_e0); + PUSH_OP_2(o_mov, r_x7, r_b0); + PUSH_OP_2(o_mov, r_y7, r_e0); + + /* Locks check */ + PUSH_OP_2(o_check, r_y5, r_x7); + PUSH_OP_2(o_mov, r_x2, r_fe); + + /* End of loop */ + PUSH_OP_1(o_jmp, key1); + PUSH_OP_1(o_pos, key5); + + /* Wrap up */ + PUSH_OP_2(o_cmp, r_x2, ARG_MAKE_VALUE(0)); + PUSH_OP_1(o_je, key9); + PUSH_OP_2(o_mov, r_b0, r_x5); + PUSH_OP_2(o_mov, r_e0, r_y7); + PUSH_OP_2(o_mov, r_b1, r_x5); + PUSH_OP_2(o_mov, r_e1, r_y5); + PUSH_OP_2(o_mov, r_b2, r_y5); + PUSH_OP_2(o_mov, r_e2, r_x7); + PUSH_OP_2(o_mov, r_b3, r_x7); + PUSH_OP_2(o_mov, r_e3, r_y7); + PUSH_OP_2(o_mov, r_cg, ARG_MAKE_VALUE(4)); + PUSH_OP_1(o_pos, key9); + PUSH_OP_2(o_mov, r_fe, r_x2); + + } + + + PUSH_OP_2(o_mov, r_x4, r_fe); + + if(bOnce) + PUSH_OP_1(o_pos, keyJmp5); + + /* Skip all the rest of the setup if failed */ + PUSH_OP_1(o_jne, keyJmp3); + + /* Set action flag */ + PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(1)); + + if(!bHas) + { + /* Group validation code */ + PUSH_OP_2(o_cmp, ARG_MAKE_VALUE(groupNum + 1), r_cg); + PUSH_OP_2(o_mov, r_fe, r_fg); + PUSH_OP_1(o_jne, keyJmp7); + PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0)); + PUSH_OP_1(o_jmp, keyJmp3); + PUSH_OP_1(o_pos, keyJmp7); + + /* Now depending on group number set do this we set a set of registers to check */ + PUSH_OP_2(o_mov, r_x1, ARG_MAKE_REGISTER(ARG_GET_REGISTER(r_b0) + groupNum)); + PUSH_OP_2(o_mov, r_y1, ARG_MAKE_REGISTER(ARG_GET_REGISTER(r_e0) + groupNum)); + } + + /* Watermark */ + PUSH_OP_2(o_mov, keyWatermark, r_e0); + + /* End of setup code */ + PUSH_OP_1(o_pos, keyJmp3); + + /* Implement 'not' */ + PUSH_OP_2(o_cmp, r_x4, ARG_MAKE_VALUE(bNot ? 0 : 1)); + PUSH_OP_1(o_push, r_fe); + PUSH_OP_1(o_jne, keyJmp4); + + /* Now we're inside */ + if(bOnce) + PUSH_OP_2(o_mov, keyOnce, ARG_MAKE_VALUE(1)); + + + /* Wrap up code */ + PUSH_ROP_1(o_pop, r_x1); + PUSH_ROP_1(o_pop, r_y1); + PUSH_ROP_1(o_pop, r_fe); + PUSH_ROP_1(o_pos, keyJmp4); + + + free(regexp); + regexp = NULL; + } + break; + + + /*////////////////////////////////////////////////////// + // replace + */ + case s_replace: + { + + /* SYNTAX ---------------------------------------- + // + // replace "replacetext" + */ + + uint key; + + /* Check that we got data */ + if(!ctx.curData) + SYNTAX_ERROR("'replace' needs text to replace"); + + /* OPS -------------------------------------------- + // + // check x1, r_y1 + // jne <1> + // repl x1, r_y1 + // lock x1, r_y1 + // <1> + */ + + key = ARG_MAKE_VALUE(ctx.curKey++); + + PUSH_OP_2(o_check, r_x1, r_y1); + PUSH_OP_1(o_jne, key); + + PUSH_OP(o_text); + pushText(&ctx, ctx.curData, ctx.lenData); + PUSH_OP_2(o_repl, r_x1, r_y1); + + PUSH_OP_2(o_lock, r_x1, r_y1); + PUSH_OP_1(o_pos, key); + } + break; + + + /*////////////////////////////////////////////////////// + // lock + */ + case s_lock: + { + + /* SYNTAX ---------------------------------------- + // + // lock + */ + + /* OPS -------------------------------------------- + // + // lock x1, r_y1 + */ + + PUSH_OP_2(o_lock, r_x1, r_y1); + } + break; + + + + /*////////////////////////////////////////////////////// + // variable functions + */ + case s_setvar: + case s_clrvar: + case s_addvar: + { + + /* SYNTAX ---------------------------------------- + // + // set name "value" + // clr name + // add name "value" + */ + + /* Check that we got a name */ + if(!(ctx.curName && ctx.lenName)) + SYNTAX_ERROR("Missing variable name"); + + /* Check that we got data */ + if((ctx.curStatement == s_setvar || + ctx.curStatement == s_addvar) && !ctx.curData) + SYNTAX_ERROR("Missing variable value"); + + + /* OPS -------------------------------------------- + */ + + /* If not clearing then push text */ + if(ctx.curStatement != s_clrvar) + { + PUSH_OP(o_text); + pushText(&ctx, ctx.curData, ctx.lenData); + } + + /* If not adding then clear */ + if(ctx.curStatement != s_addvar) + { + PUSH_OP(o_clrvar); + pushVar(&ctx, ctx.curName, ctx.lenName); + } + + /* If not clearing then set */ + if(ctx.curStatement != s_clrvar) + { + PUSH_OP(o_setvar); + pushVar(&ctx, ctx.curName, ctx.lenName); + } + } + break; + + /*////////////////////////////////////////////////////// + // else + */ + case s_else: + { + + /* SYNTAX ---------------------------------------- + // + // else + // { + // ..... + // } + */ + + uint key; + + /* REMOVED the following because now else can follow any statement + // + // Make sure the previous statement was a match + // if(ctx.lastStatement != s_match && ctx.lastStatement != s_replace && + // ctx.lastStatement != s_call) + // SYNTAX_ERROR("'else' must follow a match, replace or call statement"); + */ + + /* Next statement must be a opening brace */ + ctx.nextStatement = s_opbrace; + + /* OPS -------------------------------------------- + // + // je <1> + // ...... + // <1> + */ + + key = ARG_MAKE_VALUE(ctx.curKey++); + + PUSH_OP_1(o_je, key); + PUSH_ROP_1(o_pos, key); + } + break; + + /*///////////////////////////////////////////////////// + // options + */ + case s_message: + { + /* SYNTAX ---------------------------------------- + // + // message "data" + */ + + /* Check that we got data */ + if(!ctx.curData) + SYNTAX_ERROR("Missing message text."); + + /* OPS -------------------------------------------- + // + // text "message" + // stop + */ + + PUSH_OP(o_text); + pushText(&ctx, ctx.curData, ctx.lenData); + PUSH_OP(o_msg); + } + break; + + + /*///////////////////////////////////////////////////// + // options + */ + case s_options: + { + /* SYNTAX ---------------------------------------- + // + // options(case, line) + */ + + /* Save the options into the context */ + ctx.code->curOptions = ctx.curFlags; + } + break; + + default: + ASSERT(false); + + } + } + + /* Pop out of any implied blocks */ + while(ctx.code->curContext & SYNTAX_IMPLIED) + POP_CODESTACK(&ctx); + + /* copy any remaining post ops */ + POP_CODESTACK(&ctx); + + /* This is the bottom of the main loop */ + PUSH_OP_1(o_test, r_ac); + PUSH_OP_1(o_je, ARG_MAKE_VALUE(0)); + + /* Put an end marker */ + PUSH_OP(o_end); + + + /* Check brace syntax */ + if(ctx.code->pPrev != NULL) + SYNTAX_ERROR("Not all braces matched."); + + /* Put the compiled script in the script */ + script->ops = ctx.beg; + script->len = ctx.cbops; + + /* This fixes all the jmp and removes pos */ + retv = compilerOptimize(script); + +cleanup: + if(retv == R_SYNTAX || retv == R_REGEXP) + script->errline = getLine(data, ctx.in) - 1; + + /* Unwind codestack */ + while(ctx.code) + ctx.code = popCodeStack(ctx.code); + + if(pFunctions) + free(pFunctions); + if(regexp) + free(regexp); + + if(retv < R_OK) + { + if(ctx.beg) + free(ctx.beg); + + script->ops = NULL; + script->len = 0; + } + + return retv; +} + + + +int compilerOptimize(r_script* scr) +{ + vmop_t* op = scr->ops; + int retv = R_OK; + + /* First find and remove all pos + // NOTE: land and pos are used interchangeably in this code */ + + uint* lands = NULL; + uint cur = 0; + uint alloc = 0; + + while(*op != o_end) + { + ASSERT(op < scr->ops + scr->len); + + switch(*op) + { + case o_pos: + { + if(alloc <= cur) + { + alloc += 0x40; + lands = (uint*)reallocf(lands, sizeof(uint) * 2 * alloc); + if(!lands) + RETURN(R_NOMEM); + + } + + ASSERT(ARG_TYPE(op[1]) == ARG_VAL_TYPE); + + /* Position in 0 */ + lands[cur * 2] = op - scr->ops; + /* key in 1 */ + lands[(cur * 2) + 1] = *((uint*)(op + 1)); + + cur++; + + /* + Okay now eat the rest of the stuff + total length of a pos should be 5 bytes + */ + scr->len -= 5; + memmove(op, op + 5, scr->len - (op - scr->ops)); + } + break; + + default: + opsIterate(&op); + break; + }; + } + + + /* Now fix all jumps and calls */ + op = scr->ops; + + while(*op != o_end) + { + ASSERT(op < scr->ops + scr->len); + + switch(*op) + { + case o_pos: + /* Shouldn't meet any pos ops after we removed them above */ + ASSERT(false); + break; + + case o_jmp: + case o_jne: + case o_je: + case o_call: + { + uint* parg = ((uint*)(op + 1)); + bool found = false; + size_t i; + + /* find the key in our array */ + for(i = 0; i < cur; i++) + { + if(lands[(i * 2) + 1] == *parg) + { + found = true; + *parg = ARG_MAKE_VALUE(lands[i * 2]); + break; + } + } + + if(!found) + ASSERT(false && "jump without a pos"); + } + /* (Note fall through) */ + + default: + opsIterate(&op); + break; + } + } + +cleanup: + if(lands) + free(lands); + + return retv; +} + +void opsIterate(vmop_t** ops) +{ + vmop_t op = *(*(ops)); + (*ops)++; + + /* increment *ops to next op point */ + switch(op) + { + /* ops without arguments */ + case o_end: + case o_nop: + case o_ret: + case o_msg: + break; + + /* ops with one argument */ + case o_push: + case o_pop: + case o_jmp: + case o_je: + case o_jne: + case o_test: + case o_call: + case o_stop: + INC_ARGUMENT(*ops); + break; + + /* ops with two arguments */ + case o_lock: + case o_check: + case o_cmp: + case o_add: + case o_sub: + case o_mov: + case o_repl: + INC_ARGUMENT(*ops); + INC_ARGUMENT(*ops); + break; + + /* Special cases */ + case o_match: + { + match_op* op; + INC_ARGUMENT(*ops); + INC_ARGUMENT(*ops); + op = (match_op*)(*ops); + (*ops) += match_op_size(*op); + } + break; + + case o_setvar: + case o_clrvar: + { + var_op* op = (var_op*)(*ops); + (*ops) += var_op_size(*op); + } + break; + case o_text: + { + text_op* op; + op = (text_op*)(*ops); + (*ops) += text_op_size(*op); + } + break; + + default: + ASSERT(false); + }; + +} + + +/* + TODO: individual ops do not need to be freed + any longer. Execution no longer changes them. +*/ + +/* Frees a set of ops */ +int opsFree(vmop_t* ops, size_t len) +{ + byte* cur = ops; + + if(len == 0) + len = ~0; + + while(cur < ops + len) + { + switch(*cur) + { + case o_end: + goto done; + + default: + opsIterate(&cur); + break; + } + } + +done: + free(ops); + return R_OK; + +} + + +static const char* getOpName(vmop_t op) +{ + #define RETOPNAME(r) case o_##r: return #r; + switch(op) + { + RETOPNAME(end); + RETOPNAME(stop); + RETOPNAME(nop); + RETOPNAME(ret); + RETOPNAME(push); + RETOPNAME(pop); + RETOPNAME(jmp); + RETOPNAME(je); + RETOPNAME(jne); + RETOPNAME(test); + RETOPNAME(call); + RETOPNAME(lock); + RETOPNAME(check); + RETOPNAME(cmp); + RETOPNAME(add); + RETOPNAME(sub); + RETOPNAME(mov); + RETOPNAME(match); + RETOPNAME(repl); + RETOPNAME(setvar); + RETOPNAME(clrvar); + RETOPNAME(pos); + RETOPNAME(text); + RETOPNAME(msg); + + default: + return ""; + } +} + +static const char* getRegisterName(byte reg) +{ + #define RETREGNAME(r) case r_##r: return #r; + switch(reg) + { + RETREGNAME(fe); + RETREGNAME(fg); + RETREGNAME(fl); + RETREGNAME(ac); + RETREGNAME(sp); + RETREGNAME(bp); + RETREGNAME(b0); + RETREGNAME(b1); + RETREGNAME(b2); + RETREGNAME(b3); + RETREGNAME(b4); + RETREGNAME(b5); + RETREGNAME(b6); + RETREGNAME(b7); + RETREGNAME(b8); + RETREGNAME(b9); + RETREGNAME(e0); + RETREGNAME(e1); + RETREGNAME(e2); + RETREGNAME(e3); + RETREGNAME(e4); + RETREGNAME(e5); + RETREGNAME(e6); + RETREGNAME(e7); + RETREGNAME(e8); + RETREGNAME(e9); + RETREGNAME(cg); + RETREGNAME(x0); + RETREGNAME(x1); + RETREGNAME(x2); + RETREGNAME(x3); + RETREGNAME(x4); + RETREGNAME(x5); + RETREGNAME(x6); + RETREGNAME(x7); + RETREGNAME(y0); + RETREGNAME(y1); + RETREGNAME(y2); + RETREGNAME(y3); + RETREGNAME(y4); + RETREGNAME(y5); + RETREGNAME(y6); + RETREGNAME(y7); + + default: + return ""; + } +}; + +void dumpArgument(FILE* f, vmop_t* ops) +{ + switch(ARG_TYPE(*ops)) + { + case ARG_VAL_TYPE: + fprintf(f, "0x%06x", ARG_GET_VALUE(*((uint*)ops))); + break; + + case ARG_MEM_TYPE: + fprintf(f, "", ARG_GET_VALUE(*((uint*)ops))); + break; + + case ARG_STACK_TYPE: + fprintf(f, "", ARG_GET_VALUE(*((uint*)ops))); + break; + + case ARG_REG_TYPE: + fprintf(f, getRegisterName(*ops)); + break; + + default: + ASSERT(false); + } +} + +/* Dump a string of ops to a stream */ +int opsDump(vmop_t* ops, FILE* f) +{ + vmop_t* beg = ops; + + while(*ops != o_end) + { + vmop_t op = *ops; + + fprintf(f, "%06x: %s ", (ops - beg), getOpName(op)); + + ops++; + + /* Now the arguments */ + switch(op) + { + case o_push: + case o_pop: + case o_jmp: + case o_je: + case o_jne: + case o_test: + case o_call: + case o_stop: + case o_pos: + dumpArgument(f, ops); + INC_ARGUMENT(ops); + break; + + case o_lock: + case o_check: + case o_cmp: + case o_add: + case o_sub: + case o_mov: + case o_match: + case o_repl: + dumpArgument(f, ops); + fprintf(f, ", "); + INC_ARGUMENT(ops); + dumpArgument(f, ops); + INC_ARGUMENT(ops); + break; + }; + + /* Now any additional data */ + switch(op) + { + case o_match: + { + match_op* op = (match_op*)ops; + if(op->type & kMatchPcre) + { + match_op_pcre* pcre = (match_op_pcre*)op; + fprintf(f, " %s", pcre->pattern); + } + else + { + fprintf(f, " "); + } + + ops += match_op_size(*op); + } + break; + + case o_setvar: + case o_clrvar: + { + var_op* vop = (var_op*)ops; + fprintf(f, " <%%%s>", vop->name); + ops += var_op_size(*vop); + } + break; + + case o_test: + { + text_op* op = (text_op*)ops; + fprintf(f, " <%s>", op->string); + ops += text_op_size(*op); + } + break; + }; + + + fprintf(f, "\n"); + } + + return R_OK; + +} + + + + -- cgit v1.2.3