/* * AUTHOR * N. Nielsen * * LICENSE * This software is in the public domain. * * The software is provided "as is", without warranty of any kind, * express or implied, including but not limited to the warranties * of merchantability, fitness for a particular purpose, and * noninfringement. In no event shall the author(s) be liable for any * claim, damages, or other liability, whether in an action of * contract, tort, or otherwise, arising from, out of, or in connection * with the software or the use or other dealings in the software. * * SUPPORT * Send bug reports to: */ /* ---------------------------------------------------------------------- // Recipe Compiler // 2000-2002 Copyright, Nate Nielsen */ #include #include "common/usuals.h" #include "common/compat.h" #include "lib/rlib.h" #include "priv.h" #include "execute.h" #include "ops.h" const byte kEncUTF8[] = { 0xEF, 0xBB, 0xBF }; const byte kEncUCS2_L[] = { 0xFF, 0xFE }; const byte kEncUCS2_R[] = { 0xFE, 0xFF }; const byte kEncUCS4_L[] = { 0xFF, 0xFE, 0x00, 0x00 }; const byte kEncUCS4_R[] = { 0x00, 0x00, 0xFE, 0xFF }; /* Flags Syntax: ----------------------------------------------------------------- */ /* All flags for available to statements */ typedef enum _syn_flags { f_not = 0x0001, /* match: Reverse effect */ f_once = 0x0002, /* match: Only execute once */ f_case = 0x0004, /* options: Case sensitive */ f_line = 0x0010, /* options: Limit to line matches */ f_find = 0x0020, /* match: match but don't move limits */ f_tag = 0x0040, /* tag: use special tag matching */ /* Special cases */ f_num = 0x0400 /* represents a number */ } syn_flags; /* Listing of all flags and their text representations */ typedef struct _flagmap { syn_flags flag; const char* text; } flagmap; flagmap kAllFlags[] = { { f_not, "not" }, { f_once, "once" }, { f_case, "case" }, { f_line, "line" }, { f_find, "find" }, { f_tag, "tag" }, /* Special cases */ { f_num, "" } }; /* Statement Syntax: -------------------------------------------------------------- */ typedef enum _syn_statements { s_none, s_function, s_loop, s_once, s_options, s_call, s_return, s_end, s_stop, s_match, s_replace, s_lock, s_setvar, s_addvar, s_clrvar, s_message, s_else, s_opbrace, s_clbrace, s_implied } syn_statements; typedef struct _syntaxmap { syn_statements syntax; /* id */ const char* text; /* text representation */ uint flags; /* which flags are valid */ uint args; /* Does it have arguments (either a name or a data block) */ uint context; /* In which context valid */ } syntaxmap; /* Context values */ #define SYNTAX_INROOT 0x00000001 /* Only allowed in the root script */ #define SYNTAX_INBRACE 0x00000004 /* Only allowed inside braces */ #define SYNTAX_INFUNCTION 0x00000008 /* Only allowed in a function */ #define SYNTAX_BLOCK 0x00000010 /* The statement is start of a block */ #define SYNTAX_IMPLIED 0x00000020 /* We're in an implied block */ #define SYNTAX_SPECIAL 0x00000100 /* Syntax not determined by parser */ #define SYNTAX_ANYWHERE (SYNTAX_INROOT | SYNTAX_INBRACE | SYNTAX_INFUNCTION) /* Argument values */ #define ARGUMENT_NAME 0x00000001 #define ARGUMENT_DATA 0x00000002 syntaxmap kAllStatements[] = { { s_none, "", 0, 0, SYNTAX_SPECIAL }, { s_function, "function", 0, ARGUMENT_NAME, SYNTAX_INROOT }, { s_loop, "loop", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK }, { s_once, "once", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK }, { s_options, "options", f_case | f_line, 0, SYNTAX_ANYWHERE }, { s_call, "call", 0, ARGUMENT_NAME, SYNTAX_ANYWHERE }, { s_return, "return", f_num, 0, SYNTAX_INFUNCTION }, { s_end, "end", 0, 0, SYNTAX_ANYWHERE }, { s_stop, "stop", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE }, { s_match, "match", f_not|f_once|f_num|f_find|f_tag,ARGUMENT_DATA, SYNTAX_ANYWHERE | SYNTAX_BLOCK }, { s_replace, "replace", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE }, { s_lock, "lock", 0, 0, SYNTAX_ANYWHERE }, { s_setvar, "set", 0, ARGUMENT_DATA | ARGUMENT_NAME, SYNTAX_ANYWHERE }, { s_addvar, "add", 0, ARGUMENT_DATA | ARGUMENT_NAME, SYNTAX_ANYWHERE }, { s_clrvar, "clr", 0, ARGUMENT_NAME, SYNTAX_ANYWHERE }, { s_message, "message", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE }, { s_else, "else", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK }, /* special cases */ { s_opbrace, "{", 0, 0, SYNTAX_SPECIAL }, { s_clbrace, "}", 0, 0, SYNTAX_SPECIAL }, { s_implied, "", 0, 0, SYNTAX_SPECIAL } }; /* Syntax Constants: ------------------------------------------------------------- */ const char* kValidNum = "0123465798"; const char* kValidDelim = "\"~`!@#$%^&*[]|'<>./?+=-;:"; const char* kValidBrace = "{}"; const char kFlagsStart = '('; const char kFlagsEnd = ')'; const char kFlagDelim = ','; const char kTagDelim = '='; const char kEscapeChar = '\\'; /* Maximum size of an identifier */ #define kMaxIdentifier 40 #define INVALID_PTR 0xFFFFFFFF /* ---------------------------------------------------------------------- // CODE and COMPILE FUNCTIONALITY */ const size_t OPS_BUFFER_SIZE = 0x1000; /* codestack: We use one of these for each level of braces. Helps // us maintain context. */ typedef struct _codestack { uint curContext; /* Current parse context */ uint curOptions; /* Current options in use */ uint numStatements; /* Number of statements in this block */ size_t insPos; /* The position to insert code */ size_t endPos; /* End of code owned by current codestack */ struct _codestack* pPrev; /* previous stack (if in list) */ } codestack; /* Add a new post op buffer and stash away current */ #define PUSH_CODESTACK(c) \ do { \ (c)->code = pushCodeStack((c)->code); \ if(!(c)->code) RETURN(R_NOMEM); \ } while(0) /* Dump current post op buffer and get previous */ #define POP_CODESTACK(c) \ do { \ commitCodeStack((c)->code); \ (c)->code = popCodeStack((c)->code); \ } while (0) \ /* Initialize a new post ops buffer and hook into previous */ static codestack* pushCodeStack(codestack* prev) { codestack* code = (codestack*)malloc(sizeof(codestack)); if(!code) return NULL; memset(code, 0, sizeof(codestack)); if(prev) { /* Things to carry over from previous */ code->insPos = code->endPos = prev->insPos; code->curOptions = prev->curOptions; code->curContext = prev->curContext; } else { code->curContext = SYNTAX_INROOT; } /* Init the context stuff */ code->pPrev = prev; return code; } /* Free a post op buffer and return previous */ static codestack* popCodeStack(codestack* code) { codestack* prev = code->pPrev; if(code->pPrev) { /* The insertion position needs fixing ... */ /* calc offset */ size_t offset = code->insPos - prev->insPos; prev->insPos += offset; prev->endPos += offset; } free(code); return prev; } #define commitCodeStack(code) ((code)->insPos = (code)->endPos) /* compilecontext: The main compile state. Only one used throughout // compilation */ typedef struct _compilecontext { syn_statements lastStatement; /* Last statement we had */ syn_statements nextStatement; /* Next statement we're expecting */ syn_statements curStatement; /* Current statement: */ const char* curName; /* - The Name */ size_t lenName; /* - Length of the name */ uint curFlags; /* - Flags */ const char* curFlagData; /* - Extra flag (f_num) */ size_t lenFlagData; /* - Length of extra flag */ const char* curData; /* - Data */ size_t lenData; /* - Length of data */ const char* in; /* Next location to compile in script */ long curKey; /* a unique key (id) which gets incremented */ /* for various uses such as jumps and calls */ vmop_t* beg; /* start of block of output code */ size_t cbops; /* number of bytes of output code */ size_t alloc; /* number of bytes allocated for output */ codestack* code; /* Current codestack */ bool failed : 1; /* did the last memory allocation fail? */ } compilecontext; /* ------------------------------------------------------------------- // POS OP // // the 'pos' op is a temporary place holder while compiling // it takes one value parameter and it's value is the same as is // temporarily placed in jmp, je, jne or call // it's removed and jumps are patched in the optimization stage */ #define o_pos ((vmop_t)(0xFF)) /* const vmop_t pos = 0xFF; */ /* ------------------------------------------------------------------- // OPS CODING FUNCTIONALITY */ /* These macros can only be used within repCompile */ #define PUSH_OP(op) \ pushValues(&ctx, true, sizeof(byte), op, 0) #define PUSH_OP_1(op, arg) \ pushValues(&ctx, true, sizeof(byte), op, sizeof(arg), arg, 0) #define PUSH_OP_2(op, arg, arg2) \ pushValues(&ctx, true, sizeof(byte), op, sizeof(arg), arg, sizeof(arg2), arg2, 0) /* Add ops in reverse */ #define PUSH_ROP(op) \ pushValues(&ctx, false, sizeof(byte), op, 0) #define PUSH_ROP_1(op, arg) \ pushValues(&ctx, false, sizeof(byte), op, sizeof(arg), arg, 0) #define PUSH_ROP_2(op, arg, arg2) \ pushValues(&ctx, false, sizeof(byte), op, sizeof(arg), arg, sizeof(arg2), arg2, 0) /* Get more stack space for ops */ static void moreOutput(compilecontext* ctx) { /* Reallocate */ ctx->beg = (byte*)reallocf(ctx->beg, ctx->alloc + OPS_BUFFER_SIZE); ctx->alloc += OPS_BUFFER_SIZE; /* Set flag if failed */ if(!ctx->beg) ctx->failed = true; } /* Allocate a specific amount from the stack */ #define allocOutput(ctx, len) pushData(ctx, NULL, len, true) /* Push any amount of data on ops buffer */ static void* pushData(compilecontext* ctx, const void* data, size_t len, bool forward) { void* pIns; if(ctx->cbops + len >= ctx->alloc) moreOutput(ctx); if(ctx->failed || !len) return NULL; pIns = ctx->beg + ctx->code->insPos; /* Make space at insertion point */ memmove(ctx->beg + ctx->code->insPos + len, ctx->beg + ctx->code->insPos, ctx->cbops - ctx->code->insPos); /* If we have data copy it in */ if(data) memcpy(pIns, data, len); /* In debug mode clear it */ #ifdef _DEBUG else memset(pIns, 0xCC, len); #endif ctx->cbops += len; ctx->code->endPos += len; if(forward) ctx->code->insPos += len; /* Only return value if no input data */ return data ? NULL : pIns; } static void pushValues(compilecontext* ctx, bool forward, ...) { va_list ap; size_t len; #define VAL_BUF 20 byte buff[VAL_BUF]; size_t cur = 0; va_start(ap, forward); while(len = va_arg(ap, size_t)) { if(cur + len > VAL_BUF) { pushData(ctx, buff, cur, forward); cur = 0; } switch(len) { case 1: buff[cur] = va_arg(ap, byte); break; case 2: *((unsigned short*)(buff + cur)) = va_arg(ap, unsigned short); break; case 4: *((unsigned int*)(buff + cur)) = va_arg(ap, unsigned int); break; default: ASSERT(false); break; } cur += len; } pushData(ctx, buff, cur, forward); } static int testRegexp(r_script* script, const char* regexp, short options) { int erroroffset; const char* error = NULL; pcre* re = pcre_compile(regexp, options, &error, &erroroffset, NULL); if(!re) { if(error) scriptSetError(script, error); return R_REGEXP; } else { free(re); return R_OK; } } /* ---------------------------------------------------------------------- // Functions for pushing specific types of ops on the stack */ static int pushMatch(r_script* script, compilecontext* ctx, const char* regexp) { int ret; /* Allocate */ size_t len = sizeof(match_op_pcre) + (sizeof(char) * strlen(regexp)); match_op_pcre* op = (match_op_pcre*)allocOutput(ctx, len); /* Setup op */ if(!op) return R_NOMEM; memset(op, 0, len); op->header.len = len; op->header.type = kMatchPcre; op->options = PCRE_DOLLAR_ENDONLY; /* TODO: Do we need to make an option for PCRE_MULTILINE? */ if(!(ctx->code->curOptions & f_line)) op->options |= PCRE_DOTALL; if(!(ctx->code->curOptions & f_case)) op->options |= PCRE_CASELESS; ret = testRegexp(script, regexp, op->options); if(ret < 0) return ret; /* Copy the uncompiled regular expression onto the ops stack */ strcpy(op->pattern, regexp); return R_OK; } static void pushText(compilecontext* ctx, const char* string, size_t len) { /* Allocate */ text_op* op = (text_op*)allocOutput(ctx, sizeof(text_op) + (sizeof(char) * (len))); /* Setup op */ if(op) { op->len = len; /* Copy the replacement string onto the ops stack */ /* TODO: Get this ready for binary replacements */ strncpy((char*)op->string, string, len); op->string[len] = 0; } } static void pushVar(compilecontext* ctx, const char* name, size_t lenName) { /* Allocate */ size_t len = lenName + 1; var_op* op = (var_op*)allocOutput(ctx, sizeof(var_op) + (sizeof(char) * len)); /* Setup op */ if(op) { op->len = len; /* Copy the variable name onto the ops stack */ /* TODO: Get this ready for binary replacements */ memcpy(op->name, name, lenName); op->name[lenName] = 0; } } /* ---------------------------------------------------------------------- // FUNCTIONS */ /* Keeps track of current functions seen */ typedef struct funcdef { char name[kMaxIdentifier + 1]; uint key; } funcdef; typedef struct _funcdefs { uint alloc; uint cur; funcdef defs[1]; } funcdefs; /* Add a function to the stack */ static bool addFunction(funcdefs** ppdefs, const char* name, size_t len, uint key) { /* Do allocation if necessary */ if(!*ppdefs || (*ppdefs)->cur >= (*ppdefs)->alloc) { uint alloc = *ppdefs ? (*ppdefs)->alloc : 0; uint cur = *ppdefs ? (*ppdefs)->cur : 0; alloc += 0x10; *ppdefs = (funcdefs*)reallocf(*ppdefs, sizeof(funcdefs) + (sizeof(funcdef) * alloc)); if(!*ppdefs) return false; (*ppdefs)->alloc = alloc; (*ppdefs)->cur = cur; } /* Push it on the back */ strlcpy((*ppdefs)->defs[(*ppdefs)->cur].name, name, min(kMaxIdentifier, len) + 1); (*ppdefs)->defs[(*ppdefs)->cur].key = key; (*ppdefs)->cur++; return true; } /* Check if a function exists */ static uint findFunction(funcdefs* pdefs, const char* name, size_t len) { if(pdefs) { char funcname[kMaxIdentifier + 1]; size_t i; strlcpy(funcname, name, min(len, kMaxIdentifier) + 1); /* Just loop through and compare names */ for(i = 0; i < pdefs->cur; i++) { if(!strcasecmp(pdefs->defs[i].name, funcname)) return pdefs->defs[i].key; } } return INVALID_PTR; } /* ---------------------------------------------------------------------- // SYNTAX FUNCTIONS */ /* Is a character escaped or not? */ bool isEscaped(const char* str, const char* posi) { /* Checks for a backslash before but note that backslash can be escaped to so... */ bool bEscaped = false; while(posi > str && posi[-1] == kEscapeChar) { bEscaped = !bEscaped; posi--; } return bEscaped; } /* Split a tag match into two for later use */ char* splitTagMatch(r_script* script, char* regexp) { char* second = regexp; while(second = strchr(second, kTagDelim)) { uint escs = 0; if(!isEscaped(regexp, second)) { second[0] = '\0'; return second + 1; } second++; } scriptSetError(script, "Couldn't find tags in match (separate with '%c').", kTagDelim); return NULL; } /* Eat spaces and comments */ static bool compileSpace(compilecontext* ctx) { /* Eat white space and comments here */ while(isspace(ctx->in[0]) || ctx->in[0] == '#') { /* Comments ... */ if(ctx->in[0] == '#') { /* Eat rest off line */ while(ctx->in[0] != '\n' && ctx->in[0] != '\0') ctx->in++; } ctx->in++; } /* Return true if not end of file */ return ctx->in[0] != '\0'; } int compileEncoding(r_script* script, compilecontext* ctx) { if(!memcmp(ctx->in, kEncUTF8, countof(kEncUTF8))) { ctx->in += countof(kEncUTF8); } else if(!memcmp(ctx->in, kEncUCS2_L, countof(kEncUCS2_L)) || !memcmp(ctx->in, kEncUCS2_R, countof(kEncUCS4_R)) || !memcmp(ctx->in, kEncUCS4_L, countof(kEncUCS4_L)) || !memcmp(ctx->in, kEncUCS4_R, countof(kEncUCS4_R))) { scriptSetError(script, "unsupported unicode encoding"); return R_SYNTAX; } return R_OK; } /* Compile a single statement */ int compileStatement(r_script* script, compilecontext* ctx) { /* Some MACROS */ #define SYNTAX_ERROR(s) \ do{ \ scriptSetError(script, s); \ RETURN(R_SYNTAX); \ } while(0) #define SYNTAX_ERROR_1(s, a1) \ do{ \ scriptSetError(script, s, a1); \ RETURN(R_SYNTAX); \ } while(0) /* Jump to cleanup label instead of return */ #define RETURN(r) \ do { \ retv = r; \ goto cleanup; \ } while (0) const char* end; int retv = R_OK; { int i; if(ctx->curStatement != s_opbrace && ctx->curStatement != s_clbrace) ctx->lastStatement = ctx->curStatement; ctx->curStatement = s_none; ctx->curName = NULL; ctx->lenName = 0; ctx->curFlags = 0; ctx->curFlagData = NULL; ctx->lenFlagData = 0; ctx->curData = NULL; ctx->lenData = 0; do { /* Eat all whitespace and comments */ compileSpace(ctx); /* Check for end of input */ if(ctx->in[0] == 0) RETURN(R_OK); /* Check for Statement */ if(!strcspn(ctx->in, kValidIdentifier)) end = ctx->in + strspn(ctx->in, kValidIdentifier); /* Check for braces */ else if(!strcspn(ctx->in, kValidBrace)) end = ctx->in + 1; /* Brace always just one character */ /* Anything else is bad */ else SYNTAX_ERROR_1("Unexpected character \'%c\'.", ctx->in[0]); /* Okay now determine which statement we got */ for(i = 0; i < countof(kAllStatements); i++) { if(!strncasecmp(kAllStatements[i].text, ctx->in, end - ctx->in)) { /* Check the context */ if(kAllStatements[i].context != SYNTAX_SPECIAL && !(ctx->code->curContext & kAllStatements[i].context)) SYNTAX_ERROR_1("\'%s\' not allowed here.", kAllStatements[i].text); ctx->curStatement = kAllStatements[i].syntax; break; } } /* Check that we got a statement if not then it should be a function call */ if(ctx->curStatement == s_none) { ctx->curStatement = s_call; ctx->curName = ctx->in; ctx->lenName = end - ctx->in; } /* Make sure it's what we were expecting */ if(ctx->nextStatement != s_none && ctx->nextStatement != ctx->curStatement) { /* If braces were required but not found, then do an impliedBlock */ if(ctx->nextStatement == s_opbrace && kAllStatements[ctx->lastStatement].context & SYNTAX_BLOCK) { ctx->nextStatement = ctx->curStatement; ctx->curStatement = s_implied; end = ctx->in; } /* Otherwise it's an error */ else { SYNTAX_ERROR_1("Expecting \'%s\' here.", kAllStatements[ctx->nextStatement].text); } } ctx->in = end; ctx->nextStatement = s_none; /* We process braces here */ if(ctx->curStatement == s_opbrace || ctx->curStatement == s_implied) { PUSH_CODESTACK(ctx); ctx->code->curContext |= SYNTAX_INBRACE; if(ctx->lastStatement == s_function) ctx->code->curContext |= SYNTAX_INFUNCTION; if(ctx->curStatement == s_implied) ctx->code->curContext |= SYNTAX_IMPLIED; ctx->curStatement = s_none; continue; } /* See if we need to pop any codestacks for implied blocks */ while(ctx->code->curContext & SYNTAX_IMPLIED && ctx->code->numStatements > 0) { /* Only should be one statement on an implied block */ ASSERT(ctx->code->numStatements == 1); POP_CODESTACK(ctx); } /* Closing braces here */ if(ctx->curStatement == s_clbrace) { POP_CODESTACK(ctx); ctx->curStatement = s_none; continue; } } while(ctx->curStatement == s_none); /* Okay now do name if we have one */ if(kAllStatements[ctx->curStatement].args & ARGUMENT_NAME && !ctx->curName) { compileSpace(ctx); /* Now look for a normal or name flag */ if(!strcspn(ctx->in, kValidIdentifier)) { end = ctx->in + strspn(ctx->in, kValidIdentifier); /* Any flags not found assume it's an identifier */ ctx->curName = ctx->in; ctx->lenName = end - ctx->in; if(ctx->lenName > kMaxIdentifier) SYNTAX_ERROR("Maximum length for an identifier is 40 characters."); ctx->in = end; } } /* Eat the next little bit of whitespace */ compileSpace(ctx); /* Okay now look for flags start */ if(ctx->in[0] == kFlagsStart) { ctx->in++; while(1) { compileSpace(ctx); /* Check for a number flag */ if(!strcspn(ctx->in, kValidNum)) { if(ctx->curFlagData) SYNTAX_ERROR("Invalid flags."); /* If found then just grab and go */ ctx->curFlags |= f_num; ctx->curFlagData = ctx->in; ctx->lenFlagData = strspn(ctx->in, kValidNum); ctx->in += ctx->lenFlagData; } /* Now look for a normal flag */ else if(!strcspn(ctx->in, kValidIdentifier)) { bool found = false; end = ctx->in + strspn(ctx->in, kValidIdentifier); /* Okay now try and map out that flag */ for(i = 0; i < countof(kAllFlags); i++) { if(!strncasecmp(kAllFlags[i].text, ctx->in, end - ctx->in)) { found = true; ctx->curFlags |= kAllFlags[i].flag; break; } } /* Any flags not found assume it's an identifier */ if(!found) { if(ctx->curName != NULL) SYNTAX_ERROR("Invalid flags."); ctx->curName = ctx->in; ctx->lenName = end - ctx->in; } ctx->in = end; } /* End ) of flags */ else if(ctx->in[0] == kFlagsEnd) { ctx->in++; break; } /* Separator , between flags */ else if(ctx->in[0] == kFlagDelim) ctx->in++; else SYNTAX_ERROR("Expected a flag."); } /* Now check the flags */ if((kAllStatements[ctx->curStatement].flags | ctx->curFlags) != kAllStatements[ctx->curStatement].flags) SYNTAX_ERROR("Invalid flags for this statement."); } /* Okay now do data processing */ if(kAllStatements[ctx->curStatement].args & ARGUMENT_DATA) { const char* delim; compileSpace(ctx); /* Check for a delimiter */ if(delim = strchr(kValidDelim, *(ctx->in))) { ctx->in++; end = ctx->in; /* Find end of data but checking for escaped delimeters */ while(1) { end = strchr(end, *delim); if(!end) SYNTAX_ERROR("Couldn't find end of data for this statement"); if(!isEscaped(ctx->in, end)) break; end++; } ctx->curData = ctx->in; ctx->lenData = (end - ctx->in); ctx->in = end + 1; } else { SYNTAX_ERROR("Expected data for this statement."); } } if(ctx->lenName > kMaxIdentifier) SYNTAX_ERROR("Maximum length for an identifier is 40 characters."); ctx->code->numStatements++; } /* done! */ cleanup: return retv; } static uint getLine(const char* beg, const char* cur) { size_t ret = 1; while(beg <= cur) { if(*beg == '\n') ret++; beg++; } return ret; } int compilerRun(r_script* script, const char* data) { int retv = R_OK; /* used by RETURN macro */ funcdefs* pFunctions = NULL; char* regexp = NULL; /* We allocate the main instruction buffer */ compilecontext ctx; memset(&ctx, 0, sizeof(ctx)); ctx.in = data; ctx.curKey = 0x00000100; ctx.beg = (vmop_t*)malloc(OPS_BUFFER_SIZE); ctx.alloc = OPS_BUFFER_SIZE; ctx.code = pushCodeStack(NULL); if(!ctx.beg || !ctx.code) RETURN(R_NOMEM); #ifdef _DEBUG { int i; /* Do a check here! Index should be equal to */ /* command name for each statement */ for(i = 0; i < countof(kAllStatements); i++) ASSERT(i == kAllStatements[i].syntax); } #endif /* Push a first empty pops stack We have to have one to pop below */ PUSH_CODESTACK(&ctx); ctx.code->curContext = SYNTAX_INROOT; /* Push our signature of 4 nop bytes */ PUSH_OP(o_nop); PUSH_OP(o_nop); PUSH_OP(o_nop); PUSH_OP(o_nop); /* Set up initial stack frame */ PUSH_OP_2(o_mov, r_bp, r_sp); /* Push the beginning of the main loop here */ PUSH_OP_1(o_pos, ARG_MAKE_VALUE(0L)); PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(0L)); retv = compileEncoding(script, &ctx); if(retv < 0) RETURN(retv); while(ctx.in[0] != '\0') { if(ctx.cbops + 0x200 >= ctx.alloc) moreOutput(&ctx); /* Here we check if we have enough memory */ if(ctx.failed) RETURN(R_NOMEM); /* Get and parse the current statement */ retv = compileStatement(script, &ctx); if(retv < 0) RETURN(retv); if(ctx.curStatement == s_none) continue; /* Now we pop the commit end code from previous statement */ commitCodeStack(ctx.code); /* Okay now do the rest of the statements */ switch(ctx.curStatement) { /*////////////////////////////////////////////////////// // function */ case s_function: { /* SYNTAX ---------------------------------------- // // function(name) // { // ..... // } */ uint key, keyJmp; /* Check that we got a name */ if(!(ctx.curName && ctx.lenName)) SYNTAX_ERROR("'function' needs a name"); /* We need a opening brace next */ ctx.nextStatement = s_opbrace; key = ARG_MAKE_VALUE(ctx.curKey++); /* Put the function name at current op pos */ if(!addFunction(&pFunctions, ctx.curName, ctx.lenName, key)) RETURN(R_NOMEM); /* OPS -------------------------------------------- // // jmp <1> // pos // push bp // mov bp, r_sp // etc. // ...... // etc. // mov fe, 1 // mov sp, bp // pop bp // ret // <1> */ keyJmp = ARG_MAKE_VALUE(ctx.curKey++); PUSH_OP_1(o_jmp, keyJmp); PUSH_OP_1(o_pos, key); PUSH_OP_1(o_push, r_bp); PUSH_OP_2(o_mov, r_bp, r_sp); PUSH_ROP_1(o_pos, keyJmp); PUSH_ROP(o_ret); PUSH_ROP_1(o_pop, r_bp); PUSH_ROP_2(o_mov, r_sp, r_bp); PUSH_ROP_2(o_mov, r_fe, ARG_MAKE_VALUE(1)); } break; /*////////////////////////////////////////////////////// // loop */ case s_loop: { uint key, key2; /* SYNTAX ---------------------------------------- // // loop // { // ..... // } */ /* We need a opening brace next */ ctx.nextStatement = s_opbrace; /* OPS -------------------------------------------- // // push ac // <1> // mov ac, 0 // ..... // pop x2 pop the previously pushed action value // test ac test current action // jne <2> if action // mov x2, ac then overide previous action value // <2> // push x2 push back changed action value // je <1> // pop ac pop out the */ key = ARG_MAKE_VALUE(ctx.curKey++); key2 = ARG_MAKE_VALUE(ctx.curKey++); PUSH_OP_1(o_push, r_ac); PUSH_OP_1(o_pos, key); PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(0L)); PUSH_ROP_1(o_pop, r_ac); PUSH_ROP_1(o_je, key); PUSH_ROP_1(o_push, r_x2); PUSH_ROP_1(o_pos, key2); PUSH_ROP_2(o_mov, r_x2, r_ac); PUSH_ROP_1(o_jne, key2); PUSH_ROP_1(o_test, r_ac); PUSH_ROP_1(o_pop, r_x2); } break; /*////////////////////////////////////////////////////// // once */ case s_once: { uint keyOnce, key1, key5; /* SYNTAX ---------------------------------------- // // once // { // ..... // } */ /* We need a opening brace next */ ctx.nextStatement = s_opbrace; /* OPS -------------------------------------------- // // test mem(value) Pull in the flag from memory // jne <1> If not present then jump to containing code // mov fe, 0 Otherwise set fail flag // jmp <5> And skip the match // <1> // ..... // // mov mem(value), 1 // <5> */ keyOnce = ARG_MAKE_MEMORY(ctx.curKey++); key1 = ARG_MAKE_VALUE(ctx.curKey++); key5 = ARG_MAKE_VALUE(ctx.curKey++); /* Once code */ PUSH_OP_1(o_test, keyOnce); PUSH_OP_1(o_jne, key1); PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0)); PUSH_OP_1(o_jmp, key5); PUSH_OP_1(o_pos, key1); PUSH_ROP_1(o_pos, key5); PUSH_ROP_2(o_mov, keyOnce, ARG_MAKE_VALUE(1)); } break; /*///////////////////////////////////////////////////// // call */ case s_call: { uint key; /* SYNTAX ---------------------------------------- // // call(name) */ /* Check that we got a name */ if(!(ctx.curName && ctx.lenName)) SYNTAX_ERROR("'call' needs a function name"); /* Find the function */ key = findFunction(pFunctions, ctx.curName, ctx.lenName); if(key == INVALID_PTR) SYNTAX_ERROR("function does not exist"); /* OPS -------------------------------------------- // // call */ PUSH_OP_1(o_call, key); } break; /*/////////////////////////////////////////////////////// // return */ case s_return: { /* SYNTAX ---------------------------------------- // // return(0) */ uint code = ARG_MAKE_VALUE(1); /* Check for the number */ if(ctx.curFlags & f_num) { if(ctx.lenFlagData > 1 || (ctx.curFlagData[0] != '0' && ctx.curFlagData[0] != '1')) SYNTAX_ERROR("Return code must be 0 or 1"); if(ctx.curFlagData[0] == '0') code = ARG_MAKE_VALUE(0); } /* OPS -------------------------------------------- // // mov fe, 0 (or 1 depending on code) // mov sp, r_bp // pop bp // ret */ PUSH_OP_2(o_mov, r_fe, code); PUSH_OP_2(o_mov, r_sp, r_bp); PUSH_OP_1(o_pop, r_bp); PUSH_OP(o_ret); } break; /*////////////////////////////////////////////////////// // end */ case s_end: { /* SYNTAX ---------------------------------------- // // end */ /* OPS -------------------------------------------- // // stop */ PUSH_OP(o_text); pushText(&ctx, NULL, 0); PUSH_OP_1(o_stop, ARG_MAKE_VALUE(0)); } break; /*////////////////////////////////////////////////////// // end */ case s_stop: { /* SYNTAX ---------------------------------------- // // stop "message" */ /* OPS -------------------------------------------- // // text "message" // stop */ PUSH_OP(o_text); pushText(&ctx, ctx.curData, ctx.lenData); PUSH_OP_1(o_stop, ARG_MAKE_VALUE(1)); } break; /*///////////////////////////////////////////////////// // match */ case s_match: { /* SYNTAX ---------------------------------------- // // match(not, once) "regexp" */ uint keyOnce, keyWatermark, keyJmp1, keyJmp2, keyJmp3, keyJmp4, keyJmp5, keyJmp6, keyJmp7, groupNum, key1, key4, key5, key9; /* Get the flags */ bool bNot = ctx.curFlags & f_not ? true : false; bool bOnce = ctx.curFlags & f_once ? true : false; bool bHas = ctx.curFlags & f_find ? true : false; bool bTag = ctx.curFlags & f_tag ? true : false; /* Check that we got data */ if(!ctx.curData || !ctx.lenData) SYNTAX_ERROR("'match' needs a regular expression"); /* We need a opening brace next */ ctx.nextStatement = s_opbrace; regexp = strndup(ctx.curData, ctx.lenData); if(!regexp) RETURN(R_NOMEM); groupNum = 0; if(ctx.curFlagData && ctx.lenFlagData) { if(ctx.lenFlagData > 1) SYNTAX_ERROR("Group specifier must be between 0 and 9."); if(bHas) SYNTAX_ERROR("Group specifier invalid with 'has' flag."); /* Get the number to use */ groupNum = ctx.curFlagData[0] - '0'; } /* OPS -------------------------------------------- // // push x1 Save limits // push y1 " " // test mem(value) (once) Pull in the flag from memory // jne <1> (once) If not present then jump to match code // mov fe, 0 (once) Otherwise set fail flag // jmp <5> (once) And skip the match // <1> (once) // mov x6, mem(key_value) (watermark) Get the watermark // cmp x1, r_x6 (watermark) If watermark higher than match area // mov fe, r_fg (watermark) // jne <2> (watermark) skip // mov x6, r_x1 (watermark) Otherwise bring up watermark to match area // <2> (watermark) // match x6, r_y1 Do match // mov x4, fe // <5> (once) // jne <3> If match failed skip set below // mov ac, 1 Set action flag // cmp cg, 2 See if we have enough groups // mov fe, r_fg "" // jne <7> // mov fe, 0 If not then set failed // jmp <3> And skip to failed part // <7> // mov x1, r_b2 Set new limit for inside // mov y1, e2 different registers (depending on number parameter) // mov x2, r_b0 (watermark) Get the start of batch // add x2, 1 (watermark) Add one to it // mov mem(value), x2 (watermark) Stash it away in memory // <3> // cmp x4, 1 (not) Compare success against 0 or 1 depending on not // push fe // jne <4> Skip if no match // mov mem(value), 1 (once) // ..... // <4> // pop fe // pop y1 // pop x1 */ keyOnce = ARG_MAKE_MEMORY(ctx.curKey++); keyWatermark = ARG_MAKE_STACK(ctx.curKey++); keyJmp1 = ARG_MAKE_VALUE(ctx.curKey++); keyJmp2 = ARG_MAKE_VALUE(ctx.curKey++); keyJmp3 = ARG_MAKE_VALUE(ctx.curKey++); keyJmp4 = ARG_MAKE_VALUE(ctx.curKey++); keyJmp5 = ARG_MAKE_VALUE(ctx.curKey++); keyJmp6 = ARG_MAKE_VALUE(ctx.curKey++); keyJmp7 = ARG_MAKE_VALUE(ctx.curKey++); /* Save for later */ PUSH_OP_1(o_push, r_x1); PUSH_OP_1(o_push, r_y1); if(bOnce) { /* Once code */ PUSH_OP_1(o_test, keyOnce); PUSH_OP_1(o_jne, keyJmp1); PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0)); PUSH_OP_1(o_jmp, keyJmp5); PUSH_OP_1(o_pos, keyJmp1); } /* Watermark code */ PUSH_OP_2(o_mov, r_x6, keyWatermark); PUSH_OP_2(o_cmp, r_x1, r_x6); PUSH_OP_2(o_mov, r_fe, r_fg); PUSH_OP_1(o_jne, keyJmp2); PUSH_OP_2(o_mov, r_x6, r_x1); PUSH_OP_1(o_pos, keyJmp2); /* Is it a simple match? */ if(!bTag) { /* Actual match */ PUSH_OP_2(o_match, r_x6, r_y1); retv = pushMatch(script, &ctx, regexp); if(retv < 0) RETURN(retv); } /* Or the very complicated tag statement */ else { /* Split the regular expression */ char* second = splitTagMatch(script, regexp); if(!second) RETURN(R_SYNTAX); /* (See code docs in tag.txt file) */ key1 = ARG_MAKE_VALUE(ctx.curKey++); key4 = ARG_MAKE_VALUE(ctx.curKey++); key5 = ARG_MAKE_VALUE(ctx.curKey++); key9 = ARG_MAKE_VALUE(ctx.curKey++); /* Setup */ PUSH_OP_2(o_mov, r_x2, ARG_MAKE_VALUE(0)); PUSH_OP_2(o_mov, r_x0, r_x6); PUSH_OP_2(o_mov, r_y0, r_y1); /* Top of loop */ PUSH_OP_1(o_pos, key1); /* Start code */ PUSH_OP_2(o_match, r_x0, r_y1); retv = pushMatch(script, &ctx, regexp); if(retv < 0) RETURN(retv); PUSH_OP_1(o_jne, key5); PUSH_OP_2(o_cmp, r_b0, r_y0); PUSH_OP_1(o_je, key5); PUSH_OP_2(o_mov, r_fe, r_fg); PUSH_OP_1(o_je, key5); PUSH_OP_2(o_cmp, r_x2, ARG_MAKE_VALUE(0)); PUSH_OP_1(o_jne, key4); PUSH_OP_2(o_mov, r_x5, r_b0); PUSH_OP_2(o_mov, r_y5, r_e0); PUSH_OP_2(o_mov, r_y0, r_e0); PUSH_OP_1(o_pos, key4); PUSH_OP_2(o_mov, r_x2, ARG_MAKE_VALUE(0)); PUSH_OP_2(o_mov, r_x0, r_e0); /* Endless loop check */ PUSH_OP_2(o_cmp, r_y2, ARG_MAKE_VALUE(0x00100000)); PUSH_OP_1(o_je, key5); /* End code */ PUSH_OP_2(o_match, r_y0, r_y1); retv = pushMatch(script, &ctx, second); if(retv < 0) RETURN(retv); PUSH_OP_1(o_jne, key5); PUSH_OP_2(o_mov, r_y0, r_e0); PUSH_OP_2(o_mov, r_x7, r_b0); PUSH_OP_2(o_mov, r_y7, r_e0); /* Locks check */ PUSH_OP_2(o_check, r_y5, r_x7); PUSH_OP_2(o_mov, r_x2, r_fe); /* End of loop */ PUSH_OP_1(o_jmp, key1); PUSH_OP_1(o_pos, key5); /* Wrap up */ PUSH_OP_2(o_cmp, r_x2, ARG_MAKE_VALUE(0)); PUSH_OP_1(o_je, key9); PUSH_OP_2(o_mov, r_b0, r_x5); PUSH_OP_2(o_mov, r_e0, r_y7); PUSH_OP_2(o_mov, r_b1, r_x5); PUSH_OP_2(o_mov, r_e1, r_y5); PUSH_OP_2(o_mov, r_b2, r_y5); PUSH_OP_2(o_mov, r_e2, r_x7); PUSH_OP_2(o_mov, r_b3, r_x7); PUSH_OP_2(o_mov, r_e3, r_y7); PUSH_OP_2(o_mov, r_cg, ARG_MAKE_VALUE(4)); PUSH_OP_1(o_pos, key9); PUSH_OP_2(o_mov, r_fe, r_x2); } PUSH_OP_2(o_mov, r_x4, r_fe); if(bOnce) PUSH_OP_1(o_pos, keyJmp5); /* Skip all the rest of the setup if failed */ PUSH_OP_1(o_jne, keyJmp3); /* Set action flag */ PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(1)); if(!bHas) { /* Group validation code */ PUSH_OP_2(o_cmp, ARG_MAKE_VALUE(groupNum + 1), r_cg); PUSH_OP_2(o_mov, r_fe, r_fg); PUSH_OP_1(o_jne, keyJmp7); PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0)); PUSH_OP_1(o_jmp, keyJmp3); PUSH_OP_1(o_pos, keyJmp7); /* Now depending on group number set do this we set a set of registers to check */ PUSH_OP_2(o_mov, r_x1, ARG_MAKE_REGISTER(ARG_GET_REGISTER(r_b0) + groupNum)); PUSH_OP_2(o_mov, r_y1, ARG_MAKE_REGISTER(ARG_GET_REGISTER(r_e0) + groupNum)); } /* Watermark */ PUSH_OP_2(o_mov, keyWatermark, r_e0); /* End of setup code */ PUSH_OP_1(o_pos, keyJmp3); /* Implement 'not' */ PUSH_OP_2(o_cmp, r_x4, ARG_MAKE_VALUE(bNot ? 0 : 1)); PUSH_OP_1(o_push, r_fe); PUSH_OP_1(o_jne, keyJmp4); /* Now we're inside */ if(bOnce) PUSH_OP_2(o_mov, keyOnce, ARG_MAKE_VALUE(1)); /* Wrap up code */ PUSH_ROP_1(o_pop, r_x1); PUSH_ROP_1(o_pop, r_y1); PUSH_ROP_1(o_pop, r_fe); PUSH_ROP_1(o_pos, keyJmp4); free(regexp); regexp = NULL; } break; /*////////////////////////////////////////////////////// // replace */ case s_replace: { /* SYNTAX ---------------------------------------- // // replace "replacetext" */ uint key; /* Check that we got data */ if(!ctx.curData) SYNTAX_ERROR("'replace' needs text to replace"); /* OPS -------------------------------------------- // // check x1, r_y1 // jne <1> // repl x1, r_y1 // lock x1, r_y1 // <1> */ key = ARG_MAKE_VALUE(ctx.curKey++); PUSH_OP_2(o_check, r_x1, r_y1); PUSH_OP_1(o_jne, key); PUSH_OP(o_text); pushText(&ctx, ctx.curData, ctx.lenData); PUSH_OP_2(o_repl, r_x1, r_y1); PUSH_OP_2(o_lock, r_x1, r_y1); PUSH_OP_1(o_pos, key); } break; /*////////////////////////////////////////////////////// // lock */ case s_lock: { /* SYNTAX ---------------------------------------- // // lock */ /* OPS -------------------------------------------- // // lock x1, r_y1 */ PUSH_OP_2(o_lock, r_x1, r_y1); } break; /*////////////////////////////////////////////////////// // variable functions */ case s_setvar: case s_clrvar: case s_addvar: { /* SYNTAX ---------------------------------------- // // set name "value" // clr name // add name "value" */ /* Check that we got a name */ if(!(ctx.curName && ctx.lenName)) SYNTAX_ERROR("Missing variable name"); /* Check that we got data */ if((ctx.curStatement == s_setvar || ctx.curStatement == s_addvar) && !ctx.curData) SYNTAX_ERROR("Missing variable value"); /* OPS -------------------------------------------- */ /* If not clearing then push text */ if(ctx.curStatement != s_clrvar) { PUSH_OP(o_text); pushText(&ctx, ctx.curData, ctx.lenData); } /* If not adding then clear */ if(ctx.curStatement != s_addvar) { PUSH_OP(o_clrvar); pushVar(&ctx, ctx.curName, ctx.lenName); } /* If not clearing then set */ if(ctx.curStatement != s_clrvar) { PUSH_OP(o_setvar); pushVar(&ctx, ctx.curName, ctx.lenName); } } break; /*////////////////////////////////////////////////////// // else */ case s_else: { /* SYNTAX ---------------------------------------- // // else // { // ..... // } */ uint key; /* REMOVED the following because now else can follow any statement // // Make sure the previous statement was a match // if(ctx.lastStatement != s_match && ctx.lastStatement != s_replace && // ctx.lastStatement != s_call) // SYNTAX_ERROR("'else' must follow a match, replace or call statement"); */ /* Next statement must be a opening brace */ ctx.nextStatement = s_opbrace; /* OPS -------------------------------------------- // // je <1> // ...... // <1> */ key = ARG_MAKE_VALUE(ctx.curKey++); PUSH_OP_1(o_je, key); PUSH_ROP_1(o_pos, key); } break; /*///////////////////////////////////////////////////// // options */ case s_message: { /* SYNTAX ---------------------------------------- // // message "data" */ /* Check that we got data */ if(!ctx.curData) SYNTAX_ERROR("Missing message text."); /* OPS -------------------------------------------- // // text "message" // stop */ PUSH_OP(o_text); pushText(&ctx, ctx.curData, ctx.lenData); PUSH_OP(o_msg); } break; /*///////////////////////////////////////////////////// // options */ case s_options: { /* SYNTAX ---------------------------------------- // // options(case, line) */ /* Save the options into the context */ ctx.code->curOptions = ctx.curFlags; } break; default: ASSERT(false); } } /* Pop out of any implied blocks */ while(ctx.code->curContext & SYNTAX_IMPLIED) POP_CODESTACK(&ctx); /* copy any remaining post ops */ POP_CODESTACK(&ctx); /* This is the bottom of the main loop */ PUSH_OP_1(o_test, r_ac); PUSH_OP_1(o_je, ARG_MAKE_VALUE(0)); /* Put an end marker */ PUSH_OP(o_end); /* Check brace syntax */ if(ctx.code->pPrev != NULL) SYNTAX_ERROR("Not all braces matched."); /* Put the compiled script in the script */ script->ops = ctx.beg; script->len = ctx.cbops; /* This fixes all the jmp and removes pos */ retv = compilerOptimize(script); cleanup: if(retv == R_SYNTAX || retv == R_REGEXP) script->errline = getLine(data, ctx.in) - 1; /* Unwind codestack */ while(ctx.code) ctx.code = popCodeStack(ctx.code); if(pFunctions) free(pFunctions); if(regexp) free(regexp); if(retv < R_OK) { if(ctx.beg) free(ctx.beg); script->ops = NULL; script->len = 0; } return retv; } int compilerOptimize(r_script* scr) { vmop_t* op = scr->ops; int retv = R_OK; /* First find and remove all pos // NOTE: land and pos are used interchangeably in this code */ uint* lands = NULL; uint cur = 0; uint alloc = 0; while(*op != o_end) { ASSERT(op < scr->ops + scr->len); switch(*op) { case o_pos: { if(alloc <= cur) { alloc += 0x40; lands = (uint*)reallocf(lands, sizeof(uint) * 2 * alloc); if(!lands) RETURN(R_NOMEM); } ASSERT(ARG_TYPE(op[1]) == ARG_VAL_TYPE); /* Position in 0 */ lands[cur * 2] = op - scr->ops; /* key in 1 */ lands[(cur * 2) + 1] = *((uint*)(op + 1)); cur++; /* Okay now eat the rest of the stuff total length of a pos should be 5 bytes */ scr->len -= 5; memmove(op, op + 5, scr->len - (op - scr->ops)); } break; default: opsIterate(&op); break; }; } /* Now fix all jumps and calls */ op = scr->ops; while(*op != o_end) { ASSERT(op < scr->ops + scr->len); switch(*op) { case o_pos: /* Shouldn't meet any pos ops after we removed them above */ ASSERT(false); break; case o_jmp: case o_jne: case o_je: case o_call: { uint* parg = ((uint*)(op + 1)); bool found = false; size_t i; /* find the key in our array */ for(i = 0; i < cur; i++) { if(lands[(i * 2) + 1] == *parg) { found = true; *parg = ARG_MAKE_VALUE(lands[i * 2]); break; } } if(!found) ASSERT(false && "jump without a pos"); } /* (Note fall through) */ default: opsIterate(&op); break; } } cleanup: if(lands) free(lands); return retv; } void opsIterate(vmop_t** ops) { vmop_t op = *(*(ops)); (*ops)++; /* increment *ops to next op point */ switch(op) { /* ops without arguments */ case o_end: case o_nop: case o_ret: case o_msg: break; /* ops with one argument */ case o_push: case o_pop: case o_jmp: case o_je: case o_jne: case o_test: case o_call: case o_stop: INC_ARGUMENT(*ops); break; /* ops with two arguments */ case o_lock: case o_check: case o_cmp: case o_add: case o_sub: case o_mov: case o_repl: INC_ARGUMENT(*ops); INC_ARGUMENT(*ops); break; /* Special cases */ case o_match: { match_op* op; INC_ARGUMENT(*ops); INC_ARGUMENT(*ops); op = (match_op*)(*ops); (*ops) += match_op_size(*op); } break; case o_setvar: case o_clrvar: { var_op* op = (var_op*)(*ops); (*ops) += var_op_size(*op); } break; case o_text: { text_op* op; op = (text_op*)(*ops); (*ops) += text_op_size(*op); } break; default: ASSERT(false); }; } /* TODO: individual ops do not need to be freed any longer. Execution no longer changes them. */ /* Frees a set of ops */ int opsFree(vmop_t* ops, size_t len) { byte* cur = ops; if(len == 0) len = ~0; while(cur < ops + len) { switch(*cur) { case o_end: goto done; default: opsIterate(&cur); break; } } done: free(ops); return R_OK; } static const char* getOpName(vmop_t op) { #define RETOPNAME(r) case o_##r: return #r; switch(op) { RETOPNAME(end); RETOPNAME(stop); RETOPNAME(nop); RETOPNAME(ret); RETOPNAME(push); RETOPNAME(pop); RETOPNAME(jmp); RETOPNAME(je); RETOPNAME(jne); RETOPNAME(test); RETOPNAME(call); RETOPNAME(lock); RETOPNAME(check); RETOPNAME(cmp); RETOPNAME(add); RETOPNAME(sub); RETOPNAME(mov); RETOPNAME(match); RETOPNAME(repl); RETOPNAME(setvar); RETOPNAME(clrvar); RETOPNAME(pos); RETOPNAME(text); RETOPNAME(msg); default: return ""; } } static const char* getRegisterName(byte reg) { #define RETREGNAME(r) case r_##r: return #r; switch(reg) { RETREGNAME(fe); RETREGNAME(fg); RETREGNAME(fl); RETREGNAME(ac); RETREGNAME(sp); RETREGNAME(bp); RETREGNAME(b0); RETREGNAME(b1); RETREGNAME(b2); RETREGNAME(b3); RETREGNAME(b4); RETREGNAME(b5); RETREGNAME(b6); RETREGNAME(b7); RETREGNAME(b8); RETREGNAME(b9); RETREGNAME(e0); RETREGNAME(e1); RETREGNAME(e2); RETREGNAME(e3); RETREGNAME(e4); RETREGNAME(e5); RETREGNAME(e6); RETREGNAME(e7); RETREGNAME(e8); RETREGNAME(e9); RETREGNAME(cg); RETREGNAME(x0); RETREGNAME(x1); RETREGNAME(x2); RETREGNAME(x3); RETREGNAME(x4); RETREGNAME(x5); RETREGNAME(x6); RETREGNAME(x7); RETREGNAME(y0); RETREGNAME(y1); RETREGNAME(y2); RETREGNAME(y3); RETREGNAME(y4); RETREGNAME(y5); RETREGNAME(y6); RETREGNAME(y7); default: return ""; } }; void dumpArgument(FILE* f, vmop_t* ops) { switch(ARG_TYPE(*ops)) { case ARG_VAL_TYPE: fprintf(f, "0x%06x", ARG_GET_VALUE(*((uint*)ops))); break; case ARG_MEM_TYPE: fprintf(f, "", ARG_GET_VALUE(*((uint*)ops))); break; case ARG_STACK_TYPE: fprintf(f, "", ARG_GET_VALUE(*((uint*)ops))); break; case ARG_REG_TYPE: fprintf(f, getRegisterName(*ops)); break; default: ASSERT(false); } } /* Dump a string of ops to a stream */ int opsDump(vmop_t* ops, FILE* f) { vmop_t* beg = ops; while(*ops != o_end) { vmop_t op = *ops; fprintf(f, "%06x: %s ", (ops - beg), getOpName(op)); ops++; /* Now the arguments */ switch(op) { case o_push: case o_pop: case o_jmp: case o_je: case o_jne: case o_test: case o_call: case o_stop: case o_pos: dumpArgument(f, ops); INC_ARGUMENT(ops); break; case o_lock: case o_check: case o_cmp: case o_add: case o_sub: case o_mov: case o_match: case o_repl: dumpArgument(f, ops); fprintf(f, ", "); INC_ARGUMENT(ops); dumpArgument(f, ops); INC_ARGUMENT(ops); break; }; /* Now any additional data */ switch(op) { case o_match: { match_op* op = (match_op*)ops; if(op->type & kMatchPcre) { match_op_pcre* pcre = (match_op_pcre*)op; fprintf(f, " %s", pcre->pattern); } else { fprintf(f, " "); } ops += match_op_size(*op); } break; case o_setvar: case o_clrvar: { var_op* vop = (var_op*)ops; fprintf(f, " <%%%s>", vop->name); ops += var_op_size(*vop); } break; case o_test: { text_op* op = (text_op*)ops; fprintf(f, " <%s>", op->string); ops += text_op_size(*op); } break; }; fprintf(f, "\n"); } return R_OK; }