summaryrefslogtreecommitdiff
path: root/lib/compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compile.c')
-rw-r--r--lib/compile.c2337
1 files changed, 2337 insertions, 0 deletions
diff --git a/lib/compile.c b/lib/compile.c
new file mode 100644
index 0000000..e92a2f6
--- /dev/null
+++ b/lib/compile.c
@@ -0,0 +1,2337 @@
+/*
+ * AUTHOR
+ * N. Nielsen
+ *
+ * LICENSE
+ * This software is in the public domain.
+ *
+ * The software is provided "as is", without warranty of any kind,
+ * express or implied, including but not limited to the warranties
+ * of merchantability, fitness for a particular purpose, and
+ * noninfringement. In no event shall the author(s) be liable for any
+ * claim, damages, or other liability, whether in an action of
+ * contract, tort, or otherwise, arising from, out of, or in connection
+ * with the software or the use or other dealings in the software.
+ *
+ * SUPPORT
+ * Send bug reports to: <nielsen@memberwebs.com>
+ */
+
+/* ----------------------------------------------------------------------
+// Recipe Compiler
+// 2000-2002 Copyright, Nate Nielsen
+*/
+
+
+#include <stdarg.h>
+#include "common/usuals.h"
+#include "common/compat.h"
+#include "lib/rlib.h"
+#include "priv.h"
+#include "execute.h"
+#include "ops.h"
+
+const byte kEncUTF8[] = { 0xEF, 0xBB, 0xBF }; /* UTF-8 byte-order mark; compileEncoding skips it */
+const byte kEncUCS2_L[] = { 0xFF, 0xFE }; /* 2-byte BOM FF FE; compileEncoding rejects it */
+const byte kEncUCS2_R[] = { 0xFE, 0xFF }; /* 2-byte BOM FE FF; compileEncoding rejects it */
+const byte kEncUCS4_L[] = { 0xFF, 0xFE, 0x00, 0x00 }; /* 4-byte BOM FF FE 00 00; compileEncoding rejects it */
+const byte kEncUCS4_R[] = { 0x00, 0x00, 0xFE, 0xFF }; /* 4-byte BOM 00 00 FE FF; compileEncoding rejects it */
+
+/* Flags Syntax: -----------------------------------------------------------------
+ */
+
+/* All flags available to statements.
+ * Every value is a separate bit, so several flags can be OR-ed together
+ * into one mask (as in the 'flags' column of kAllStatements). */
+typedef enum _syn_flags
+{
+ f_not = 0x0001, /* match: Reverse effect */
+ f_once = 0x0002, /* match: Only execute once */
+ f_case = 0x0004, /* options: Case sensitive */
+ f_line = 0x0010, /* options: Limit to line matches */
+ f_find = 0x0020, /* match: match but don't move limits */
+ f_tag = 0x0040, /* tag: use special tag matching */
+
+ /* Special cases */
+ f_num = 0x0400 /* represents a number */
+}
+syn_flags;
+
+
+/* One entry of the flag table: a flag bit and the text that selects it
+ * inside the parentheses of a statement. */
+typedef struct _flagmap
+{
+ syn_flags flag; /* the flag bit */
+ const char* text; /* its text representation in a script */
+}
+flagmap;
+
+flagmap kAllFlags[] = { /* Listing of all flags and their text representations.
+ * compileStatement searches this table in order and compares
+ * only as many characters as the token found in the script. */
+ { f_not, "not" },
+ { f_once, "once" },
+ { f_case, "case" },
+ { f_line, "line" },
+ { f_find, "find" },
+ { f_tag, "tag" },
+
+ /* Special cases: f_num has no name; compileStatement recognises it
+ * by the digit characters in kValidNum instead. */
+ { f_num, "" }
+};
+
+
+/* Statement Syntax: --------------------------------------------------------------
+ */
+
+typedef enum _syn_statements /* Statement ids.
+ * The numeric value is also used as the index into kAllStatements
+ * (compilerRun ASSERTs this in _DEBUG builds), so this list and
+ * that table must be kept in the same order. */
+{
+ s_none,
+ s_function,
+ s_loop,
+ s_once,
+ s_options,
+ s_call,
+ s_return,
+ s_end,
+ s_stop,
+ s_match,
+ s_replace,
+ s_lock,
+ s_setvar,
+ s_addvar,
+ s_clrvar,
+ s_message,
+ s_else,
+ s_opbrace,
+ s_clbrace,
+ s_implied
+}
+syn_statements;
+
+typedef struct _syntaxmap /* One row of the statement table (kAllStatements) */
+{
+ syn_statements syntax; /* id */
+ const char* text; /* text representation */
+ uint flags; /* which flags are valid (mask of syn_flags bits) */
+ uint args; /* Does it have arguments (either a name or a data block); mask of ARGUMENT_* bits */
+ uint context; /* In which context valid; mask of SYNTAX_* bits */
+}
+syntaxmap;
+
+/* Context values (bit flags).
+ * compileStatement accepts a statement when its context mask shares at
+ * least one bit with the current codestack's curContext; entries whose
+ * context is exactly SYNTAX_SPECIAL skip that check. */
+#define SYNTAX_INROOT 0x00000001 /* Only allowed in the root script */
+#define SYNTAX_INBRACE 0x00000004 /* Only allowed inside braces */
+#define SYNTAX_INFUNCTION 0x00000008 /* Only allowed in a function */
+#define SYNTAX_BLOCK 0x00000010 /* The statement is start of a block */
+#define SYNTAX_IMPLIED 0x00000020 /* We're in an implied block */
+#define SYNTAX_SPECIAL 0x00000100 /* Syntax not determined by parser */
+#define SYNTAX_ANYWHERE (SYNTAX_INROOT | SYNTAX_INBRACE | SYNTAX_INFUNCTION)
+
+/* Argument values (bit flags for syntaxmap.args) */
+#define ARGUMENT_NAME 0x00000001 /* statement takes an identifier */
+#define ARGUMENT_DATA 0x00000002 /* statement takes a delimited data block */
+
+syntaxmap kAllStatements[] = { /* Columns: id, keyword, valid flags, argument kinds, context.
+ * Rows are indexed by syn_statements value, so the order
+ * here must match the order of that enum. */
+ { s_none, "", 0, 0, SYNTAX_SPECIAL },
+ { s_function, "function", 0, ARGUMENT_NAME, SYNTAX_INROOT },
+ { s_loop, "loop", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK },
+ { s_once, "once", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK },
+ { s_options, "options", f_case | f_line, 0, SYNTAX_ANYWHERE },
+ { s_call, "call", 0, ARGUMENT_NAME, SYNTAX_ANYWHERE },
+ { s_return, "return", f_num, 0, SYNTAX_INFUNCTION },
+ { s_end, "end", 0, 0, SYNTAX_ANYWHERE },
+ { s_stop, "stop", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE },
+ { s_match, "match", f_not|f_once|f_num|f_find|f_tag,ARGUMENT_DATA, SYNTAX_ANYWHERE | SYNTAX_BLOCK },
+ { s_replace, "replace", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE },
+ { s_lock, "lock", 0, 0, SYNTAX_ANYWHERE },
+ { s_setvar, "set", 0, ARGUMENT_DATA | ARGUMENT_NAME, SYNTAX_ANYWHERE },
+ { s_addvar, "add", 0, ARGUMENT_DATA | ARGUMENT_NAME, SYNTAX_ANYWHERE },
+ { s_clrvar, "clr", 0, ARGUMENT_NAME, SYNTAX_ANYWHERE },
+ { s_message, "message", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE },
+ { s_else, "else", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK },
+
+ /* special cases */
+ { s_opbrace, "{", 0, 0, SYNTAX_SPECIAL },
+ { s_clbrace, "}", 0, 0, SYNTAX_SPECIAL },
+ { s_implied, "", 0, 0, SYNTAX_SPECIAL }
+};
+
+
+
+
+/* Syntax Constants: -------------------------------------------------------------
+ */
+
+const char* kValidNum = "0123465798"; /* digit characters; used as a set with strspn/strcspn, so order does not matter */
+const char* kValidDelim = "\"~`!@#$%^&*[]|'<>./?+=-;:"; /* characters that may open (and then close) a data block */
+const char* kValidBrace = "{}"; /* block open / close characters */
+const char kFlagsStart = '('; /* opens a flag list */
+const char kFlagsEnd = ')'; /* closes a flag list */
+const char kFlagDelim = ','; /* separates flags inside a flag list */
+const char kTagDelim = '='; /* separates the two halves of a tag match (see splitTagMatch) */
+const char kEscapeChar = '\\'; /* escapes the following character (see isEscaped) */
+
+/* Maximum size of an identifier (characters, not counting the terminator) */
+#define kMaxIdentifier 40
+
+
+#define INVALID_PTR 0xFFFFFFFF /* returned by findFunction when no function matches */
+
+
+
+/* ----------------------------------------------------------------------
+// CODE and COMPILE FUNCTIONALITY
+*/
+
+const size_t OPS_BUFFER_SIZE = 0x1000; /* initial size, and growth step, of the output ops buffer in bytes */
+
+
+/* codestack: We use one of these for each level of braces (including
+ * implied blocks). Helps us maintain context. Levels form a singly
+ * linked list through pPrev. */
+
+typedef struct _codestack
+{
+ uint curContext; /* Current parse context (SYNTAX_* bits) */
+ uint curOptions; /* Current options in use */
+ uint numStatements; /* Number of statements in this block */
+
+ size_t insPos; /* The position to insert code (byte offset into the ops buffer) */
+ size_t endPos; /* End of code owned by current codestack (byte offset into the ops buffer) */
+
+ struct _codestack* pPrev; /* previous stack (if in list) */
+}
+codestack;
+
+/* Add a new codestack level on top of the current one.
+ * May only be used where the RETURN() macro is usable, because an
+ * allocation failure leaves through RETURN(R_NOMEM). On failure the
+ * current level is left in place (it is no longer overwritten with NULL),
+ * so the existing stack is not lost. */
+#define PUSH_CODESTACK(c) \
+do { \
+    codestack* pushed_ = pushCodeStack((c)->code); \
+    if(!pushed_) RETURN(R_NOMEM); \
+    (c)->code = pushed_; \
+} while(0)
+
+/* Commit the current level (insertion point moves to the end of its code),
+ * free it and make the previous level current again. */
+#define POP_CODESTACK(c) \
+do { \
+    commitCodeStack((c)->code); \
+    (c)->code = popCodeStack((c)->code); \
+} while (0)
+
+
+/* Create a zeroed codestack level and link it in front of 'prev'.
+ * With a previous level, the new one starts at that level's insertion
+ * point and inherits its options and context; without one it starts in
+ * the root context. Returns NULL when the allocation fails. */
+static codestack* pushCodeStack(codestack* prev)
+{
+    codestack* fresh = (codestack*)malloc(sizeof(codestack));
+
+    if(fresh == NULL)
+        return NULL;
+
+    memset(fresh, 0, sizeof(codestack));
+    fresh->pPrev = prev;
+
+    /* First level of all: nothing to inherit */
+    if(prev == NULL)
+    {
+        fresh->curContext = SYNTAX_INROOT;
+        return fresh;
+    }
+
+    /* Things carried over from the enclosing level */
+    fresh->insPos = prev->insPos;
+    fresh->endPos = prev->insPos;
+    fresh->curOptions = prev->curOptions;
+    fresh->curContext = prev->curContext;
+
+    return fresh;
+}
+
+/* Free a codestack level and return the one below it (NULL if none).
+ * The level below has its insertion and end positions advanced by the
+ * distance the freed level moved its insertion point past it. */
+static codestack* popCodeStack(codestack* code)
+{
+    codestack* parent = code->pPrev;
+
+    if(parent != NULL)
+    {
+        /* How far the child's insertion point ran ahead of the parent's */
+        size_t advance = code->insPos - parent->insPos;
+
+        parent->insPos += advance;
+        parent->endPos += advance;
+    }
+
+    free(code);
+    return parent;
+}
+
+#define commitCodeStack(code) ((code)->insPos = (code)->endPos) /* move the insertion point to the end of the code owned by this level */
+
+
+/* compilecontext: The main compile state. Only one used throughout
+ * compilation. compileStatement fills in the cur* fields for each
+ * statement it parses; curName, curFlagData and curData point into the
+ * script text and are not NUL-terminated, hence the separate lengths.
+*/
+typedef struct _compilecontext
+{
+ syn_statements lastStatement; /* Last statement we had */
+ syn_statements nextStatement; /* Next statement we're expecting (s_none for anything) */
+
+ syn_statements curStatement; /* Current statement: */
+ const char* curName; /* - The Name */
+ size_t lenName; /* - Length of the name */
+ uint curFlags; /* - Flags */
+ const char* curFlagData; /* - Extra flag (f_num) */
+ size_t lenFlagData; /* - Length of extra flag */
+ const char* curData; /* - Data */
+ size_t lenData; /* - Length of data */
+
+
+ const char* in; /* Next location to compile in script */
+ long curKey; /* a unique key (id) which gets incremented */
+ /* for various uses such as jumps and calls */
+
+ vmop_t* beg; /* start of block of output code */
+ size_t cbops; /* number of bytes of output code */
+ size_t alloc; /* number of bytes allocated for output */
+
+ codestack* code; /* Current codestack */
+ bool failed : 1; /* did the last memory allocation fail? */
+}
+compilecontext;
+
+
+
+/* -------------------------------------------------------------------
+// POS OP
+//
+// The 'pos' op is a temporary placeholder used while compiling.
+// It takes one value parameter; that value is the same key that is
+// temporarily placed in the matching jmp, je, jne or call.
+// It is removed, and the jumps are patched, in the optimization stage.
+*/
+
+#define o_pos ((vmop_t)(0xFF))
+/* const vmop_t pos = 0xFF; */
+
+
+
+/* -------------------------------------------------------------------
+// OPS CODING FUNCTIONALITY
+*/
+
+/* These macros expand to pushValues(&ctx, ...), so they can only be used
+ * in a function that has a local compilecontext object named 'ctx'.
+ *
+ * Arguments are handed to pushValues as (size, value) pairs and the list
+ * is terminated by a zero size. The terminator is cast to size_t because
+ * pushValues reads every size with va_arg(ap, size_t); a plain int 0 is
+ * not guaranteed to be read back correctly where size_t is wider than int.
+ *
+ * PUSH_OP*  insert at the insertion point and advance past the new bytes.
+ * PUSH_ROP* insert at the insertion point without advancing, so ops
+ *           pushed this way end up in reverse order after it. */
+
+#define PUSH_OP(op) \
+    pushValues(&ctx, true, sizeof(byte), op, (size_t)0)
+
+#define PUSH_OP_1(op, arg) \
+    pushValues(&ctx, true, sizeof(byte), op, sizeof(arg), arg, (size_t)0)
+
+#define PUSH_OP_2(op, arg, arg2) \
+    pushValues(&ctx, true, sizeof(byte), op, sizeof(arg), arg, sizeof(arg2), arg2, (size_t)0)
+
+/* Add ops in reverse */
+#define PUSH_ROP(op) \
+    pushValues(&ctx, false, sizeof(byte), op, (size_t)0)
+
+#define PUSH_ROP_1(op, arg) \
+    pushValues(&ctx, false, sizeof(byte), op, sizeof(arg), arg, (size_t)0)
+
+#define PUSH_ROP_2(op, arg, arg2) \
+    pushValues(&ctx, false, sizeof(byte), op, sizeof(arg), arg, sizeof(arg2), arg2, (size_t)0)
+
+
+/* Grow the output ops buffer by one OPS_BUFFER_SIZE step.
+ * ctx->alloc is bumped whether or not the reallocation worked; when it
+ * did not, ctx->beg is NULL afterwards and ctx->failed is set so callers
+ * can detect it. (reallocf is presumably the BSD variant that releases
+ * the old block on failure -- confirm against common/compat.h.) */
+static void moreOutput(compilecontext* ctx)
+{
+    size_t grown = ctx->alloc + OPS_BUFFER_SIZE;
+
+    ctx->beg = (byte*)reallocf(ctx->beg, grown);
+    ctx->alloc = grown;
+
+    /* Remember the failure for later checks */
+    if(ctx->beg == NULL)
+        ctx->failed = true;
+}
+
+/* Reserve 'len' bytes in the ops buffer at the current insertion point
+ * and advance past them. Evaluates to a pointer to the reserved bytes,
+ * or NULL when len is 0 or an allocation has failed (see pushData). */
+#define allocOutput(ctx, len) pushData(ctx, NULL, len, true)
+
+
+
+
+/* Push any amount of data on the ops buffer.
+ *
+ * Opens a gap of 'len' bytes at the current codestack's insertion point
+ * (shifting everything after it up) and, when 'data' is given, copies it
+ * into the gap.
+ *
+ *   ctx      compile state; cbops and the codestack's endPos grow by len
+ *   data     bytes to copy, or NULL to only reserve space
+ *   len      number of bytes
+ *   forward  true: the insertion point moves past the new bytes;
+ *            false: it stays put, so later pushes land before them
+ *
+ * Returns a pointer to the gap when 'data' is NULL; returns NULL when
+ * data was supplied, when len is 0, or when allocation has failed. */
+static void* pushData(compilecontext* ctx, const void* data, size_t len, bool forward)
+{
+    void* pIns;
+
+    /* Keep growing until the request fits: one growth step is not enough
+     * when len is larger than OPS_BUFFER_SIZE (e.g. a long data block). */
+    while(!ctx->failed && ctx->cbops + len >= ctx->alloc)
+        moreOutput(ctx);
+
+    if(ctx->failed || !len)
+        return NULL;
+
+    pIns = ctx->beg + ctx->code->insPos;
+
+    /* Make space at insertion point */
+    memmove(ctx->beg + ctx->code->insPos + len, ctx->beg + ctx->code->insPos,
+            ctx->cbops - ctx->code->insPos);
+
+    /* If we have data copy it in */
+    if(data)
+        memcpy(pIns, data, len);
+
+    /* In debug mode fill reserved space with a recognisable pattern */
+#ifdef _DEBUG
+    else
+        memset(pIns, 0xCC, len);
+#endif
+
+    ctx->cbops += len;
+    ctx->code->endPos += len;
+
+    if(forward)
+        ctx->code->insPos += len;
+
+    /* Only return value if no input data */
+    return data ? NULL : pIns;
+}
+
+
+/* Push a list of small integer values on the ops buffer.
+ *
+ * The variable arguments are (size_t size, value) pairs terminated by a
+ * zero size; supported sizes are 1, 2 and 4 bytes. Values are packed in
+ * host byte order into a small local buffer which is handed to pushData
+ * (with the same 'forward' meaning) whenever it fills and at the end.
+ *
+ * 1- and 2-byte values are read as int because that is how they arrive
+ * after the default argument promotions; stores go through memcpy so no
+ * alignment is assumed for the packing buffer. */
+static void pushValues(compilecontext* ctx, bool forward, ...)
+{
+    va_list ap;
+    size_t len;
+
+    #define VAL_BUF 20
+    byte buff[VAL_BUF];
+    size_t cur = 0;
+
+    va_start(ap, forward);
+    while((len = va_arg(ap, size_t)) != 0)
+    {
+        /* Flush when the next value would not fit */
+        if(cur + len > VAL_BUF)
+        {
+            pushData(ctx, buff, cur, forward);
+            cur = 0;
+        }
+
+        switch(len)
+        {
+        case 1:
+            buff[cur] = (byte)va_arg(ap, int);
+            break;
+        case 2:
+            {
+                unsigned short val2 = (unsigned short)va_arg(ap, int);
+                memcpy(buff + cur, &val2, sizeof(val2));
+            }
+            break;
+        case 4:
+            {
+                unsigned int val4 = va_arg(ap, unsigned int);
+                memcpy(buff + cur, &val4, sizeof(val4));
+            }
+            break;
+        default:
+            /* NOTE(review): an unsupported size is not consumed from the
+             * argument list, so everything after it would be misread. */
+            ASSERT(false);
+            break;
+        }
+
+        cur += len;
+    }
+    va_end(ap);
+
+    pushData(ctx, buff, cur, forward);
+}
+
+/* Check that 'regexp' compiles with the given PCRE options.
+ * Returns R_OK when it does. Otherwise returns R_REGEXP and, when PCRE
+ * supplied a message, stores it as the script error. The compiled
+ * pattern is only used for the check and is released again. */
+static int testRegexp(r_script* script, const char* regexp, short options)
+{
+    int erroroffset;
+    const char* error = NULL;
+    pcre* re = pcre_compile(regexp, options, &error, &erroroffset, NULL);
+
+    if(!re)
+    {
+        /* Pass the message as an argument, never as the format string */
+        if(error)
+            scriptSetError(script, "%s", error);
+        return R_REGEXP;
+    }
+
+    /* Memory from pcre_compile belongs to PCRE's allocator */
+    pcre_free(re);
+    return R_OK;
+}
+
+/* ----------------------------------------------------------------------
+// Functions for pushing specific types of ops on the stack
+*/
+/* Append a PCRE match op holding the uncompiled pattern 'regexp'.
+ * The op's options are PCRE_DOLLAR_ENDONLY, plus PCRE_DOTALL unless the
+ * 'line' option is active and PCRE_CASELESS unless the 'case' option is
+ * active. The pattern is test-compiled before it is stored.
+ * Returns R_OK, R_NOMEM when no space could be reserved, or the error
+ * from testRegexp. */
+static int pushMatch(r_script* script, compilecontext* ctx, const char* regexp)
+{
+    size_t opLen = sizeof(match_op_pcre) + (sizeof(char) * strlen(regexp));
+    match_op_pcre* match = (match_op_pcre*)allocOutput(ctx, opLen);
+    int rc;
+
+    if(match == NULL)
+        return R_NOMEM;
+
+    /* Header */
+    memset(match, 0, opLen);
+    match->header.len = opLen;
+    match->header.type = kMatchPcre;
+
+    /* Options derived from the current block's settings */
+    /* TODO: Do we need to make an option for PCRE_MULTILINE? */
+    match->options = PCRE_DOLLAR_ENDONLY;
+    if((ctx->code->curOptions & f_line) == 0)
+        match->options |= PCRE_DOTALL;
+    if((ctx->code->curOptions & f_case) == 0)
+        match->options |= PCRE_CASELESS;
+
+    rc = testRegexp(script, regexp, match->options);
+    if(rc < 0)
+        return rc;
+
+    /* Store the pattern text right behind the header */
+    strcpy(match->pattern, regexp);
+
+    return R_OK;
+}
+
+/* Append a text op containing 'len' characters of 'string' followed by
+ * a terminating 0. 'string' may be NULL when len is 0 (as done for the
+ * 'end' statement); nothing is copied in that case. Does nothing when
+ * no space could be reserved. */
+static void pushText(compilecontext* ctx, const char* string, size_t len)
+{
+    /* Allocate */
+    text_op* op = (text_op*)allocOutput(ctx, sizeof(text_op) + (sizeof(char) * (len)));
+
+    /* Setup op */
+    if(op)
+    {
+        op->len = len;
+
+        /* Copy the replacement string onto the ops stack */
+        /* TODO: Get this ready for binary replacements */
+        if(string && len)
+            strncpy((char*)op->string, string, len);
+        op->string[len] = 0;
+    }
+}
+
+/* Append a variable op holding the first 'lenName' characters of 'name'
+ * plus a terminating 0; the op's len field counts the terminator too.
+ * Does nothing when no space could be reserved. */
+static void pushVar(compilecontext* ctx, const char* name, size_t lenName)
+{
+    size_t stored = lenName + 1;
+    var_op* var = (var_op*)allocOutput(ctx, sizeof(var_op) + (sizeof(char) * stored));
+
+    if(!var)
+        return;
+
+    var->len = stored;
+
+    /* TODO: Get this ready for binary replacements */
+    memcpy(var->name, name, lenName);
+    var->name[lenName] = 0;
+}
+
+
+/* ----------------------------------------------------------------------
+// FUNCTIONS
+*/
+
+/* Keeps track of current functions seen: one entry per function */
+typedef struct funcdef
+{
+ char name[kMaxIdentifier + 1]; /* function name, NUL-terminated */
+ uint key; /* key passed to addFunction; findFunction returns it */
+}
+funcdef;
+
+typedef struct _funcdefs /* growable list of funcdef entries (see addFunction) */
+{
+ uint alloc; /* number of entries there is room for */
+ uint cur; /* number of entries in use */
+ funcdef defs[1]; /* the entries; allocated larger than declared */
+}
+funcdefs;
+
+/* Append a function (name of at most kMaxIdentifier characters, and its
+ * key) to the list at *ppdefs. The list is created on first use and
+ * grown in steps of 0x10 entries. Returns false when memory runs out;
+ * *ppdefs is NULL in that case. */
+static bool addFunction(funcdefs** ppdefs, const char* name, size_t len, uint key)
+{
+    funcdefs* list = *ppdefs;
+    funcdef* slot;
+
+    /* Create or enlarge the list when there is no free entry */
+    if(list == NULL || list->cur >= list->alloc)
+    {
+        uint used = list ? list->cur : 0;
+        uint room = (list ? list->alloc : 0) + 0x10;
+
+        list = (funcdefs*)reallocf(list, sizeof(funcdefs) + (sizeof(funcdef) * room));
+        *ppdefs = list;
+        if(list == NULL)
+            return false;
+
+        list->alloc = room;
+        list->cur = used;
+    }
+
+    /* Fill in the next free entry */
+    slot = &list->defs[list->cur];
+    strlcpy(slot->name, name, min(kMaxIdentifier, len) + 1);
+    slot->key = key;
+    list->cur++;
+
+    return true;
+}
+
+/* Look up a function by name (case-insensitive; only the first
+ * kMaxIdentifier characters of 'name' are considered).
+ * Returns the key stored for it, or INVALID_PTR when the list is NULL
+ * or holds no such name. */
+static uint findFunction(funcdefs* pdefs, const char* name, size_t len)
+{
+    char wanted[kMaxIdentifier + 1];
+    size_t idx;
+
+    if(pdefs == NULL)
+        return INVALID_PTR;
+
+    /* NUL-terminated copy of the name for comparing */
+    strlcpy(wanted, name, min(len, kMaxIdentifier) + 1);
+
+    for(idx = 0; idx < pdefs->cur; idx++)
+    {
+        if(strcasecmp(pdefs->defs[idx].name, wanted) == 0)
+            return pdefs->defs[idx].key;
+    }
+
+    return INVALID_PTR;
+}
+
+/* ----------------------------------------------------------------------
+// SYNTAX FUNCTIONS
+*/
+
+/* Is the character at 'posi' escaped?
+ * Counts the run of escape characters directly in front of it (never
+ * looking before 'str'). Escape characters can escape each other, so
+ * the character is escaped only when that run has odd length. */
+bool isEscaped(const char* str, const char* posi)
+{
+    size_t run = 0;
+    const char* p;
+
+    for(p = posi; p > str && p[-1] == kEscapeChar; p--)
+        run++;
+
+    return (run & 1) ? true : false;
+}
+
+
+/* Split a tag match into two for later use.
+ * Finds the first unescaped kTagDelim in 'regexp', overwrites it with a
+ * terminator (so 'regexp' becomes the first half) and returns a pointer
+ * to the second half. When there is no unescaped delimiter the script
+ * error is set and NULL is returned; 'regexp' is left unchanged. */
+char* splitTagMatch(r_script* script, char* regexp)
+{
+    char* second = regexp;
+
+    while((second = strchr(second, kTagDelim)) != NULL)
+    {
+        if(!isEscaped(regexp, second))
+        {
+            second[0] = '\0';
+            return second + 1;
+        }
+
+        /* Escaped delimiter: keep looking after it */
+        second++;
+    }
+
+    scriptSetError(script, "Couldn't find tags in match (separate with '%c').", kTagDelim);
+    return NULL;
+}
+
+
+/* Eat spaces and comments.
+ * Advances ctx->in over white space and over '#' comments, which run to
+ * the end of the line. The pointer never moves past the terminating
+ * NUL, also when a comment is the last thing in the script.
+ * Returns true if not at the end of the script afterwards. */
+static bool compileSpace(compilecontext* ctx)
+{
+    for(;;)
+    {
+        if(ctx->in[0] == '#')
+        {
+            /* Eat rest of line; the newline itself is eaten as white
+             * space on the next pass, a terminator ends the loop */
+            while(ctx->in[0] != '\n' && ctx->in[0] != '\0')
+                ctx->in++;
+        }
+        else if(isspace((unsigned char)ctx->in[0]))
+        {
+            ctx->in++;
+        }
+        else
+        {
+            break;
+        }
+    }
+
+    /* Return true if not end of file */
+    return ctx->in[0] != '\0';
+}
+
+/* Look at the byte-order mark (if any) at the start of the script.
+ * A UTF-8 mark is skipped. Any of the 2- or 4-byte marks listed in the
+ * kEncUCS* tables sets the script error and returns R_SYNTAX. Anything
+ * else is left alone. Returns R_OK unless rejected.
+ *
+ * Each mark is compared using its own length.
+ * NOTE(review): memcmp may look at up to 4 bytes even when the script is
+ * shorter than that -- confirm callers always supply enough readable
+ * bytes. */
+int compileEncoding(r_script* script, compilecontext* ctx)
+{
+    if(!memcmp(ctx->in, kEncUTF8, countof(kEncUTF8)))
+    {
+        ctx->in += countof(kEncUTF8);
+    }
+    else if(!memcmp(ctx->in, kEncUCS2_L, countof(kEncUCS2_L)) ||
+            !memcmp(ctx->in, kEncUCS2_R, countof(kEncUCS2_R)) ||
+            !memcmp(ctx->in, kEncUCS4_L, countof(kEncUCS4_L)) ||
+            !memcmp(ctx->in, kEncUCS4_R, countof(kEncUCS4_R)))
+    {
+        scriptSetError(script, "unsupported unicode encoding");
+        return R_SYNTAX;
+    }
+
+    return R_OK;
+}
+
+/* Compile (parse) a single statement.
+ *
+ * Reads from ctx->in and fills in the "current statement" fields of ctx:
+ * curStatement, curName/lenName, curFlags, curFlagData/lenFlagData and
+ * curData/lenData. The name, flag data and data pointers refer to the
+ * script text itself and are not NUL-terminated.
+ *
+ * Braces are handled here: '{' (or an implied block, when a block
+ * statement is not followed by '{') pushes a codestack level and '}'
+ * pops one; parsing then continues with the next statement.
+ *
+ * Returns R_OK (with curStatement == s_none when the end of the script
+ * was reached first), R_SYNTAX with the script error set, or R_NOMEM
+ * when a codestack level could not be allocated.
+ *
+ * The SYNTAX_ERROR, SYNTAX_ERROR_1 and RETURN macros defined here stay
+ * defined for the rest of the file.
+ */
+int compileStatement(r_script* script, compilecontext* ctx)
+{
+    /* Some MACROS */
+    #define SYNTAX_ERROR(s) \
+    do{ \
+        scriptSetError(script, s); \
+        RETURN(R_SYNTAX); \
+    } while(0)
+
+    #define SYNTAX_ERROR_1(s, a1) \
+    do{ \
+        scriptSetError(script, s, a1); \
+        RETURN(R_SYNTAX); \
+    } while(0)
+
+    /* Jump to cleanup label instead of return */
+    #define RETURN(r) \
+    do { \
+        retv = r; \
+        goto cleanup; \
+    } while (0)
+
+    const char* end;
+    int retv = R_OK;
+
+    {
+        int i;
+
+        /* Braces never count as the "last statement" */
+        if(ctx->curStatement != s_opbrace && ctx->curStatement != s_clbrace)
+            ctx->lastStatement = ctx->curStatement;
+
+        ctx->curStatement = s_none;
+        ctx->curName = NULL;
+        ctx->lenName = 0;
+        ctx->curFlags = 0;
+        ctx->curFlagData = NULL;
+        ctx->lenFlagData = 0;
+        ctx->curData = NULL;
+        ctx->lenData = 0;
+
+
+        do
+        {
+            /* Eat all whitespace and comments */
+            compileSpace(ctx);
+
+            /* Check for end of input */
+            if(ctx->in[0] == 0)
+                RETURN(R_OK);
+
+            /* Check for Statement */
+            if(!strcspn(ctx->in, kValidIdentifier))
+                end = ctx->in + strspn(ctx->in, kValidIdentifier);
+
+            /* Check for braces */
+            else if(!strcspn(ctx->in, kValidBrace))
+                end = ctx->in + 1; /* Brace always just one character */
+
+            /* Anything else is bad */
+            else
+                SYNTAX_ERROR_1("Unexpected character \'%c\'.", ctx->in[0]);
+
+            /* Okay now determine which statement we got.
+             * NOTE(review): only as many characters as the token has are
+             * compared, so a token that is a prefix of a keyword selects
+             * that keyword -- confirm this is intended. */
+            for(i = 0; i < countof(kAllStatements); i++)
+            {
+                if(!strncasecmp(kAllStatements[i].text, ctx->in, end - ctx->in))
+                {
+                    /* Check the context */
+                    if(kAllStatements[i].context != SYNTAX_SPECIAL &&
+                       !(ctx->code->curContext & kAllStatements[i].context))
+                        SYNTAX_ERROR_1("\'%s\' not allowed here.", kAllStatements[i].text);
+
+                    ctx->curStatement = kAllStatements[i].syntax;
+                    break;
+                }
+            }
+
+
+            /* Check that we got a statement
+               if not then it should be a function call */
+            if(ctx->curStatement == s_none)
+            {
+                ctx->curStatement = s_call;
+                ctx->curName = ctx->in;
+                ctx->lenName = end - ctx->in;
+            }
+
+
+            /* Make sure it's what we were expecting */
+            if(ctx->nextStatement != s_none &&
+               ctx->nextStatement != ctx->curStatement)
+            {
+                /* If braces were required but not found, then do an impliedBlock.
+                   The token is not consumed (end = in), so it is parsed
+                   again on the next pass, inside the implied block. */
+                if(ctx->nextStatement == s_opbrace &&
+                   kAllStatements[ctx->lastStatement].context & SYNTAX_BLOCK)
+                {
+                    ctx->nextStatement = ctx->curStatement;
+                    ctx->curStatement = s_implied;
+                    end = ctx->in;
+                }
+
+                /* Otherwise it's an error */
+                else
+                {
+                    SYNTAX_ERROR_1("Expecting \'%s\' here.", kAllStatements[ctx->nextStatement].text);
+                }
+            }
+
+
+            ctx->in = end;
+            ctx->nextStatement = s_none;
+
+            /* We process braces here */
+            if(ctx->curStatement == s_opbrace ||
+               ctx->curStatement == s_implied)
+            {
+                PUSH_CODESTACK(ctx);
+                ctx->code->curContext |= SYNTAX_INBRACE;
+
+                if(ctx->lastStatement == s_function)
+                    ctx->code->curContext |= SYNTAX_INFUNCTION;
+
+                if(ctx->curStatement == s_implied)
+                    ctx->code->curContext |= SYNTAX_IMPLIED;
+
+                ctx->curStatement = s_none;
+                continue;
+            }
+
+            /* See if we need to pop any codestacks for implied blocks */
+            while(ctx->code->curContext & SYNTAX_IMPLIED &&
+                  ctx->code->numStatements > 0)
+            {
+                /* Only should be one statement on an implied block */
+                ASSERT(ctx->code->numStatements == 1);
+                POP_CODESTACK(ctx);
+            }
+
+            /* Closing braces here */
+            if(ctx->curStatement == s_clbrace)
+            {
+                /* Every level opened by a block carries SYNTAX_INBRACE.
+                   A '}' on a level without it has no matching '{';
+                   popping would run the codestack down to NULL. */
+                if(!(ctx->code->curContext & SYNTAX_INBRACE))
+                    SYNTAX_ERROR("Unexpected \'}\'.");
+
+                POP_CODESTACK(ctx);
+                ctx->curStatement = s_none;
+                continue;
+            }
+
+        }
+        while(ctx->curStatement == s_none);
+
+
+        /* Okay now do name if we have one */
+        if(kAllStatements[ctx->curStatement].args & ARGUMENT_NAME && !ctx->curName)
+        {
+            compileSpace(ctx);
+
+            /* Now look for a normal or name flag */
+            if(!strcspn(ctx->in, kValidIdentifier))
+            {
+                end = ctx->in + strspn(ctx->in, kValidIdentifier);
+
+                /* Any flags not found assume it's an identifier */
+                ctx->curName = ctx->in;
+                ctx->lenName = end - ctx->in;
+
+                if(ctx->lenName > kMaxIdentifier)
+                    SYNTAX_ERROR("Maximum length for an identifier is 40 characters.");
+
+                ctx->in = end;
+            }
+        }
+
+
+        /* Eat the next little bit of whitespace */
+        compileSpace(ctx);
+
+        /* Okay now look for flags start */
+        if(ctx->in[0] == kFlagsStart)
+        {
+            ctx->in++;
+
+            while(1)
+            {
+                compileSpace(ctx);
+
+
+                /* Check for a number flag */
+                if(!strcspn(ctx->in, kValidNum))
+                {
+                    if(ctx->curFlagData)
+                        SYNTAX_ERROR("Invalid flags.");
+
+                    /* If found then just grab and go */
+                    ctx->curFlags |= f_num;
+                    ctx->curFlagData = ctx->in;
+                    ctx->lenFlagData = strspn(ctx->in, kValidNum);
+
+                    ctx->in += ctx->lenFlagData;
+                }
+
+                /* Now look for a normal flag */
+                else if(!strcspn(ctx->in, kValidIdentifier))
+                {
+                    bool found = false;
+                    end = ctx->in + strspn(ctx->in, kValidIdentifier);
+
+                    /* Okay now try and map out that flag */
+                    for(i = 0; i < countof(kAllFlags); i++)
+                    {
+                        if(!strncasecmp(kAllFlags[i].text, ctx->in, end - ctx->in))
+                        {
+                            found = true;
+                            ctx->curFlags |= kAllFlags[i].flag;
+                            break;
+                        }
+                    }
+
+                    /* Any flags not found assume it's an identifier */
+                    if(!found)
+                    {
+                        if(ctx->curName != NULL)
+                            SYNTAX_ERROR("Invalid flags.");
+
+                        ctx->curName = ctx->in;
+                        ctx->lenName = end - ctx->in;
+                    }
+
+                    ctx->in = end;
+                }
+
+                /* End ) of flags */
+                else if(ctx->in[0] == kFlagsEnd)
+                {
+                    ctx->in++;
+                    break;
+                }
+
+                /* Separator , between flags */
+                else if(ctx->in[0] == kFlagDelim)
+                    ctx->in++;
+
+                else
+                    SYNTAX_ERROR("Expected a flag.");
+            }
+
+            /* Now check the flags: none may be set that the statement's
+               table entry does not list */
+            if((kAllStatements[ctx->curStatement].flags | ctx->curFlags)
+               != kAllStatements[ctx->curStatement].flags)
+                SYNTAX_ERROR("Invalid flags for this statement.");
+
+        }
+
+
+        /* Okay now do data processing */
+        if(kAllStatements[ctx->curStatement].args & ARGUMENT_DATA)
+        {
+            const char* delim;
+            compileSpace(ctx);
+
+            /* Check for a delimiter. The end of the script is tested
+               first: strchr would otherwise "find" the terminator of
+               kValidDelim and the parser would walk off the script. */
+            if(*(ctx->in) != '\0' &&
+               (delim = strchr(kValidDelim, *(ctx->in))) != NULL)
+            {
+                ctx->in++;
+                end = ctx->in;
+
+                /* Find end of data but checking for escaped delimiters */
+                while(1)
+                {
+                    end = strchr(end, *delim);
+
+                    if(!end)
+                        SYNTAX_ERROR("Couldn't find end of data for this statement");
+
+                    if(!isEscaped(ctx->in, end))
+                        break;
+
+                    end++;
+                }
+
+
+                ctx->curData = ctx->in;
+                ctx->lenData = (end - ctx->in);
+
+                ctx->in = end + 1;
+            }
+            else
+            {
+                SYNTAX_ERROR("Expected data for this statement.");
+            }
+        }
+
+        /* Names can also arrive through the flag list or as a bare call */
+        if(ctx->lenName > kMaxIdentifier)
+            SYNTAX_ERROR("Maximum length for an identifier is 40 characters.");
+
+        ctx->code->numStatements++;
+    }
+
+    /* done! */
+
+cleanup:
+    return retv;
+}
+
+/* Line number (starting at 1) of position 'cur' in the text starting at
+ * 'beg': one more than the number of newlines in beg..cur, counting the
+ * character at 'cur' itself. */
+static uint getLine(const char* beg, const char* cur)
+{
+    size_t line = 1;
+    const char* p;
+
+    for(p = beg; p <= cur; p++)
+    {
+        if(*p == '\n')
+            line++;
+    }
+
+    return line;
+}
+
+int compilerRun(r_script* script, const char* data)
+{
+
+ int retv = R_OK; /* used by RETURN macro */
+ funcdefs* pFunctions = NULL;
+ char* regexp = NULL;
+
+ /* We allocate the main instruction buffer */
+ compilecontext ctx;
+ memset(&ctx, 0, sizeof(ctx));
+
+ ctx.in = data;
+ ctx.curKey = 0x00000100;
+ ctx.beg = (vmop_t*)malloc(OPS_BUFFER_SIZE);
+ ctx.alloc = OPS_BUFFER_SIZE;
+ ctx.code = pushCodeStack(NULL);
+
+ if(!ctx.beg || !ctx.code)
+ RETURN(R_NOMEM);
+
+#ifdef _DEBUG
+ {
+ int i;
+ /* Do a check here! Index should be equal to */
+ /* command name for each statement */
+ for(i = 0; i < countof(kAllStatements); i++)
+ ASSERT(i == kAllStatements[i].syntax);
+ }
+#endif
+
+
+ /*
+ Push a first empty pops stack
+ We have to have one to pop below
+ */
+ PUSH_CODESTACK(&ctx);
+ ctx.code->curContext = SYNTAX_INROOT;
+
+ /* Push our signature of 4 nop bytes */
+ PUSH_OP(o_nop);
+ PUSH_OP(o_nop);
+ PUSH_OP(o_nop);
+ PUSH_OP(o_nop);
+
+ /* Set up initial stack frame */
+ PUSH_OP_2(o_mov, r_bp, r_sp);
+ /* Push the beginning of the main loop here */
+ PUSH_OP_1(o_pos, ARG_MAKE_VALUE(0L));
+ PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(0L));
+
+ retv = compileEncoding(script, &ctx);
+ if(retv < 0)
+ RETURN(retv);
+
+ while(ctx.in[0] != '\0')
+ {
+ if(ctx.cbops + 0x200 >= ctx.alloc)
+ moreOutput(&ctx);
+
+ /* Here we check if we have enough memory */
+ if(ctx.failed)
+ RETURN(R_NOMEM);
+
+ /* Get and parse the current statement */
+ retv = compileStatement(script, &ctx);
+ if(retv < 0)
+ RETURN(retv);
+
+ if(ctx.curStatement == s_none)
+ continue;
+
+
+
+
+ /* Now we pop the commit end code from previous statement */
+ commitCodeStack(ctx.code);
+
+ /* Okay now do the rest of the statements */
+ switch(ctx.curStatement)
+ {
+
+
+ /*//////////////////////////////////////////////////////
+ // function
+ */
+ case s_function:
+ {
+ /* SYNTAX ----------------------------------------
+ //
+ // function(name)
+ // {
+ // .....
+ // }
+ */
+
+ uint key, keyJmp;
+
+ /* Check that we got a name */
+ if(!(ctx.curName && ctx.lenName))
+ SYNTAX_ERROR("'function' needs a name");
+
+ /* We need a opening brace next */
+ ctx.nextStatement = s_opbrace;
+
+
+ key = ARG_MAKE_VALUE(ctx.curKey++);
+
+ /* Put the function name at current op pos */
+ if(!addFunction(&pFunctions, ctx.curName,
+ ctx.lenName, key))
+ RETURN(R_NOMEM);
+
+
+
+ /* OPS --------------------------------------------
+ //
+ // jmp <1>
+ // pos <function>
+ // push bp
+ // mov bp, r_sp
+ // etc.
+ // ......
+ // etc.
+ // mov fe, 1
+ // mov sp, bp
+ // pop bp
+ // ret
+ // <1>
+ */
+
+ keyJmp = ARG_MAKE_VALUE(ctx.curKey++);
+
+ PUSH_OP_1(o_jmp, keyJmp);
+ PUSH_OP_1(o_pos, key);
+ PUSH_OP_1(o_push, r_bp);
+ PUSH_OP_2(o_mov, r_bp, r_sp);
+
+
+ PUSH_ROP_1(o_pos, keyJmp);
+ PUSH_ROP(o_ret);
+ PUSH_ROP_1(o_pop, r_bp);
+ PUSH_ROP_2(o_mov, r_sp, r_bp);
+ PUSH_ROP_2(o_mov, r_fe, ARG_MAKE_VALUE(1));
+
+ }
+ break;
+
+
+ /*//////////////////////////////////////////////////////
+ // loop
+ */
+ case s_loop:
+ {
+
+ uint key, key2;
+
+ /* SYNTAX ----------------------------------------
+ //
+ // loop
+ // {
+ // .....
+ // }
+ */
+
+ /* We need a opening brace next */
+ ctx.nextStatement = s_opbrace;
+
+
+
+ /* OPS --------------------------------------------
+ //
+ // push ac
+ // <1>
+ // mov ac, 0
+ // .....
+ // pop x2 pop the previously pushed action value
+ // test ac test current action
+ // jne <2> if action
+ // mov x2, ac then overide previous action value
+ // <2>
+ // push x2 push back changed action value
+ // je <1>
+ // pop ac pop out the
+ */
+
+ key = ARG_MAKE_VALUE(ctx.curKey++);
+ key2 = ARG_MAKE_VALUE(ctx.curKey++);
+
+ PUSH_OP_1(o_push, r_ac);
+ PUSH_OP_1(o_pos, key);
+ PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(0L));
+
+ PUSH_ROP_1(o_pop, r_ac);
+ PUSH_ROP_1(o_je, key);
+ PUSH_ROP_1(o_push, r_x2);
+ PUSH_ROP_1(o_pos, key2);
+ PUSH_ROP_2(o_mov, r_x2, r_ac);
+ PUSH_ROP_1(o_jne, key2);
+ PUSH_ROP_1(o_test, r_ac);
+ PUSH_ROP_1(o_pop, r_x2);
+ }
+ break;
+
+
+ /*//////////////////////////////////////////////////////
+ // once
+ */
+ case s_once:
+ {
+ uint keyOnce, key1, key5;
+
+ /* SYNTAX ----------------------------------------
+ //
+ // once
+ // {
+ // .....
+ // }
+ */
+
+ /* We need a opening brace next */
+ ctx.nextStatement = s_opbrace;
+
+
+
+ /* OPS --------------------------------------------
+ //
+ // test mem(value) Pull in the flag from memory
+ // jne <1> If not present then jump to containing code
+ // mov fe, 0 Otherwise set fail flag
+ // jmp <5> And skip the match
+ // <1>
+ // .....
+ //
+ // mov mem(value), 1
+ // <5>
+ */
+
+
+ keyOnce = ARG_MAKE_MEMORY(ctx.curKey++);
+ key1 = ARG_MAKE_VALUE(ctx.curKey++);
+ key5 = ARG_MAKE_VALUE(ctx.curKey++);
+
+
+ /* Once code */
+ PUSH_OP_1(o_test, keyOnce);
+ PUSH_OP_1(o_jne, key1);
+ PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_jmp, key5);
+ PUSH_OP_1(o_pos, key1);
+
+
+ PUSH_ROP_1(o_pos, key5);
+ PUSH_ROP_2(o_mov, keyOnce, ARG_MAKE_VALUE(1));
+ }
+ break;
+
+
+
+ /*/////////////////////////////////////////////////////
+ // call
+ */
+ case s_call:
+ {
+ uint key;
+
+ /* SYNTAX ----------------------------------------
+ //
+ // call(name)
+ */
+
+ /* Check that we got a name */
+ if(!(ctx.curName && ctx.lenName))
+ SYNTAX_ERROR("'call' needs a function name");
+
+ /* Find the function */
+ key = findFunction(pFunctions, ctx.curName,
+ ctx.lenName);
+
+ if(key == INVALID_PTR)
+ SYNTAX_ERROR("function does not exist");
+
+
+ /* OPS --------------------------------------------
+ //
+ // call <funcaddr>
+ */
+
+ PUSH_OP_1(o_call, key);
+ }
+ break;
+
+
+ /*///////////////////////////////////////////////////////
+ // return
+ */
+ case s_return:
+ {
+ /* SYNTAX ----------------------------------------
+ //
+ // return(0)
+ */
+
+ uint code = ARG_MAKE_VALUE(1);
+
+ /* Check for the number */
+ if(ctx.curFlags & f_num)
+ {
+ if(ctx.lenFlagData > 1 || (ctx.curFlagData[0] != '0' && ctx.curFlagData[0] != '1'))
+ SYNTAX_ERROR("Return code must be 0 or 1");
+
+ if(ctx.curFlagData[0] == '0')
+ code = ARG_MAKE_VALUE(0);
+ }
+
+ /* OPS --------------------------------------------
+ //
+ // mov fe, 0 (or 1 depending on code)
+ // mov sp, r_bp
+ // pop bp
+ // ret
+ */
+
+ PUSH_OP_2(o_mov, r_fe, code);
+ PUSH_OP_2(o_mov, r_sp, r_bp);
+ PUSH_OP_1(o_pop, r_bp);
+ PUSH_OP(o_ret);
+ }
+ break;
+
+
+
+ /*//////////////////////////////////////////////////////
+ // end
+ */
+ case s_end:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // end
+ */
+
+
+ /* OPS --------------------------------------------
+ //
+ // stop
+ */
+
+ PUSH_OP(o_text);
+ pushText(&ctx, NULL, 0);
+ PUSH_OP_1(o_stop, ARG_MAKE_VALUE(0));
+ }
+ break;
+
+
+
+ /*//////////////////////////////////////////////////////
+ // end
+ */
+ case s_stop:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // stop "message"
+ */
+
+ /* OPS --------------------------------------------
+ //
+ // text "message"
+ // stop
+ */
+
+ PUSH_OP(o_text);
+ pushText(&ctx, ctx.curData, ctx.lenData);
+ PUSH_OP_1(o_stop, ARG_MAKE_VALUE(1));
+ }
+ break;
+
+
+ /*/////////////////////////////////////////////////////
+ // match
+ */
+ case s_match:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // match(not, once) "regexp"
+ */
+
+ uint keyOnce, keyWatermark, keyJmp1, keyJmp2, keyJmp3,
+ keyJmp4, keyJmp5, keyJmp6, keyJmp7, groupNum,
+ key1, key4, key5, key9;
+
+ /* Get the flags */
+ bool bNot = ctx.curFlags & f_not ? true : false;
+ bool bOnce = ctx.curFlags & f_once ? true : false;
+ bool bHas = ctx.curFlags & f_find ? true : false;
+ bool bTag = ctx.curFlags & f_tag ? true : false;
+
+ /* Check that we got data */
+ if(!ctx.curData || !ctx.lenData)
+ SYNTAX_ERROR("'match' needs a regular expression");
+
+ /* We need a opening brace next */
+ ctx.nextStatement = s_opbrace;
+
+ regexp = strndup(ctx.curData, ctx.lenData);
+ if(!regexp)
+ RETURN(R_NOMEM);
+
+ groupNum = 0;
+
+ if(ctx.curFlagData && ctx.lenFlagData)
+ {
+ if(ctx.lenFlagData > 1)
+ SYNTAX_ERROR("Group specifier must be between 0 and 9.");
+
+ if(bHas)
+ SYNTAX_ERROR("Group specifier invalid with 'has' flag.");
+
+
+ /* Get the number to use */
+ groupNum = ctx.curFlagData[0] - '0';
+ }
+
+
+ /* OPS --------------------------------------------
+ //
+ // push x1 Save limits
+ // push y1 " "
+
+ // test mem(value) (once) Pull in the flag from memory
+ // jne <1> (once) If not present then jump to match code
+ // mov fe, 0 (once) Otherwise set fail flag
+ // jmp <5> (once) And skip the match
+ // <1> (once)
+
+ // mov x6, mem(key_value) (watermark) Get the watermark
+ // cmp x1, r_x6 (watermark) If watermark higher than match area
+ // mov fe, r_fg (watermark)
+ // jne <2> (watermark) skip
+ // mov x6, r_x1 (watermark) Otherwise bring up watermark to match area
+ // <2> (watermark)
+
+ // match x6, r_y1 Do match
+ // mov x4, fe
+
+ // <5> (once)
+
+ // jne <3> If match failed skip set below
+
+ // mov ac, 1 Set action flag
+
+ // cmp cg, 2 See if we have enough groups
+ // mov fe, r_fg ""
+ // jne <7>
+ // mov fe, 0 If not then set failed
+ // jmp <3> And skip to failed part
+ // <7>
+ // mov x1, r_b2 Set new limit for inside
+ // mov y1, e2 different registers (depending on number parameter)
+
+ // mov x2, r_b0 (watermark) Get the start of batch
+ // add x2, 1 (watermark) Add one to it
+ // mov mem(value), x2 (watermark) Stash it away in memory
+
+ // <3>
+
+ // cmp x4, 1 (not) Compare success against 0 or 1 depending on not
+ // push fe
+ // jne <4> Skip if no match
+
+ // mov mem(value), 1 (once)
+
+ // .....
+
+ // <4>
+ // pop fe
+
+ // pop y1
+ // pop x1
+ */
+
+ keyOnce = ARG_MAKE_MEMORY(ctx.curKey++);
+ keyWatermark = ARG_MAKE_STACK(ctx.curKey++);
+ keyJmp1 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp2 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp3 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp4 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp5 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp6 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp7 = ARG_MAKE_VALUE(ctx.curKey++);
+
+ /* Save for later */
+ PUSH_OP_1(o_push, r_x1);
+ PUSH_OP_1(o_push, r_y1);
+
+ if(bOnce)
+ {
+ /* Once code */
+ PUSH_OP_1(o_test, keyOnce);
+ PUSH_OP_1(o_jne, keyJmp1);
+ PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_jmp, keyJmp5);
+ PUSH_OP_1(o_pos, keyJmp1);
+ }
+
+ /* Watermark code */
+ PUSH_OP_2(o_mov, r_x6, keyWatermark);
+ PUSH_OP_2(o_cmp, r_x1, r_x6);
+ PUSH_OP_2(o_mov, r_fe, r_fg);
+ PUSH_OP_1(o_jne, keyJmp2);
+ PUSH_OP_2(o_mov, r_x6, r_x1);
+ PUSH_OP_1(o_pos, keyJmp2);
+
+
+ /* Is it a simple match? */
+ if(!bTag)
+ {
+ /* Actual match */
+ PUSH_OP_2(o_match, r_x6, r_y1);
+ retv = pushMatch(script, &ctx, regexp);
+ if(retv < 0) RETURN(retv);
+ }
+
+ /* Or the very complicated tag statement */
+ else
+ {
+ /* Split the regular expression */
+ char* second = splitTagMatch(script, regexp);
+ if(!second) RETURN(R_SYNTAX);
+
+
+ /* (See code docs in tag.txt file) */
+ key1 = ARG_MAKE_VALUE(ctx.curKey++);
+ key4 = ARG_MAKE_VALUE(ctx.curKey++);
+ key5 = ARG_MAKE_VALUE(ctx.curKey++);
+ key9 = ARG_MAKE_VALUE(ctx.curKey++);
+
+
+ /* Setup */
+ PUSH_OP_2(o_mov, r_x2, ARG_MAKE_VALUE(0));
+ PUSH_OP_2(o_mov, r_x0, r_x6);
+ PUSH_OP_2(o_mov, r_y0, r_y1);
+
+ /* Top of loop */
+ PUSH_OP_1(o_pos, key1);
+
+ /* Start code */
+ PUSH_OP_2(o_match, r_x0, r_y1);
+
+ retv = pushMatch(script, &ctx, regexp);
+ if(retv < 0) RETURN(retv);
+
+ PUSH_OP_1(o_jne, key5);
+ PUSH_OP_2(o_cmp, r_b0, r_y0);
+ PUSH_OP_1(o_je, key5);
+ PUSH_OP_2(o_mov, r_fe, r_fg);
+ PUSH_OP_1(o_je, key5);
+
+ PUSH_OP_2(o_cmp, r_x2, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_jne, key4);
+ PUSH_OP_2(o_mov, r_x5, r_b0);
+ PUSH_OP_2(o_mov, r_y5, r_e0);
+ PUSH_OP_2(o_mov, r_y0, r_e0);
+ PUSH_OP_1(o_pos, key4);
+
+ PUSH_OP_2(o_mov, r_x2, ARG_MAKE_VALUE(0));
+ PUSH_OP_2(o_mov, r_x0, r_e0);
+
+ /* Endless loop check */
+ PUSH_OP_2(o_cmp, r_y2, ARG_MAKE_VALUE(0x00100000));
+ PUSH_OP_1(o_je, key5);
+
+ /* End code */
+ PUSH_OP_2(o_match, r_y0, r_y1);
+ retv = pushMatch(script, &ctx, second);
+ if(retv < 0) RETURN(retv);
+
+ PUSH_OP_1(o_jne, key5);
+ PUSH_OP_2(o_mov, r_y0, r_e0);
+ PUSH_OP_2(o_mov, r_x7, r_b0);
+ PUSH_OP_2(o_mov, r_y7, r_e0);
+
+ /* Locks check */
+ PUSH_OP_2(o_check, r_y5, r_x7);
+ PUSH_OP_2(o_mov, r_x2, r_fe);
+
+ /* End of loop */
+ PUSH_OP_1(o_jmp, key1);
+ PUSH_OP_1(o_pos, key5);
+
+ /* Wrap up */
+ PUSH_OP_2(o_cmp, r_x2, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_je, key9);
+ PUSH_OP_2(o_mov, r_b0, r_x5);
+ PUSH_OP_2(o_mov, r_e0, r_y7);
+ PUSH_OP_2(o_mov, r_b1, r_x5);
+ PUSH_OP_2(o_mov, r_e1, r_y5);
+ PUSH_OP_2(o_mov, r_b2, r_y5);
+ PUSH_OP_2(o_mov, r_e2, r_x7);
+ PUSH_OP_2(o_mov, r_b3, r_x7);
+ PUSH_OP_2(o_mov, r_e3, r_y7);
+ PUSH_OP_2(o_mov, r_cg, ARG_MAKE_VALUE(4));
+ PUSH_OP_1(o_pos, key9);
+ PUSH_OP_2(o_mov, r_fe, r_x2);
+
+ }
+
+
+ PUSH_OP_2(o_mov, r_x4, r_fe);
+
+ if(bOnce)
+ PUSH_OP_1(o_pos, keyJmp5);
+
+ /* Skip all the rest of the setup if failed */
+ PUSH_OP_1(o_jne, keyJmp3);
+
+ /* Set action flag */
+ PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(1));
+
+ if(!bHas)
+ {
+ /* Group validation code */
+ PUSH_OP_2(o_cmp, ARG_MAKE_VALUE(groupNum + 1), r_cg);
+ PUSH_OP_2(o_mov, r_fe, r_fg);
+ PUSH_OP_1(o_jne, keyJmp7);
+ PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_jmp, keyJmp3);
+ PUSH_OP_1(o_pos, keyJmp7);
+
+ /* Narrow the limits (x1, y1) to the begin/end registers of the requested group */
+ PUSH_OP_2(o_mov, r_x1, ARG_MAKE_REGISTER(ARG_GET_REGISTER(r_b0) + groupNum));
+ PUSH_OP_2(o_mov, r_y1, ARG_MAKE_REGISTER(ARG_GET_REGISTER(r_e0) + groupNum));
+ }
+
+ /* Watermark */
+ PUSH_OP_2(o_mov, keyWatermark, r_e0);
+
+ /* End of setup code */
+ PUSH_OP_1(o_pos, keyJmp3);
+
+ /* Implement 'not' */
+ PUSH_OP_2(o_cmp, r_x4, ARG_MAKE_VALUE(bNot ? 0 : 1));
+ PUSH_OP_1(o_push, r_fe);
+ PUSH_OP_1(o_jne, keyJmp4);
+
+ /* Now we're inside */
+ if(bOnce)
+ PUSH_OP_2(o_mov, keyOnce, ARG_MAKE_VALUE(1));
+
+
+ /* Wrap up code */
+ PUSH_ROP_1(o_pop, r_x1);
+ PUSH_ROP_1(o_pop, r_y1);
+ PUSH_ROP_1(o_pop, r_fe);
+ PUSH_ROP_1(o_pos, keyJmp4);
+
+
+ free(regexp);
+ regexp = NULL;
+ }
+ break;
+
+
+ /*//////////////////////////////////////////////////////
+ // replace
+ */
+ case s_replace:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // replace "replacetext"
+ */
+
+ uint key;
+
+ /* Check that we got data */
+ if(!ctx.curData)
+ SYNTAX_ERROR("'replace' needs text to replace");
+
+ /* OPS --------------------------------------------
+ //
+ // check x1, r_y1
+ // jne <1>
+ // repl x1, r_y1
+ // lock x1, r_y1
+ // <1>
+ */
+
+ key = ARG_MAKE_VALUE(ctx.curKey++);
+
+ PUSH_OP_2(o_check, r_x1, r_y1);
+ PUSH_OP_1(o_jne, key);
+
+ PUSH_OP(o_text);
+ pushText(&ctx, ctx.curData, ctx.lenData);
+ PUSH_OP_2(o_repl, r_x1, r_y1);
+
+ PUSH_OP_2(o_lock, r_x1, r_y1);
+ PUSH_OP_1(o_pos, key);
+ }
+ break;
+
+
+ /*//////////////////////////////////////////////////////
+ // lock
+ */
+ case s_lock:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // lock
+ */
+
+ /* OPS --------------------------------------------
+ //
+ // lock x1, r_y1
+ */
+
+ PUSH_OP_2(o_lock, r_x1, r_y1);
+ }
+ break;
+
+
+
+ /*//////////////////////////////////////////////////////
+ // variable functions
+ */
+ case s_setvar:
+ case s_clrvar:
+ case s_addvar:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // set name "value"
+ // clr name
+ // add name "value"
+ */
+
+ /* Check that we got a name */
+ if(!(ctx.curName && ctx.lenName))
+ SYNTAX_ERROR("Missing variable name");
+
+ /* Check that we got data */
+ if((ctx.curStatement == s_setvar ||
+ ctx.curStatement == s_addvar) && !ctx.curData)
+ SYNTAX_ERROR("Missing variable value");
+
+
+ /* OPS --------------------------------------------
+ */
+
+ /* If not clearing then push text */
+ if(ctx.curStatement != s_clrvar)
+ {
+ PUSH_OP(o_text);
+ pushText(&ctx, ctx.curData, ctx.lenData);
+ }
+
+ /* If not adding then clear */
+ if(ctx.curStatement != s_addvar)
+ {
+ PUSH_OP(o_clrvar);
+ pushVar(&ctx, ctx.curName, ctx.lenName);
+ }
+
+ /* If not clearing then set */
+ if(ctx.curStatement != s_clrvar)
+ {
+ PUSH_OP(o_setvar);
+ pushVar(&ctx, ctx.curName, ctx.lenName);
+ }
+ }
+ break;
+
+ /*//////////////////////////////////////////////////////
+ // else
+ */
+ case s_else:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // else
+ // {
+ // .....
+ // }
+ */
+
+ uint key;
+
+ /* REMOVED the following because now else can follow any statement
+ //
+ // Make sure the previous statement was a match
+ // if(ctx.lastStatement != s_match && ctx.lastStatement != s_replace &&
+ // ctx.lastStatement != s_call)
+ // SYNTAX_ERROR("'else' must follow a match, replace or call statement");
+ */
+
+ /* Next statement must be an opening brace */
+ ctx.nextStatement = s_opbrace;
+
+ /* OPS --------------------------------------------
+ //
+ // je <1>
+ // ......
+ // <1>
+ */
+
+ key = ARG_MAKE_VALUE(ctx.curKey++);
+
+ PUSH_OP_1(o_je, key);
+ PUSH_ROP_1(o_pos, key);
+ }
+ break;
+
+ /*/////////////////////////////////////////////////////
+ // message
+ */
+ case s_message:
+ {
+ /* SYNTAX ----------------------------------------
+ //
+ // message "data"
+ */
+
+ /* Check that we got data */
+ if(!ctx.curData)
+ SYNTAX_ERROR("Missing message text.");
+
+ /* OPS --------------------------------------------
+ //
+ // text "message"
+ // msg
+ */
+
+ PUSH_OP(o_text);
+ pushText(&ctx, ctx.curData, ctx.lenData);
+ PUSH_OP(o_msg);
+ }
+ break;
+
+
+ /*/////////////////////////////////////////////////////
+ // options
+ */
+ case s_options:
+ {
+ /* SYNTAX ----------------------------------------
+ //
+ // options(case, line)
+ */
+
+ /* Save the options into the context */
+ ctx.code->curOptions = ctx.curFlags;
+ }
+ break;
+
+ default:
+ ASSERT(false);
+
+ }
+ }
+
+ /* Pop out of any implied blocks */
+ while(ctx.code->curContext & SYNTAX_IMPLIED)
+ POP_CODESTACK(&ctx);
+
+ /* copy any remaining post ops */
+ POP_CODESTACK(&ctx);
+
+ /* This is the bottom of the main loop */
+ PUSH_OP_1(o_test, r_ac);
+ PUSH_OP_1(o_je, ARG_MAKE_VALUE(0));
+
+ /* Put an end marker */
+ PUSH_OP(o_end);
+
+
+ /* Check brace syntax */
+ if(ctx.code->pPrev != NULL)
+ SYNTAX_ERROR("Not all braces matched.");
+
+ /* Put the compiled script in the script */
+ script->ops = ctx.beg;
+ script->len = ctx.cbops;
+
+ /* This fixes all the jmp and removes pos */
+ retv = compilerOptimize(script);
+
+cleanup:
+ if(retv == R_SYNTAX || retv == R_REGEXP)
+ script->errline = getLine(data, ctx.in) - 1;
+
+ /* Unwind codestack */
+ while(ctx.code)
+ ctx.code = popCodeStack(ctx.code);
+
+ if(pFunctions)
+ free(pFunctions);
+ if(regexp)
+ free(regexp);
+
+ if(retv < R_OK)
+ {
+ if(ctx.beg)
+ free(ctx.beg);
+
+ script->ops = NULL;
+ script->len = 0;
+ }
+
+ return retv;
+}
+
+
+
+/*
+ * Second stage of compilation: strips every 'pos' marker out of the op
+ * stream and rewrites the jumps and calls that refer to them.
+ *
+ * The compiler emits jmp/je/jne/call with a symbolic key as argument and
+ * a 'pos' op carrying the same key at the landing point. Pass one deletes
+ * each pos in place and records the offset it occupied together with its
+ * key. Pass two replaces the key in every jump or call with the recorded
+ * offset.
+ *
+ * scr->ops is edited in place and scr->len shrinks by 5 bytes for each
+ * pos removed. Returns R_OK, or R_NOMEM when the table of landing points
+ * cannot be grown.
+ */
+int compilerOptimize(r_script* scr)
+{
+    int retv = R_OK;
+
+    /* Landing points stored as pairs: [2n] = offset, [2n + 1] = key */
+    uint* marks = NULL;
+    uint nmarks = 0;
+    uint capacity = 0;
+
+    vmop_t* pc;
+
+    /* Pass one: collect and remove the pos markers */
+    pc = scr->ops;
+    while(*pc != o_end)
+    {
+        ASSERT(pc < scr->ops + scr->len);
+
+        /* Anything that is not a marker is simply stepped over */
+        if(*pc != o_pos)
+        {
+            opsIterate(&pc);
+            continue;
+        }
+
+        /* Grow the table in chunks of 0x40 entries */
+        if(nmarks >= capacity)
+        {
+            capacity += 0x40;
+            marks = (uint*)reallocf(marks, sizeof(uint) * 2 * capacity);
+            if(!marks)
+                RETURN(R_NOMEM);
+        }
+
+        ASSERT(ARG_TYPE(pc[1]) == ARG_VAL_TYPE);
+
+        /*
+         * Markers before this one are already gone, so this offset is
+         * relative to the compacted stream.
+         */
+        marks[2 * nmarks] = pc - scr->ops;
+        marks[2 * nmarks + 1] = *((uint*)(pc + 1));
+        nmarks++;
+
+        /*
+         * Close the gap: a pos is 5 bytes in total. The cursor stays put
+         * because the following op has just been moved underneath it.
+         */
+        scr->len -= 5;
+        memmove(pc, pc + 5, scr->len - (pc - scr->ops));
+    }
+
+    /* Pass two: point every jump and call at its landing offset */
+    for(pc = scr->ops; *pc != o_end; )
+    {
+        ASSERT(pc < scr->ops + scr->len);
+
+        if(*pc == o_pos)
+        {
+            /*
+             * Pass one removed every marker, so this is a logic error.
+             * As before, the cursor is deliberately not advanced here.
+             */
+            ASSERT(false);
+            continue;
+        }
+
+        if(*pc == o_jmp || *pc == o_jne || *pc == o_je || *pc == o_call)
+        {
+            uint* target = (uint*)(pc + 1);
+            bool found = false;
+            size_t n = 0;
+
+            /* The first landing point with a matching key wins */
+            while(n < nmarks && !found)
+            {
+                if(marks[2 * n + 1] == *target)
+                {
+                    *target = ARG_MAKE_VALUE(marks[2 * n]);
+                    found = true;
+                }
+                else
+                {
+                    n++;
+                }
+            }
+
+            if(!found)
+                ASSERT(false && "jump without a pos");
+        }
+
+        /* Jumps and calls are stepped over like any other op */
+        opsIterate(&pc);
+    }
+
+cleanup:
+    free(marks);
+
+    return retv;
+}
+
+/*
+ * Moves *ops from the op it points at to the start of the next op.
+ *
+ * The op byte itself is always consumed; what follows depends on the op:
+ * nothing, one or two arguments, or (match, setvar, clrvar, text) a
+ * variable sized payload whose length comes from the matching
+ * *_op_size() helper. An unrecognised op trips ASSERT and only the op
+ * byte is consumed.
+ */
+void opsIterate(vmop_t** ops)
+{
+    vmop_t* next = *ops;
+    vmop_t code = *next;
+
+    /* Step over the op byte */
+    next++;
+
+    switch(code)
+    {
+    /* Payload only: variable name */
+    case o_setvar:
+    case o_clrvar:
+        {
+            var_op* var = (var_op*)next;
+            next += var_op_size(*var);
+        }
+        break;
+
+    /* Payload only: text */
+    case o_text:
+        {
+            text_op* text = (text_op*)next;
+            next += text_op_size(*text);
+        }
+        break;
+
+    /* Two arguments followed by the match payload */
+    case o_match:
+        {
+            match_op* match;
+            INC_ARGUMENT(next);
+            INC_ARGUMENT(next);
+            match = (match_op*)next;
+            next += match_op_size(*match);
+        }
+        break;
+
+    /* Two arguments */
+    case o_mov:
+    case o_cmp:
+    case o_add:
+    case o_sub:
+    case o_lock:
+    case o_check:
+    case o_repl:
+        INC_ARGUMENT(next);
+        INC_ARGUMENT(next);
+        break;
+
+    /* One argument */
+    case o_jmp:
+    case o_je:
+    case o_jne:
+    case o_call:
+    case o_push:
+    case o_pop:
+    case o_test:
+    case o_stop:
+        INC_ARGUMENT(next);
+        break;
+
+    /* No arguments */
+    case o_end:
+    case o_nop:
+    case o_ret:
+    case o_msg:
+        break;
+
+    default:
+        ASSERT(false);
+    }
+
+    *ops = next;
+}
+
+
+/*
+ TODO: individual ops do not need to be freed
+ any longer. Execution no longer changes them.
+*/
+
+/*
+ * Frees a set of ops.
+ *
+ * ops: start of the op stream; NULL is accepted and is a no-op.
+ * len: number of bytes in the stream, or 0 when the length is unknown,
+ *      in which case the stream is walked up to its o_end marker.
+ *
+ * The walk itself releases nothing; it only steps through the ops with
+ * opsIterate() before the buffer is handed to free(). Always returns
+ * R_OK.
+ */
+int opsFree(vmop_t* ops, size_t len)
+{
+    byte* cur = ops;
+
+    /* Nothing to walk, and free(NULL) would do nothing anyway */
+    if(ops == NULL)
+        return R_OK;
+
+    /*
+     * The bound is checked as an offset rather than as 'ops + len': an
+     * unknown length used to be turned into ~0, and adding that to the
+     * pointer overflowed it.
+     */
+    while(len == 0 || (size_t)(cur - ops) < len)
+    {
+        if(*cur == o_end)
+            break;
+
+        opsIterate(&cur);
+    }
+
+    free(ops);
+    return R_OK;
+}
+
+
+/*
+ * Maps an op code to its mnemonic (the o_ constant without the prefix).
+ * Codes not listed below yield "<INVALID>".
+ */
+static const char* getOpName(vmop_t op)
+{
+    const char* name = "<INVALID>";
+
+    /* One case per op: the label is o_<x>, the text is "<x>" */
+    #define OPNAME_CASE(x) case o_##x: name = #x; break
+
+    switch(op)
+    {
+        OPNAME_CASE(end);
+        OPNAME_CASE(stop);
+        OPNAME_CASE(nop);
+        OPNAME_CASE(ret);
+        OPNAME_CASE(push);
+        OPNAME_CASE(pop);
+        OPNAME_CASE(jmp);
+        OPNAME_CASE(je);
+        OPNAME_CASE(jne);
+        OPNAME_CASE(test);
+        OPNAME_CASE(call);
+        OPNAME_CASE(lock);
+        OPNAME_CASE(check);
+        OPNAME_CASE(cmp);
+        OPNAME_CASE(add);
+        OPNAME_CASE(sub);
+        OPNAME_CASE(mov);
+        OPNAME_CASE(match);
+        OPNAME_CASE(repl);
+        OPNAME_CASE(setvar);
+        OPNAME_CASE(clrvar);
+        OPNAME_CASE(pos);
+        OPNAME_CASE(text);
+        OPNAME_CASE(msg);
+
+    default:
+        break;
+    }
+
+    #undef OPNAME_CASE
+
+    return name;
+}
+
+/*
+ * Maps a register code to its name (the r_ constant without the prefix).
+ * Codes not listed below yield "<INVALID>".
+ */
+static const char* getRegisterName(byte reg)
+{
+    /* One case per register: the label is r_<x>, the text is "<x>" */
+    #define RETREGNAME(r) case r_##r: return #r;
+    switch(reg)
+    {
+        RETREGNAME(fe);
+        RETREGNAME(fg);
+        RETREGNAME(fl);
+        RETREGNAME(ac);
+        RETREGNAME(sp);
+        RETREGNAME(bp);
+        RETREGNAME(b0);
+        RETREGNAME(b1);
+        RETREGNAME(b2);
+        RETREGNAME(b3);
+        RETREGNAME(b4);
+        RETREGNAME(b5);
+        RETREGNAME(b6);
+        RETREGNAME(b7);
+        RETREGNAME(b8);
+        RETREGNAME(b9);
+        RETREGNAME(e0);
+        RETREGNAME(e1);
+        RETREGNAME(e2);
+        RETREGNAME(e3);
+        RETREGNAME(e4);
+        RETREGNAME(e5);
+        RETREGNAME(e6);
+        RETREGNAME(e7);
+        RETREGNAME(e8);
+        RETREGNAME(e9);
+        RETREGNAME(cg);
+        RETREGNAME(x0);
+        RETREGNAME(x1);
+        RETREGNAME(x2);
+        RETREGNAME(x3);
+        RETREGNAME(x4);
+        RETREGNAME(x5);
+        RETREGNAME(x6);
+        RETREGNAME(x7);
+        RETREGNAME(y0);
+        RETREGNAME(y1);
+        RETREGNAME(y2);
+        RETREGNAME(y3);
+        RETREGNAME(y4);
+        RETREGNAME(y5);
+        RETREGNAME(y6);
+        RETREGNAME(y7);
+
+    default:
+        return "<INVALID>";
+    }
+    #undef RETREGNAME
+}
+
+/*
+ * Writes a readable form of the single argument at 'ops' to 'f'.
+ *
+ * The argument type is read from its first byte. Value, memory and stack
+ * arguments are printed as a six digit hex number (memory and stack ones
+ * tagged and bracketed); register arguments are printed by name. The
+ * cursor is not advanced; the caller steps over the argument itself.
+ */
+void dumpArgument(FILE* f, vmop_t* ops)
+{
+    switch(ARG_TYPE(*ops))
+    {
+    case ARG_VAL_TYPE:
+        fprintf(f, "0x%06x", ARG_GET_VALUE(*((uint*)ops)));
+        break;
+
+    case ARG_MEM_TYPE:
+        fprintf(f, "<mem:0x%06x>", ARG_GET_VALUE(*((uint*)ops)));
+        break;
+
+    case ARG_STACK_TYPE:
+        fprintf(f, "<stack:0x%06x>", ARG_GET_VALUE(*((uint*)ops)));
+        break;
+
+    case ARG_REG_TYPE:
+        /* The name is data, not a format string, so it goes through fputs */
+        fputs(getRegisterName(*ops), f);
+        break;
+
+    default:
+        ASSERT(false);
+    }
+}
+
+/*
+ * Dump a string of ops to a stream.
+ *
+ * Writes one line per op, up to but not including the o_end marker: the
+ * op's offset from the start in hex, its mnemonic, its arguments, and
+ * then any payload that follows the arguments (pattern for match,
+ * variable name for setvar/clrvar, string for text). Always returns R_OK.
+ */
+int opsDump(vmop_t* ops, FILE* f)
+{
+    vmop_t* beg = ops;
+
+    while(*ops != o_end)
+    {
+        vmop_t op = *ops;
+
+        /* %x takes an unsigned int, so the pointer difference is cast */
+        fprintf(f, "%06x: %s ", (unsigned int)(ops - beg), getOpName(op));
+
+        ops++;
+
+        /* Now the arguments */
+        switch(op)
+        {
+        case o_push:
+        case o_pop:
+        case o_jmp:
+        case o_je:
+        case o_jne:
+        case o_test:
+        case o_call:
+        case o_stop:
+        case o_pos:
+            dumpArgument(f, ops);
+            INC_ARGUMENT(ops);
+            break;
+
+        case o_lock:
+        case o_check:
+        case o_cmp:
+        case o_add:
+        case o_sub:
+        case o_mov:
+        case o_match:
+        case o_repl:
+            dumpArgument(f, ops);
+            fprintf(f, ", ");
+            INC_ARGUMENT(ops);
+            dumpArgument(f, ops);
+            INC_ARGUMENT(ops);
+            break;
+
+        default:
+            /* Remaining ops carry no arguments */
+            break;
+        }
+
+        /* Now any additional data */
+        switch(op)
+        {
+        case o_match:
+            {
+                match_op* mop = (match_op*)ops;
+                if(mop->type & kMatchPcre)
+                {
+                    match_op_pcre* pcre = (match_op_pcre*)mop;
+                    fprintf(f, " %s", pcre->pattern);
+                }
+                else
+                {
+                    fprintf(f, " <regexp>");
+                }
+
+                ops += match_op_size(*mop);
+            }
+            break;
+
+        case o_setvar:
+        case o_clrvar:
+            {
+                var_op* vop = (var_op*)ops;
+                fprintf(f, " <%%%s>", vop->name);
+                ops += var_op_size(*vop);
+            }
+            break;
+
+        /*
+         * The text payload belongs to o_text. This case used to test
+         * o_test, which left text payloads unskipped and made 'test' ops
+         * swallow the bytes after their argument.
+         */
+        case o_text:
+            {
+                text_op* top = (text_op*)ops;
+                fprintf(f, " <%s>", top->string);
+                ops += text_op_size(*top);
+            }
+            break;
+
+        default:
+            /* No payload */
+            break;
+        }
+
+        fprintf(f, "\n");
+    }
+
+    return R_OK;
+}
+
+
+
+