summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorStef Walter <stef@thewalter.net>2003-09-20 07:12:49 +0000
committerStef Walter <stef@thewalter.net>2003-09-20 07:12:49 +0000
commitb49d8ebefe9b10c53a6a09ad564e22111b7b25c6 (patch)
tree1d5dd4abc38170a7bc106dabbc59b915017222f0 /lib
parent1cda9ebbd62916c7c2136722597a86c583e1ecf6 (diff)
Initial Import
Diffstat (limited to 'lib')
-rw-r--r--lib/Makefile.am9
-rw-r--r--lib/compile.c2337
-rw-r--r--lib/execute.c1715
-rw-r--r--lib/execute.h179
-rw-r--r--lib/ops.h349
-rw-r--r--lib/priv.h95
-rw-r--r--lib/rep.h107
-rw-r--r--lib/rlib.c591
-rw-r--r--lib/rlib.dsp140
-rw-r--r--lib/rlib.h261
10 files changed, 5783 insertions, 0 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
new file mode 100644
index 0000000..ccc509e
--- /dev/null
+++ b/lib/Makefile.am
@@ -0,0 +1,9 @@
+include_HEADERS = rlib.h rep.h
+lib_LTLIBRARIES = librlib.la
+
+librlib_la_CFLAGS = -I${top_srcdir} -I/usr/local/include
+librlib_la_SOURCES = compile.c execute.c execute.h priv.h ops.h rlib.c \
+../common/xstring.c ../common/compat.c ../common/binfile.c ../common/repfile.c
+librlib_la_LDFLAGS = -version-info 2:3
+
+EXTRA_DIST = rlib.dsp \ No newline at end of file
diff --git a/lib/compile.c b/lib/compile.c
new file mode 100644
index 0000000..e92a2f6
--- /dev/null
+++ b/lib/compile.c
@@ -0,0 +1,2337 @@
+/*
+ * AUTHOR
+ * N. Nielsen
+ *
+ * LICENSE
+ * This software is in the public domain.
+ *
+ * The software is provided "as is", without warranty of any kind,
+ * express or implied, including but not limited to the warranties
+ * of merchantability, fitness for a particular purpose, and
+ * noninfringement. In no event shall the author(s) be liable for any
+ * claim, damages, or other liability, whether in an action of
+ * contract, tort, or otherwise, arising from, out of, or in connection
+ * with the software or the use or other dealings in the software.
+ *
+ * SUPPORT
+ * Send bug reports to: <nielsen@memberwebs.com>
+ */
+
+/* ----------------------------------------------------------------------
+// Recipe Compiler
+// 2000-2002 Copyright, Nate Nielsen
+*/
+
+
+#include <stdarg.h>
+#include "common/usuals.h"
+#include "common/compat.h"
+#include "lib/rlib.h"
+#include "priv.h"
+#include "execute.h"
+#include "ops.h"
+
+/*
+ * Byte signatures that compileEncoding() looks for at the very start of a
+ * script. The UTF-8 signature is skipped; any of the UCS2/UCS4 signatures
+ * causes the script to be rejected as an unsupported encoding.
+ * NOTE(review): the _L / _R suffixes presumably denote the two byte orders
+ * of the same signature -- confirm.
+ */
+const byte kEncUTF8[] = { 0xEF, 0xBB, 0xBF };
+const byte kEncUCS2_L[] = { 0xFF, 0xFE };
+const byte kEncUCS2_R[] = { 0xFE, 0xFF };
+const byte kEncUCS4_L[] = { 0xFF, 0xFE, 0x00, 0x00 };
+const byte kEncUCS4_R[] = { 0x00, 0x00, 0xFE, 0xFF };
+
+/* Flags Syntax: -----------------------------------------------------------------
+ */
+
+/*
+ * All flags available to statements.
+ * Each value is a distinct bit so that several flags can be OR-ed together
+ * in compilecontext.curFlags and in the per-statement 'flags' mask of
+ * kAllStatements. Bit 0x0008 is not assigned here.
+ */
+typedef enum _syn_flags
+{
+ f_not = 0x0001, /* match: Reverse effect */
+ f_once = 0x0002, /* match: Only execute once */
+ f_case = 0x0004, /* options: Case sensitive */
+ f_line = 0x0010, /* options: Limit to line matches */
+ f_find = 0x0020, /* match: match but don't move limits */
+ f_tag = 0x0040, /* tag: use special tag matching */
+
+ /* Special cases */
+ f_num = 0x0400 /* represents a number */
+}
+syn_flags;
+
+
+/* Listing of all flags and their text representations */
+typedef struct _flagmap
+{
+ syn_flags flag; /* bit to set in curFlags when 'text' is seen */
+ const char* text; /* spelling of the flag inside ( ... ) */
+}
+flagmap;
+
+/*
+ * Table searched (case-insensitively) by compileStatement() when it meets
+ * an identifier inside a flags list.
+ */
+flagmap kAllFlags[] = {
+ { f_not, "not" },
+ { f_once, "once" },
+ { f_case, "case" },
+ { f_line, "line" },
+ { f_find, "find" },
+ { f_tag, "tag" },
+
+ /* Special cases */
+ /* f_num has no keyword: it is set when a run of digits is parsed */
+ { f_num, "" }
+};
+
+
+/* Statement Syntax: --------------------------------------------------------------
+ */
+
+/*
+ * Identifiers for every statement kind the compiler knows about.
+ * The numeric value of each enumerator is used directly as an index into
+ * kAllStatements (see the _DEBUG consistency check in compilerRun), so the
+ * two lists must be kept in the same order.
+ */
+typedef enum _syn_statements
+{
+ s_none,
+ s_function,
+ s_loop,
+ s_once,
+ s_options,
+ s_call,
+ s_return,
+ s_end,
+ s_stop,
+ s_match,
+ s_replace,
+ s_lock,
+ s_setvar,
+ s_addvar,
+ s_clrvar,
+ s_message,
+ s_else,
+ s_opbrace,
+ s_clbrace,
+ s_implied
+}
+syn_statements;
+
+/* One row of the statement table: how a statement is spelled and used */
+typedef struct _syntaxmap
+{
+ syn_statements syntax; /* id */
+ const char* text; /* text representation */
+ uint flags; /* which flags are valid */
+ uint args; /* Does it have arguments (either a name or a data block) */
+ uint context; /* In which context valid */
+}
+syntaxmap;
+
+/* Context values */
+/* (bit masks; stored both in syntaxmap.context and codestack.curContext) */
+#define SYNTAX_INROOT 0x00000001 /* Only allowed in the root script */
+#define SYNTAX_INBRACE 0x00000004 /* Only allowed inside braces */
+#define SYNTAX_INFUNCTION 0x00000008 /* Only allowed in a function */
+#define SYNTAX_BLOCK 0x00000010 /* The statement is start of a block */
+#define SYNTAX_IMPLIED 0x00000020 /* We're in an implied block */
+#define SYNTAX_SPECIAL 0x00000100 /* Syntax not determined by parser */
+#define SYNTAX_ANYWHERE (SYNTAX_INROOT | SYNTAX_INBRACE | SYNTAX_INFUNCTION)
+
+/* Argument values */
+/* (bit masks for syntaxmap.args; a statement may take both) */
+#define ARGUMENT_NAME 0x00000001
+#define ARGUMENT_DATA 0x00000002
+
+/*
+ * The statement table. Columns: id, keyword, permitted flags, argument
+ * kinds, permitted context. Rows must stay in syn_statements order because
+ * the table is indexed by statement id.
+ */
+syntaxmap kAllStatements[] = {
+ { s_none, "", 0, 0, SYNTAX_SPECIAL },
+ { s_function, "function", 0, ARGUMENT_NAME, SYNTAX_INROOT },
+ { s_loop, "loop", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK },
+ { s_once, "once", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK },
+ { s_options, "options", f_case | f_line, 0, SYNTAX_ANYWHERE },
+ { s_call, "call", 0, ARGUMENT_NAME, SYNTAX_ANYWHERE },
+ { s_return, "return", f_num, 0, SYNTAX_INFUNCTION },
+ { s_end, "end", 0, 0, SYNTAX_ANYWHERE },
+ { s_stop, "stop", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE },
+ { s_match, "match", f_not|f_once|f_num|f_find|f_tag,ARGUMENT_DATA, SYNTAX_ANYWHERE | SYNTAX_BLOCK },
+ { s_replace, "replace", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE },
+ { s_lock, "lock", 0, 0, SYNTAX_ANYWHERE },
+ { s_setvar, "set", 0, ARGUMENT_DATA | ARGUMENT_NAME, SYNTAX_ANYWHERE },
+ { s_addvar, "add", 0, ARGUMENT_DATA | ARGUMENT_NAME, SYNTAX_ANYWHERE },
+ { s_clrvar, "clr", 0, ARGUMENT_NAME, SYNTAX_ANYWHERE },
+ { s_message, "message", 0, ARGUMENT_DATA, SYNTAX_ANYWHERE },
+ { s_else, "else", 0, 0, SYNTAX_ANYWHERE | SYNTAX_BLOCK },
+
+ /* special cases */
+ /* (SYNTAX_SPECIAL rows bypass the context check in compileStatement) */
+ { s_opbrace, "{", 0, 0, SYNTAX_SPECIAL },
+ { s_clbrace, "}", 0, 0, SYNTAX_SPECIAL },
+ { s_implied, "", 0, 0, SYNTAX_SPECIAL }
+};
+
+
+
+
+/* Syntax Constants: -------------------------------------------------------------
+ */
+
+/* Character sets handed to strspn/strcspn/strchr by the parser; the order
+ * of characters inside each set does not matter. */
+const char* kValidNum = "0123465798";
+const char* kValidDelim = "\"~`!@#$%^&*[]|'<>./?+=-;:";
+const char* kValidBrace = "{}";
+/* Single-character punctuation of the script syntax */
+const char kFlagsStart = '(';
+const char kFlagsEnd = ')';
+const char kFlagDelim = ',';
+const char kTagDelim = '=';
+const char kEscapeChar = '\\';
+
+/* Maximum size of an identifier */
+#define kMaxIdentifier 40
+
+
+/* Returned by findFunction() when no function of that name is known */
+#define INVALID_PTR 0xFFFFFFFF
+
+
+
+/* ----------------------------------------------------------------------
+// CODE and COMPILE FUNCTIONALITY
+*/
+
+/* Initial size of the output buffer and the step by which moreOutput() grows it */
+const size_t OPS_BUFFER_SIZE = 0x1000;
+
+
+/* codestack: We use one of these for each level of braces. Helps
+// us maintain context. Frames form a singly linked list through pPrev;
+// insPos/endPos are byte offsets into the output buffer (compilecontext.beg). */
+
+typedef struct _codestack
+{
+ uint curContext; /* Current parse context */
+ uint curOptions; /* Current options in use */
+ uint numStatements; /* Number of statements in this block */
+
+ size_t insPos; /* The position to insert code */
+ size_t endPos; /* End of code owned by current codestack */
+
+ struct _codestack* pPrev; /* previous stack (if in list) */
+}
+codestack;
+
+/* Add a new post op buffer and stash away current.
+ * Expands to RETURN(R_NOMEM) when the allocation fails, so it can only be
+ * used where the RETURN macro (and the label it jumps to) is available.
+ * 'c' is a pointer to the compilecontext. */
+#define PUSH_CODESTACK(c) \
+do { \
+    (c)->code = pushCodeStack((c)->code); \
+    if(!(c)->code) RETURN(R_NOMEM); \
+} while(0)
+
+/* Dump current post op buffer and get previous.
+ * The frame is committed (insPos moved to endPos) before it is popped so
+ * the parent resumes inserting after everything the frame emitted.
+ * No trailing line continuation: the macro must end on the 'while' line. */
+#define POP_CODESTACK(c) \
+do { \
+    commitCodeStack((c)->code); \
+    (c)->code = popCodeStack((c)->code); \
+} while (0)
+
+
+/*
+ * Allocate a zeroed codestack frame and link it in front of 'prev'.
+ * With a parent, the new frame starts inserting at the parent's insertion
+ * point and inherits its options and context; without one it is the root
+ * frame and gets the SYNTAX_INROOT context.
+ * Returns the new frame, or NULL if memory could not be allocated.
+ */
+static codestack* pushCodeStack(codestack* prev)
+{
+    codestack* frame = (codestack*)malloc(sizeof(codestack));
+    if(frame == NULL)
+        return NULL;
+
+    memset(frame, 0, sizeof(codestack));
+    frame->pPrev = prev;
+
+    if(prev == NULL)
+    {
+        /* First frame of a compile: we are in the root script */
+        frame->curContext = SYNTAX_INROOT;
+        return frame;
+    }
+
+    /* Things to carry over from previous */
+    frame->curContext = prev->curContext;
+    frame->curOptions = prev->curOptions;
+    frame->insPos = prev->insPos;
+    frame->endPos = prev->insPos;
+
+    return frame;
+}
+
+/*
+ * Release 'code' and hand back the frame underneath it (NULL if 'code'
+ * was the bottom frame). The parent's insertion point is moved up to
+ * where the child left off and its end position grows by the same amount.
+ */
+static codestack* popCodeStack(codestack* code)
+{
+    codestack* parent = code->pPrev;
+
+    if(parent != NULL)
+    {
+        /* How far the child advanced past the parent's insertion point */
+        size_t advanced = code->insPos - parent->insPos;
+
+        parent->endPos += advanced;
+        parent->insPos = code->insPos;
+    }
+
+    free(code);
+    return parent;
+}
+
+/* Move the insertion point of a frame to the end of the code it owns, so
+ * that anything pushed afterwards lands behind code pushed in reverse. */
+#define commitCodeStack(code) ((code)->insPos = (code)->endPos)
+
+
+/* compilecontext: The main compile state. Only one used throughout
+// compilation. The cur* fields describe the statement most recently parsed
+// by compileStatement(); curName, curFlagData and curData point into the
+// script text itself and are NOT null-terminated (use the len* fields).
+*/
+typedef struct _compilecontext
+{
+ syn_statements lastStatement; /* Last statement we had */
+ syn_statements nextStatement; /* Next statement we're expecting */
+
+ syn_statements curStatement; /* Current statement: */
+ const char* curName; /* - The Name */
+ size_t lenName; /* - Length of the name */
+ uint curFlags; /* - Flags */
+ const char* curFlagData; /* - Extra flag (f_num) */
+ size_t lenFlagData; /* - Length of extra flag */
+ const char* curData; /* - Data */
+ size_t lenData; /* - Length of data */
+
+
+ const char* in; /* Next location to compile in script */
+ long curKey; /* a unique key (id) which gets incremented */
+ /* for various uses such as jumps and calls */
+
+ vmop_t* beg; /* start of block of output code */
+ size_t cbops; /* number of bytes of output code */
+ size_t alloc; /* number of bytes allocated for output */
+
+ codestack* code; /* Current codestack */
+ bool failed : 1; /* did the last memory allocation fail? */
+}
+compilecontext;
+
+
+
+/* -------------------------------------------------------------------
+// POS OP
+//
+// the 'pos' op is a temporary place holder while compiling
+// it takes one value parameter and it's value is the same as is
+// temporarily placed in jmp, je, jne or call
+// it's removed and jumps are patched in the optimization stage
+*/
+
+/* Opcode value used for the temporary 'pos' marker while compiling */
+#define o_pos ((vmop_t)(0xFF))
+/* const vmop_t pos = 0xFF; */
+
+
+
+/* -------------------------------------------------------------------
+// OPS CODING FUNCTIONALITY
+*/
+
+/* These macros can only be used within repCompile
+ * (they refer to a local variable named 'ctx' by address).
+ *
+ * Each one forwards a list of (size, value) pairs to pushValues(). The list
+ * is terminated by a zero size; the terminator is written as (size_t)0
+ * because pushValues() reads every size with va_arg(ap, size_t) and a plain
+ * int 0 has the wrong type (and width on LP64 targets) for that.
+ */
+
+#define PUSH_OP(op) \
+    pushValues(&ctx, true, sizeof(byte), op, (size_t)0)
+
+#define PUSH_OP_1(op, arg) \
+    pushValues(&ctx, true, sizeof(byte), op, sizeof(arg), arg, (size_t)0)
+
+#define PUSH_OP_2(op, arg, arg2) \
+    pushValues(&ctx, true, sizeof(byte), op, sizeof(arg), arg, sizeof(arg2), arg2, (size_t)0)
+
+/* Add ops in reverse: the insertion point is not advanced, so ops pushed
+ * this way end up in the opposite order of the PUSH_ROP calls and behind
+ * anything pushed forward later in the same block. */
+#define PUSH_ROP(op) \
+    pushValues(&ctx, false, sizeof(byte), op, (size_t)0)
+
+#define PUSH_ROP_1(op, arg) \
+    pushValues(&ctx, false, sizeof(byte), op, sizeof(arg), arg, (size_t)0)
+
+#define PUSH_ROP_2(op, arg, arg2) \
+    pushValues(&ctx, false, sizeof(byte), op, sizeof(arg), arg, sizeof(arg2), arg2, (size_t)0)
+
+
+/*
+ * Get more stack space for ops: grow the output buffer by OPS_BUFFER_SIZE.
+ * On failure ctx->failed is set and the recorded capacity is reset to 0,
+ * so the context never claims capacity for a buffer it no longer has.
+ * NOTE(review): assumes reallocf() has the BSD semantics of freeing the
+ * old block when it fails -- confirm against common/compat.
+ */
+static void moreOutput(compilecontext* ctx)
+{
+    /* Reallocate */
+    ctx->beg = (byte*)reallocf(ctx->beg, ctx->alloc + OPS_BUFFER_SIZE);
+
+    /* Set flag if failed */
+    if(!ctx->beg)
+    {
+        ctx->failed = true;
+        ctx->alloc = 0;
+        return;
+    }
+
+    /* Only account for the new space once we really have it */
+    ctx->alloc += OPS_BUFFER_SIZE;
+}
+
+/* Allocate a specific amount from the stack: reserves 'len' bytes at the
+ * current insertion point, advances past them and yields a pointer to the
+ * reserved bytes (NULL on allocation failure or when len is 0). */
+#define allocOutput(ctx, len) pushData(ctx, NULL, len, true)
+
+
+
+
+/*
+ * Push any amount of data on ops buffer.
+ *
+ * Opens a gap of 'len' bytes at the current frame's insertion point
+ * (shifting everything behind it) and, when 'data' is given, copies it in.
+ *
+ *   data     bytes to insert, or NULL to only reserve space
+ *   len      number of bytes
+ *   forward  true to advance the insertion point past the new bytes,
+ *            false to leave it in front of them
+ *
+ * Returns a pointer to the reserved bytes when 'data' is NULL; returns
+ * NULL when 'data' was supplied, when len is 0, or when the output buffer
+ * could not be grown (ctx->failed is set in that case).
+ */
+static void* pushData(compilecontext* ctx, const void* data, size_t len, bool forward)
+{
+    void* pIns;
+
+    /* Keep growing until the request fits: one growth step is not enough
+       for a block larger than OPS_BUFFER_SIZE (e.g. a long text or regexp) */
+    while(!ctx->failed && ctx->cbops + len >= ctx->alloc)
+        moreOutput(ctx);
+
+    if(ctx->failed || !len)
+        return NULL;
+
+    pIns = ctx->beg + ctx->code->insPos;
+
+    /* Make space at insertion point */
+    memmove(ctx->beg + ctx->code->insPos + len, ctx->beg + ctx->code->insPos,
+            ctx->cbops - ctx->code->insPos);
+
+    /* If we have data copy it in */
+    if(data)
+        memcpy(pIns, data, len);
+
+    /* In debug mode clear it */
+#ifdef _DEBUG
+    else
+        memset(pIns, 0xCC, len);
+#endif
+
+    ctx->cbops += len;
+    ctx->code->endPos += len;
+
+    if(forward)
+        ctx->code->insPos += len;
+
+    /* Only return value if no input data */
+    return data ? NULL : pIns;
+}
+
+
+/*
+ * Encode a list of small integer values into the ops buffer.
+ *
+ * The variable arguments are (size_t size, value) pairs and the list ends
+ * with a size of zero (which must itself be passed as a size_t). Supported
+ * sizes are 1, 2 and 4 bytes; values are stored in the host's native byte
+ * layout. 'forward' is passed through to pushData().
+ */
+static void pushValues(compilecontext* ctx, bool forward, ...)
+{
+    va_list ap;
+    size_t len;
+
+    #define VAL_BUF 20
+    byte buff[VAL_BUF];
+    size_t cur = 0;
+
+    va_start(ap, forward);
+    while((len = va_arg(ap, size_t)) != 0)
+    {
+        /* Flush the scratch buffer when the next value would not fit */
+        if(cur + len > VAL_BUF)
+        {
+            pushData(ctx, buff, cur, forward);
+            cur = 0;
+        }
+
+        /* 1 and 2 byte arguments undergo default argument promotion, so
+           they have to be fetched as int. Values are copied with memcpy
+           because buff + cur is not necessarily aligned. */
+        if(len == 1)
+        {
+            buff[cur] = (byte)va_arg(ap, int);
+        }
+        else if(len == 2)
+        {
+            unsigned short val = (unsigned short)va_arg(ap, int);
+            memcpy(buff + cur, &val, sizeof(val));
+        }
+        else if(len == 4)
+        {
+            unsigned int val = va_arg(ap, unsigned int);
+            memcpy(buff + cur, &val, sizeof(val));
+        }
+        else
+        {
+            /* Unknown size: we cannot tell how to read the value, and
+               advancing 'cur' by it could run past the scratch buffer.
+               Stop here and emit what we have. */
+            ASSERT(false);
+            break;
+        }
+
+        cur += len;
+    }
+    va_end(ap);
+
+    pushData(ctx, buff, cur, forward);
+}
+
+/*
+ * Check that 'regexp' compiles under PCRE with the given options.
+ * Returns R_OK if it does. Otherwise returns R_REGEXP and, when PCRE
+ * supplied a message, records it on the script via scriptSetError().
+ * The compiled pattern is only used as a validity test and is discarded.
+ */
+static int testRegexp(r_script* script, const char* regexp, short options)
+{
+    int erroroffset;
+    const char* error = NULL;
+    pcre* re = pcre_compile(regexp, options, &error, &erroroffset, NULL);
+
+    if(!re)
+    {
+        if(error)
+            scriptSetError(script, error);
+        return R_REGEXP;
+    }
+
+    /* Memory obtained from pcre_compile belongs to PCRE's allocator and
+       must be released through pcre_free, not the C runtime's free() */
+    pcre_free(re);
+    return R_OK;
+}
+
+/* ----------------------------------------------------------------------
+// Functions for pushing specific types of ops on the stack
+*/
+/*
+ * Emit a PCRE match op carrying the (uncompiled) pattern text.
+ * PCRE options are derived from the current block options: DOTALL unless
+ * f_line is set, CASELESS unless f_case is set.
+ * Returns R_OK, R_NOMEM if output space could not be reserved, or the
+ * error from testRegexp() if the pattern does not compile.
+ * NOTE(review): the size computation leaves no explicit byte for the
+ * terminator; presumably match_op_pcre.pattern is declared with at least
+ * one element -- confirm in execute.h.
+ */
+static int pushMatch(r_script* script, compilecontext* ctx, const char* regexp)
+{
+    /* Allocate */
+    size_t total = sizeof(match_op_pcre) + (sizeof(char) * strlen(regexp));
+    match_op_pcre* op = (match_op_pcre*)allocOutput(ctx, total);
+    int rc;
+
+    if(op == NULL)
+        return R_NOMEM;
+
+    /* Setup op */
+    memset(op, 0, total);
+    op->header.len = total;
+    op->header.type = kMatchPcre;
+
+    /* TODO: Do we need to make an option for PCRE_MULTILINE? */
+    op->options = PCRE_DOLLAR_ENDONLY;
+    if((ctx->code->curOptions & f_line) == 0)
+        op->options |= PCRE_DOTALL;
+    if((ctx->code->curOptions & f_case) == 0)
+        op->options |= PCRE_CASELESS;
+
+    /* Reject patterns PCRE cannot compile before storing them */
+    rc = testRegexp(script, regexp, op->options);
+    if(rc < 0)
+        return rc;
+
+    /* Copy the uncompiled regular expression onto the ops stack */
+    strcpy(op->pattern, regexp);
+
+    return R_OK;
+}
+
+/*
+ * Emit a text op holding 'len' characters of 'string' followed by a
+ * terminating zero. 'string' may be NULL when 'len' is 0 (the 'end'
+ * statement pushes an empty text this way); nothing is copied then.
+ * Silently does nothing if output space could not be reserved
+ * (ctx->failed records that).
+ */
+static void pushText(compilecontext* ctx, const char* string, size_t len)
+{
+    /* Allocate */
+    text_op* op = (text_op*)allocOutput(ctx, sizeof(text_op) + (sizeof(char) * (len)));
+
+    /* Setup op */
+    if(op)
+    {
+        op->len = len;
+
+        /* Copy the replacement string onto the ops stack */
+        /* TODO: Get this ready for binary replacements */
+        /* Passing a NULL source to strncpy is undefined even for a zero
+           length, so skip the copy when there is nothing to copy */
+        if(string && len)
+            strncpy((char*)op->string, string, len);
+        op->string[len] = 0;
+    }
+}
+
+/*
+ * Emit a variable op holding a zero-terminated copy of the first
+ * 'lenName' characters of 'name'. The stored length counts the terminator.
+ * Does nothing if output space could not be reserved.
+ */
+static void pushVar(compilecontext* ctx, const char* name, size_t lenName)
+{
+    /* Room for the name plus its terminator */
+    size_t stored = lenName + 1;
+    var_op* op = (var_op*)allocOutput(ctx, sizeof(var_op) + (sizeof(char) * stored));
+
+    if(!op)
+        return;
+
+    op->len = stored;
+
+    /* Copy the variable name onto the ops stack */
+    /* TODO: Get this ready for binary replacements */
+    memcpy(op->name, name, lenName);
+    op->name[lenName] = 0;
+}
+
+
+/* ----------------------------------------------------------------------
+// FUNCTIONS
+*/
+
+/* Keeps track of current functions seen */
+typedef struct funcdef
+{
+ char name[kMaxIdentifier + 1]; /* zero-terminated function name */
+ uint key; /* key of the 'pos' marker emitted for the function */
+}
+funcdef;
+
+/* Growable array of funcdef; allocated and extended by addFunction() */
+typedef struct _funcdefs
+{
+ uint alloc; /* number of entries there is room for */
+ uint cur; /* number of entries in use */
+ funcdef defs[1]; /* first entry; further entries follow in the same allocation */
+}
+funcdefs;
+
+/*
+ * Append a function (name and key) to the list at *ppdefs, creating the
+ * list or enlarging it by 0x10 entries when it is missing or full.
+ * At most kMaxIdentifier characters of 'name' are stored.
+ * Returns false if memory ran out; *ppdefs is NULL in that case.
+ */
+static bool addFunction(funcdefs** ppdefs, const char* name, size_t len, uint key)
+{
+    funcdefs* list = *ppdefs;
+    funcdef* slot;
+
+    /* Do allocation if necessary */
+    if(list == NULL || list->cur >= list->alloc)
+    {
+        uint used = list ? list->cur : 0;
+        uint room = (list ? list->alloc : 0) + 0x10;
+
+        list = (funcdefs*)reallocf(list, sizeof(funcdefs) + (sizeof(funcdef) * room));
+        *ppdefs = list;
+        if(list == NULL)
+            return false;
+
+        list->alloc = room;
+        list->cur = used;
+    }
+
+    /* Push it on the back */
+    slot = &list->defs[list->cur];
+    strlcpy(slot->name, name, min(kMaxIdentifier, len) + 1);
+    slot->key = key;
+    list->cur++;
+
+    return true;
+}
+
+/*
+ * Look up a function by name (case-insensitive; only the first
+ * kMaxIdentifier characters of 'name' take part in the comparison).
+ * Returns the key stored for it, or INVALID_PTR if the list is missing
+ * or holds no function of that name.
+ */
+static uint findFunction(funcdefs* pdefs, const char* name, size_t len)
+{
+    char funcname[kMaxIdentifier + 1];
+    size_t idx;
+
+    if(pdefs == NULL)
+        return INVALID_PTR;
+
+    /* 'name' is not terminated at 'len', so work on a terminated copy */
+    strlcpy(funcname, name, min(len, kMaxIdentifier) + 1);
+
+    for(idx = 0; idx < pdefs->cur; idx++)
+    {
+        if(strcasecmp(pdefs->defs[idx].name, funcname) == 0)
+            return pdefs->defs[idx].key;
+    }
+
+    return INVALID_PTR;
+}
+
+/* ----------------------------------------------------------------------
+// SYNTAX FUNCTIONS
+*/
+
+/*
+ * Is the character at 'posi' escaped or not?
+ * Counts the unbroken run of escape characters immediately before 'posi'
+ * (never looking in front of 'str'); an odd count means the character is
+ * escaped, an even count means the escapes only escape each other.
+ */
+bool isEscaped(const char* str, const char* posi)
+{
+    size_t run = 0;
+    const char* p;
+
+    for(p = posi; p > str && p[-1] == kEscapeChar; p--)
+        run++;
+
+    return (run & 1) ? true : false;
+}
+
+
+/*
+ * Split a tag match into two for later use.
+ * Finds the first unescaped kTagDelim in 'regexp', overwrites it with a
+ * terminator (so 'regexp' becomes the first half) and returns a pointer to
+ * the second half. If there is no unescaped delimiter an error is recorded
+ * on 'script' and NULL is returned; 'regexp' is left unmodified then.
+ */
+char* splitTagMatch(r_script* script, char* regexp)
+{
+    char* second = regexp;
+
+    while((second = strchr(second, kTagDelim)) != NULL)
+    {
+        if(!isEscaped(regexp, second))
+        {
+            second[0] = '\0';
+            return second + 1;
+        }
+
+        /* Escaped delimiter: keep looking after it */
+        second++;
+    }
+
+    scriptSetError(script, "Couldn't find tags in match (separate with '%c').", kTagDelim);
+    return NULL;
+}
+
+
+/*
+ * Eat spaces and comments.
+ * Advances ctx->in over whitespace and over '#' comments, which run to the
+ * end of the line. Never moves past the terminating zero, even when a
+ * comment is the last thing in the script and has no trailing newline.
+ * Returns true if not at end of input afterwards.
+ */
+static bool compileSpace(compilecontext* ctx)
+{
+    for(;;)
+    {
+        if(ctx->in[0] == '#')
+        {
+            /* Eat rest of line; the newline itself is consumed as
+               ordinary whitespace on the next pass */
+            while(ctx->in[0] != '\n' && ctx->in[0] != '\0')
+                ctx->in++;
+        }
+        /* Cast: passing a negative char to isspace is undefined */
+        else if(isspace((unsigned char)ctx->in[0]))
+        {
+            ctx->in++;
+        }
+        else
+        {
+            break;
+        }
+    }
+
+    /* Return true if not end of file */
+    return ctx->in[0] != '\0';
+}
+
+/*
+ * Inspect the start of the script for an encoding signature.
+ * A UTF-8 signature is skipped; a UCS2 or UCS4 signature (either variant)
+ * is rejected with R_SYNTAX and an error on 'script'. Anything else is left
+ * alone. Returns R_OK unless the encoding is unsupported.
+ * Each signature is compared using its own length.
+ */
+int compileEncoding(r_script* script, compilecontext* ctx)
+{
+    if(!memcmp(ctx->in, kEncUTF8, countof(kEncUTF8)))
+    {
+        ctx->in += countof(kEncUTF8);
+    }
+    else if(!memcmp(ctx->in, kEncUCS2_L, countof(kEncUCS2_L)) ||
+            !memcmp(ctx->in, kEncUCS2_R, countof(kEncUCS2_R)) ||
+            !memcmp(ctx->in, kEncUCS4_L, countof(kEncUCS4_L)) ||
+            !memcmp(ctx->in, kEncUCS4_R, countof(kEncUCS4_R)))
+    {
+        scriptSetError(script, "unsupported unicode encoding");
+        return R_SYNTAX;
+    }
+
+    return R_OK;
+}
+
+/*
+ * Compile a single statement.
+ *
+ * Parses the next statement at ctx->in and describes it in the cur* fields
+ * of 'ctx' (statement id, optional name, flags, numeric flag data and data
+ * block). Braces and implied blocks are handled here by pushing/popping
+ * codestack frames; they never surface as the current statement.
+ *
+ * Returns R_OK on success (ctx->curStatement is s_none if the input ended
+ * before a statement was found), R_SYNTAX with an error set on 'script'
+ * for malformed input, or R_NOMEM if a codestack frame could not be
+ * allocated.
+ *
+ * The SYNTAX_ERROR, SYNTAX_ERROR_1 and RETURN macros defined here stay
+ * defined for the code that follows this function.
+ */
+int compileStatement(r_script* script, compilecontext* ctx)
+{
+    /* Some MACROS */
+    #define SYNTAX_ERROR(s) \
+    do{ \
+        scriptSetError(script, s); \
+        RETURN(R_SYNTAX); \
+    } while(0)
+
+    #define SYNTAX_ERROR_1(s, a1) \
+    do{ \
+        scriptSetError(script, s, a1); \
+        RETURN(R_SYNTAX); \
+    } while(0)
+
+    /* Jump to cleanup label instead of return */
+    #define RETURN(r) \
+    do { \
+        retv = r; \
+        goto cleanup; \
+    } while (0)
+
+    const char* end;
+    int retv = R_OK;
+
+    {
+        int i;
+
+        /* Braces are transparent: remember the last real statement */
+        if(ctx->curStatement != s_opbrace && ctx->curStatement != s_clbrace)
+            ctx->lastStatement = ctx->curStatement;
+
+        ctx->curStatement = s_none;
+        ctx->curName = NULL;
+        ctx->lenName = 0;
+        ctx->curFlags = 0;
+        ctx->curFlagData = NULL;
+        ctx->lenFlagData = 0;
+        ctx->curData = NULL;
+        ctx->lenData = 0;
+
+
+        do
+        {
+            /* Eat all whitespace and comments */
+            compileSpace(ctx);
+
+            /* Check for end of input */
+            if(ctx->in[0] == 0)
+                RETURN(R_OK);
+
+            /* Check for Statement */
+            if(!strcspn(ctx->in, kValidIdentifier))
+                end = ctx->in + strspn(ctx->in, kValidIdentifier);
+
+            /* Check for braces */
+            else if(!strcspn(ctx->in, kValidBrace))
+                end = ctx->in + 1; /* Brace always just one character */
+
+            /* Anything else is bad */
+            else
+                SYNTAX_ERROR_1("Unexpected character \'%c\'.", ctx->in[0]);
+
+            /* Okay now determine which statement we got */
+            /* NOTE(review): only the first (end - in) characters are
+               compared, so a word that is a prefix of a keyword also
+               selects that keyword -- confirm whether abbreviations are
+               intended. */
+            for(i = 0; i < countof(kAllStatements); i++)
+            {
+                if(!strncasecmp(kAllStatements[i].text, ctx->in, end - ctx->in))
+                {
+                    /* Check the context */
+                    if(kAllStatements[i].context != SYNTAX_SPECIAL &&
+                       !(ctx->code->curContext & kAllStatements[i].context))
+                        SYNTAX_ERROR_1("\'%s\' not allowed here.", kAllStatements[i].text);
+
+                    ctx->curStatement = kAllStatements[i].syntax;
+                    break;
+                }
+            }
+
+
+            /* Check that we got a statement
+               if not then it should be a function call */
+            if(ctx->curStatement == s_none)
+            {
+                ctx->curStatement = s_call;
+                ctx->curName = ctx->in;
+                ctx->lenName = end - ctx->in;
+            }
+
+
+            /* Make sure it's what we were expecting */
+            if(ctx->nextStatement != s_none &&
+               ctx->nextStatement != ctx->curStatement)
+            {
+                /* If braces were required but not found, then do an impliedBlock */
+                if(ctx->nextStatement == s_opbrace &&
+                   kAllStatements[ctx->lastStatement].context & SYNTAX_BLOCK)
+                {
+                    /* Remember what we really saw and re-read it next
+                       time round, inside the implied block */
+                    ctx->nextStatement = ctx->curStatement;
+                    ctx->curStatement = s_implied;
+                    end = ctx->in;
+                }
+
+                /* Otherwise it's an error */
+                else
+                {
+                    SYNTAX_ERROR_1("Expecting \'%s\' here.", kAllStatements[ctx->nextStatement].text);
+                }
+            }
+
+
+            ctx->in = end;
+            ctx->nextStatement = s_none;
+
+            /* We process braces here */
+            if(ctx->curStatement == s_opbrace ||
+               ctx->curStatement == s_implied)
+            {
+                PUSH_CODESTACK(ctx);
+                ctx->code->curContext |= SYNTAX_INBRACE;
+
+                if(ctx->lastStatement == s_function)
+                    ctx->code->curContext |= SYNTAX_INFUNCTION;
+
+                if(ctx->curStatement == s_implied)
+                    ctx->code->curContext |= SYNTAX_IMPLIED;
+
+                ctx->curStatement = s_none;
+                continue;
+            }
+
+            /* See if we need to pop any codestacks for implied blocks */
+            while(ctx->code->curContext & SYNTAX_IMPLIED &&
+                  ctx->code->numStatements > 0)
+            {
+                /* Only should be one statement on an implied block */
+                ASSERT(ctx->code->numStatements == 1);
+                POP_CODESTACK(ctx);
+            }
+
+            /* Closing braces here */
+            if(ctx->curStatement == s_clbrace)
+            {
+                /* An unmatched brace must not pop the last frame:
+                   everything else dereferences ctx->code */
+                if(!ctx->code->pPrev)
+                    SYNTAX_ERROR("Unexpected \'}\'.");
+
+                POP_CODESTACK(ctx);
+                ctx->curStatement = s_none;
+                continue;
+            }
+
+        }
+        while(ctx->curStatement == s_none);
+
+
+        /* Okay now do name if we have one */
+        if(kAllStatements[ctx->curStatement].args & ARGUMENT_NAME && !ctx->curName)
+        {
+            compileSpace(ctx);
+
+            /* Now look for a normal or name flag */
+            if(!strcspn(ctx->in, kValidIdentifier))
+            {
+                end = ctx->in + strspn(ctx->in, kValidIdentifier);
+
+                /* Any flags not found assume it's an identifier */
+                ctx->curName = ctx->in;
+                ctx->lenName = end - ctx->in;
+
+                if(ctx->lenName > kMaxIdentifier)
+                    SYNTAX_ERROR("Maximum length for an identifier is 40 characters.");
+
+                ctx->in = end;
+            }
+        }
+
+
+        /* Eat the next little bit of whitespace */
+        compileSpace(ctx);
+
+        /* Okay now look for flags start */
+        if(ctx->in[0] == kFlagsStart)
+        {
+            ctx->in++;
+
+            while(1)
+            {
+                compileSpace(ctx);
+
+
+                /* Check for a number flag */
+                if(!strcspn(ctx->in, kValidNum))
+                {
+                    if(ctx->curFlagData)
+                        SYNTAX_ERROR("Invalid flags.");
+
+                    /* If found then just grab and go */
+                    ctx->curFlags |= f_num;
+                    ctx->curFlagData = ctx->in;
+                    ctx->lenFlagData = strspn(ctx->in, kValidNum);
+
+                    ctx->in += ctx->lenFlagData;
+                }
+
+                /* Now look for a normal flag */
+                else if(!strcspn(ctx->in, kValidIdentifier))
+                {
+                    bool found = false;
+                    end = ctx->in + strspn(ctx->in, kValidIdentifier);
+
+                    /* Okay now try and map out that flag */
+                    for(i = 0; i < countof(kAllFlags); i++)
+                    {
+                        if(!strncasecmp(kAllFlags[i].text, ctx->in, end - ctx->in))
+                        {
+                            found = true;
+                            ctx->curFlags |= kAllFlags[i].flag;
+                            break;
+                        }
+                    }
+
+                    /* Any flags not found assume it's an identifier */
+                    if(!found)
+                    {
+                        if(ctx->curName != NULL)
+                            SYNTAX_ERROR("Invalid flags.");
+
+                        ctx->curName = ctx->in;
+                        ctx->lenName = end - ctx->in;
+                    }
+
+                    ctx->in = end;
+                }
+
+                /* End ) of flags */
+                else if(ctx->in[0] == kFlagsEnd)
+                {
+                    ctx->in++;
+                    break;
+                }
+
+                /* Separator , between flags */
+                else if(ctx->in[0] == kFlagDelim)
+                    ctx->in++;
+
+                else
+                    SYNTAX_ERROR("Expected a flag.");
+            }
+
+            /* Now check the flags: none may fall outside the set the
+               statement allows */
+            if((kAllStatements[ctx->curStatement].flags | ctx->curFlags)
+                != kAllStatements[ctx->curStatement].flags)
+                SYNTAX_ERROR("Invalid flags for this statement.");
+
+        }
+
+
+        /* Okay now do data processing */
+        if(kAllStatements[ctx->curStatement].args & ARGUMENT_DATA)
+        {
+            const char* delim;
+            compileSpace(ctx);
+
+            /* Check for a delimiter. strchr() also matches the string
+               terminator, so end of input has to be excluded explicitly
+               or we would treat it as a delimiter and scan past the end
+               of the script. */
+            delim = (ctx->in[0] != '\0') ? strchr(kValidDelim, ctx->in[0]) : NULL;
+
+            if(delim)
+            {
+                ctx->in++;
+                end = ctx->in;
+
+                /* Find end of data but checking for escaped delimiters */
+                while(1)
+                {
+                    end = strchr(end, *delim);
+
+                    if(!end)
+                        SYNTAX_ERROR("Couldn't find end of data for this statement");
+
+                    if(!isEscaped(ctx->in, end))
+                        break;
+
+                    end++;
+                }
+
+
+                ctx->curData = ctx->in;
+                ctx->lenData = (end - ctx->in);
+
+                /* Continue after the closing delimiter */
+                ctx->in = end + 1;
+            }
+            else
+            {
+                SYNTAX_ERROR("Expected data for this statement.");
+            }
+        }
+
+        /* Names can also arrive through the flags list, so check again */
+        if(ctx->lenName > kMaxIdentifier)
+            SYNTAX_ERROR("Maximum length for an identifier is 40 characters.");
+
+        ctx->code->numStatements++;
+    }
+
+    /* done! */
+
+cleanup:
+    return retv;
+}
+
+/*
+ * Work out the 1-based line number of 'cur' within the text starting at
+ * 'beg' by counting newline characters from 'beg' up to and including
+ * 'cur'.
+ */
+static uint getLine(const char* beg, const char* cur)
+{
+    size_t line = 1;
+    const char* p;
+
+    for(p = beg; p <= cur; p++)
+    {
+        if(*p == '\n')
+            line++;
+    }
+
+    return line;
+}
+
+int compilerRun(r_script* script, const char* data)
+{
+
+ int retv = R_OK; /* used by RETURN macro */
+ funcdefs* pFunctions = NULL;
+ char* regexp = NULL;
+
+ /* We allocate the main instruction buffer */
+ compilecontext ctx;
+ memset(&ctx, 0, sizeof(ctx));
+
+ ctx.in = data;
+ ctx.curKey = 0x00000100;
+ ctx.beg = (vmop_t*)malloc(OPS_BUFFER_SIZE);
+ ctx.alloc = OPS_BUFFER_SIZE;
+ ctx.code = pushCodeStack(NULL);
+
+ if(!ctx.beg || !ctx.code)
+ RETURN(R_NOMEM);
+
+#ifdef _DEBUG
+ {
+ int i;
+ /* Do a check here! Index should be equal to */
+ /* command name for each statement */
+ for(i = 0; i < countof(kAllStatements); i++)
+ ASSERT(i == kAllStatements[i].syntax);
+ }
+#endif
+
+
+ /*
+ Push a first empty pops stack
+ We have to have one to pop below
+ */
+ PUSH_CODESTACK(&ctx);
+ ctx.code->curContext = SYNTAX_INROOT;
+
+ /* Push our signature of 4 nop bytes */
+ PUSH_OP(o_nop);
+ PUSH_OP(o_nop);
+ PUSH_OP(o_nop);
+ PUSH_OP(o_nop);
+
+ /* Set up initial stack frame */
+ PUSH_OP_2(o_mov, r_bp, r_sp);
+ /* Push the beginning of the main loop here */
+ PUSH_OP_1(o_pos, ARG_MAKE_VALUE(0L));
+ PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(0L));
+
+ retv = compileEncoding(script, &ctx);
+ if(retv < 0)
+ RETURN(retv);
+
+ while(ctx.in[0] != '\0')
+ {
+ if(ctx.cbops + 0x200 >= ctx.alloc)
+ moreOutput(&ctx);
+
+ /* Here we check if we have enough memory */
+ if(ctx.failed)
+ RETURN(R_NOMEM);
+
+ /* Get and parse the current statement */
+ retv = compileStatement(script, &ctx);
+ if(retv < 0)
+ RETURN(retv);
+
+ if(ctx.curStatement == s_none)
+ continue;
+
+
+
+
+ /* Now we pop the commit end code from previous statement */
+ commitCodeStack(ctx.code);
+
+ /* Okay now do the rest of the statements */
+ switch(ctx.curStatement)
+ {
+
+
+ /*//////////////////////////////////////////////////////
+ // function
+ */
+ case s_function:
+ {
+ /* SYNTAX ----------------------------------------
+ //
+ // function(name)
+ // {
+ // .....
+ // }
+ */
+
+ uint key, keyJmp;
+
+ /* Check that we got a name */
+ if(!(ctx.curName && ctx.lenName))
+ SYNTAX_ERROR("'function' needs a name");
+
+ /* We need a opening brace next */
+ ctx.nextStatement = s_opbrace;
+
+
+ key = ARG_MAKE_VALUE(ctx.curKey++);
+
+ /* Put the function name at current op pos */
+ if(!addFunction(&pFunctions, ctx.curName,
+ ctx.lenName, key))
+ RETURN(R_NOMEM);
+
+
+
+ /* OPS --------------------------------------------
+ //
+ // jmp <1>
+ // pos <function>
+ // push bp
+ // mov bp, r_sp
+ // etc.
+ // ......
+ // etc.
+ // mov fe, 1
+ // mov sp, bp
+ // pop bp
+ // ret
+ // <1>
+ */
+
+ keyJmp = ARG_MAKE_VALUE(ctx.curKey++);
+
+ PUSH_OP_1(o_jmp, keyJmp);
+ PUSH_OP_1(o_pos, key);
+ PUSH_OP_1(o_push, r_bp);
+ PUSH_OP_2(o_mov, r_bp, r_sp);
+
+
+ PUSH_ROP_1(o_pos, keyJmp);
+ PUSH_ROP(o_ret);
+ PUSH_ROP_1(o_pop, r_bp);
+ PUSH_ROP_2(o_mov, r_sp, r_bp);
+ PUSH_ROP_2(o_mov, r_fe, ARG_MAKE_VALUE(1));
+
+ }
+ break;
+
+
+ /*//////////////////////////////////////////////////////
+ // loop
+ */
+ case s_loop:
+ {
+
+ uint key, key2;
+
+ /* SYNTAX ----------------------------------------
+ //
+ // loop
+ // {
+ // .....
+ // }
+ */
+
+ /* We need a opening brace next */
+ ctx.nextStatement = s_opbrace;
+
+
+
+ /* OPS --------------------------------------------
+ //
+ // push ac
+ // <1>
+ // mov ac, 0
+ // .....
+ // pop x2 pop the previously pushed action value
+ // test ac test current action
+ // jne <2> if action
+ // mov x2, ac then overide previous action value
+ // <2>
+ // push x2 push back changed action value
+ // je <1>
+ // pop ac pop out the
+ */
+
+ key = ARG_MAKE_VALUE(ctx.curKey++);
+ key2 = ARG_MAKE_VALUE(ctx.curKey++);
+
+ PUSH_OP_1(o_push, r_ac);
+ PUSH_OP_1(o_pos, key);
+ PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(0L));
+
+ PUSH_ROP_1(o_pop, r_ac);
+ PUSH_ROP_1(o_je, key);
+ PUSH_ROP_1(o_push, r_x2);
+ PUSH_ROP_1(o_pos, key2);
+ PUSH_ROP_2(o_mov, r_x2, r_ac);
+ PUSH_ROP_1(o_jne, key2);
+ PUSH_ROP_1(o_test, r_ac);
+ PUSH_ROP_1(o_pop, r_x2);
+ }
+ break;
+
+
+ /*//////////////////////////////////////////////////////
+ // once
+ */
+ case s_once:
+ {
+ uint keyOnce, key1, key5;
+
+ /* SYNTAX ----------------------------------------
+ //
+ // once
+ // {
+ // .....
+ // }
+ */
+
+ /* We need a opening brace next */
+ ctx.nextStatement = s_opbrace;
+
+
+
+ /* OPS --------------------------------------------
+ //
+ // test mem(value) Pull in the flag from memory
+ // jne <1> If not present then jump to containing code
+ // mov fe, 0 Otherwise set fail flag
+ // jmp <5> And skip the match
+ // <1>
+ // .....
+ //
+ // mov mem(value), 1
+ // <5>
+ */
+
+
+ keyOnce = ARG_MAKE_MEMORY(ctx.curKey++);
+ key1 = ARG_MAKE_VALUE(ctx.curKey++);
+ key5 = ARG_MAKE_VALUE(ctx.curKey++);
+
+
+ /* Once code */
+ PUSH_OP_1(o_test, keyOnce);
+ PUSH_OP_1(o_jne, key1);
+ PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_jmp, key5);
+ PUSH_OP_1(o_pos, key1);
+
+
+ PUSH_ROP_1(o_pos, key5);
+ PUSH_ROP_2(o_mov, keyOnce, ARG_MAKE_VALUE(1));
+ }
+ break;
+
+
+
+ /*/////////////////////////////////////////////////////
+ // call
+ */
+ case s_call:
+ {
+ uint key;
+
+ /* SYNTAX ----------------------------------------
+ //
+ // call(name)
+ */
+
+ /* Check that we got a name */
+ if(!(ctx.curName && ctx.lenName))
+ SYNTAX_ERROR("'call' needs a function name");
+
+ /* Find the function */
+ key = findFunction(pFunctions, ctx.curName,
+ ctx.lenName);
+
+ if(key == INVALID_PTR)
+ SYNTAX_ERROR("function does not exist");
+
+
+ /* OPS --------------------------------------------
+ //
+ // call <funcaddr>
+ */
+
+ PUSH_OP_1(o_call, key);
+ }
+ break;
+
+
+ /*///////////////////////////////////////////////////////
+ // return
+ */
+ case s_return:
+ {
+ /* SYNTAX ----------------------------------------
+ //
+ // return(0)
+ */
+
+ uint code = ARG_MAKE_VALUE(1);
+
+ /* Check for the number */
+ if(ctx.curFlags & f_num)
+ {
+ if(ctx.lenFlagData > 1 || (ctx.curFlagData[0] != '0' && ctx.curFlagData[0] != '1'))
+ SYNTAX_ERROR("Return code must be 0 or 1");
+
+ if(ctx.curFlagData[0] == '0')
+ code = ARG_MAKE_VALUE(0);
+ }
+
+ /* OPS --------------------------------------------
+ //
+ // mov fe, 0 (or 1 depending on code)
+ // mov sp, r_bp
+ // pop bp
+ // ret
+ */
+
+ PUSH_OP_2(o_mov, r_fe, code);
+ PUSH_OP_2(o_mov, r_sp, r_bp);
+ PUSH_OP_1(o_pop, r_bp);
+ PUSH_OP(o_ret);
+ }
+ break;
+
+
+
+ /*//////////////////////////////////////////////////////
+ // end
+ */
+ case s_end:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // end
+ */
+
+
+ /* OPS --------------------------------------------
+ //
+ // stop
+ */
+
+ PUSH_OP(o_text);
+ pushText(&ctx, NULL, 0);
+ PUSH_OP_1(o_stop, ARG_MAKE_VALUE(0));
+ }
+ break;
+
+
+
+ /*//////////////////////////////////////////////////////
+ // end
+ */
+ case s_stop:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // stop "message"
+ */
+
+ /* OPS --------------------------------------------
+ //
+ // text "message"
+ // stop
+ */
+
+ PUSH_OP(o_text);
+ pushText(&ctx, ctx.curData, ctx.lenData);
+ PUSH_OP_1(o_stop, ARG_MAKE_VALUE(1));
+ }
+ break;
+
+
+ /*/////////////////////////////////////////////////////
+ // match
+ */
+ case s_match:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // match(not, once) "regexp"
+ */
+
+ uint keyOnce, keyWatermark, keyJmp1, keyJmp2, keyJmp3,
+ keyJmp4, keyJmp5, keyJmp6, keyJmp7, groupNum,
+ key1, key4, key5, key9;
+
+ /* Get the flags */
+ bool bNot = ctx.curFlags & f_not ? true : false;
+ bool bOnce = ctx.curFlags & f_once ? true : false;
+ bool bHas = ctx.curFlags & f_find ? true : false;
+ bool bTag = ctx.curFlags & f_tag ? true : false;
+
+ /* Check that we got data */
+ if(!ctx.curData || !ctx.lenData)
+ SYNTAX_ERROR("'match' needs a regular expression");
+
+ /* We need a opening brace next */
+ ctx.nextStatement = s_opbrace;
+
+ regexp = strndup(ctx.curData, ctx.lenData);
+ if(!regexp)
+ RETURN(R_NOMEM);
+
+ groupNum = 0;
+
+ if(ctx.curFlagData && ctx.lenFlagData)
+ {
+ if(ctx.lenFlagData > 1)
+ SYNTAX_ERROR("Group specifier must be between 0 and 9.");
+
+ if(bHas)
+ SYNTAX_ERROR("Group specifier invalid with 'has' flag.");
+
+
+ /* Get the number to use */
+ groupNum = ctx.curFlagData[0] - '0';
+ }
+
+
+ /* OPS --------------------------------------------
+ //
+ // push x1 Save limits
+ // push y1 " "
+
+ // test mem(value) (once) Pull in the flag from memory
+ // jne <1> (once) If not present then jump to match code
+ // mov fe, 0 (once) Otherwise set fail flag
+ // jmp <5> (once) And skip the match
+ // <1> (once)
+
+ // mov x6, mem(key_value) (watermark) Get the watermark
+ // cmp x1, r_x6 (watermark) If watermark higher than match area
+ // mov fe, r_fg (watermark)
+ // jne <2> (watermark) skip
+ // mov x6, r_x1 (watermark) Otherwise bring up watermark to match area
+ // <2> (watermark)
+
+ // match x6, r_y1 Do match
+ // mov x4, fe
+
+ // <5> (once)
+
+ // jne <3> If match failed skip set below
+
+ // mov ac, 1 Set action flag
+
+ // cmp cg, 2 See if we have enough groups
+ // mov fe, r_fg ""
+ // jne <7>
+ // mov fe, 0 If not then set failed
+ // jmp <3> And skip to failed part
+ // <7>
+ // mov x1, r_b2 Set new limit for inside
+ // mov y1, e2 different registers (depending on number parameter)
+
+ // mov x2, r_b0 (watermark) Get the start of batch
+ // add x2, 1 (watermark) Add one to it
+ // mov mem(value), x2 (watermark) Stash it away in memory
+
+ // <3>
+
+ // cmp x4, 1 (not) Compare success against 0 or 1 depending on not
+ // push fe
+ // jne <4> Skip if no match
+
+ // mov mem(value), 1 (once)
+
+ // .....
+
+ // <4>
+ // pop fe
+
+ // pop y1
+ // pop x1
+ */
+
+ keyOnce = ARG_MAKE_MEMORY(ctx.curKey++);
+ keyWatermark = ARG_MAKE_STACK(ctx.curKey++);
+ keyJmp1 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp2 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp3 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp4 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp5 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp6 = ARG_MAKE_VALUE(ctx.curKey++);
+ keyJmp7 = ARG_MAKE_VALUE(ctx.curKey++);
+
+ /* Save for later */
+ PUSH_OP_1(o_push, r_x1);
+ PUSH_OP_1(o_push, r_y1);
+
+ if(bOnce)
+ {
+ /* Once code */
+ PUSH_OP_1(o_test, keyOnce);
+ PUSH_OP_1(o_jne, keyJmp1);
+ PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_jmp, keyJmp5);
+ PUSH_OP_1(o_pos, keyJmp1);
+ }
+
+ /* Watermark code */
+ PUSH_OP_2(o_mov, r_x6, keyWatermark);
+ PUSH_OP_2(o_cmp, r_x1, r_x6);
+ PUSH_OP_2(o_mov, r_fe, r_fg);
+ PUSH_OP_1(o_jne, keyJmp2);
+ PUSH_OP_2(o_mov, r_x6, r_x1);
+ PUSH_OP_1(o_pos, keyJmp2);
+
+
+ /* Is it a simple match? */
+ if(!bTag)
+ {
+ /* Actual match */
+ PUSH_OP_2(o_match, r_x6, r_y1);
+ retv = pushMatch(script, &ctx, regexp);
+ if(retv < 0) RETURN(retv);
+ }
+
+ /* Or the very complicated tag statement */
+ else
+ {
+ /* Split the regular expression */
+ char* second = splitTagMatch(script, regexp);
+ if(!second) RETURN(R_SYNTAX);
+
+
+ /* (See code docs in tag.txt file) */
+ key1 = ARG_MAKE_VALUE(ctx.curKey++);
+ key4 = ARG_MAKE_VALUE(ctx.curKey++);
+ key5 = ARG_MAKE_VALUE(ctx.curKey++);
+ key9 = ARG_MAKE_VALUE(ctx.curKey++);
+
+
+ /* Setup */
+ PUSH_OP_2(o_mov, r_x2, ARG_MAKE_VALUE(0));
+ PUSH_OP_2(o_mov, r_x0, r_x6);
+ PUSH_OP_2(o_mov, r_y0, r_y1);
+
+ /* Top of loop */
+ PUSH_OP_1(o_pos, key1);
+
+ /* Start code */
+ PUSH_OP_2(o_match, r_x0, r_y1);
+
+ retv = pushMatch(script, &ctx, regexp);
+ if(retv < 0) RETURN(retv);
+
+ PUSH_OP_1(o_jne, key5);
+ PUSH_OP_2(o_cmp, r_b0, r_y0);
+ PUSH_OP_1(o_je, key5);
+ PUSH_OP_2(o_mov, r_fe, r_fg);
+ PUSH_OP_1(o_je, key5);
+
+ PUSH_OP_2(o_cmp, r_x2, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_jne, key4);
+ PUSH_OP_2(o_mov, r_x5, r_b0);
+ PUSH_OP_2(o_mov, r_y5, r_e0);
+ PUSH_OP_2(o_mov, r_y0, r_e0);
+ PUSH_OP_1(o_pos, key4);
+
+ PUSH_OP_2(o_mov, r_x2, ARG_MAKE_VALUE(0));
+ PUSH_OP_2(o_mov, r_x0, r_e0);
+
+ /* Endless loop check */
+ PUSH_OP_2(o_cmp, r_y2, ARG_MAKE_VALUE(0x00100000));
+ PUSH_OP_1(o_je, key5);
+
+ /* End code */
+ PUSH_OP_2(o_match, r_y0, r_y1);
+ retv = pushMatch(script, &ctx, second);
+ if(retv < 0) RETURN(retv);
+
+ PUSH_OP_1(o_jne, key5);
+ PUSH_OP_2(o_mov, r_y0, r_e0);
+ PUSH_OP_2(o_mov, r_x7, r_b0);
+ PUSH_OP_2(o_mov, r_y7, r_e0);
+
+ /* Locks check */
+ PUSH_OP_2(o_check, r_y5, r_x7);
+ PUSH_OP_2(o_mov, r_x2, r_fe);
+
+ /* End of loop */
+ PUSH_OP_1(o_jmp, key1);
+ PUSH_OP_1(o_pos, key5);
+
+ /* Wrap up */
+ PUSH_OP_2(o_cmp, r_x2, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_je, key9);
+ PUSH_OP_2(o_mov, r_b0, r_x5);
+ PUSH_OP_2(o_mov, r_e0, r_y7);
+ PUSH_OP_2(o_mov, r_b1, r_x5);
+ PUSH_OP_2(o_mov, r_e1, r_y5);
+ PUSH_OP_2(o_mov, r_b2, r_y5);
+ PUSH_OP_2(o_mov, r_e2, r_x7);
+ PUSH_OP_2(o_mov, r_b3, r_x7);
+ PUSH_OP_2(o_mov, r_e3, r_y7);
+ PUSH_OP_2(o_mov, r_cg, ARG_MAKE_VALUE(4));
+ PUSH_OP_1(o_pos, key9);
+ PUSH_OP_2(o_mov, r_fe, r_x2);
+
+ }
+
+
+ PUSH_OP_2(o_mov, r_x4, r_fe);
+
+ if(bOnce)
+ PUSH_OP_1(o_pos, keyJmp5);
+
+ /* Skip all the rest of the setup if failed */
+ PUSH_OP_1(o_jne, keyJmp3);
+
+ /* Set action flag */
+ PUSH_OP_2(o_mov, r_ac, ARG_MAKE_VALUE(1));
+
+ if(!bHas)
+ {
+ /* Group validation code */
+ PUSH_OP_2(o_cmp, ARG_MAKE_VALUE(groupNum + 1), r_cg);
+ PUSH_OP_2(o_mov, r_fe, r_fg);
+ PUSH_OP_1(o_jne, keyJmp7);
+ PUSH_OP_2(o_mov, r_fe, ARG_MAKE_VALUE(0));
+ PUSH_OP_1(o_jmp, keyJmp3);
+ PUSH_OP_1(o_pos, keyJmp7);
+
+ /* Now, depending on the group number, pick the pair of begin/end registers to use as the new limits */
+ PUSH_OP_2(o_mov, r_x1, ARG_MAKE_REGISTER(ARG_GET_REGISTER(r_b0) + groupNum));
+ PUSH_OP_2(o_mov, r_y1, ARG_MAKE_REGISTER(ARG_GET_REGISTER(r_e0) + groupNum));
+ }
+
+ /* Watermark */
+ PUSH_OP_2(o_mov, keyWatermark, r_e0);
+
+ /* End of setup code */
+ PUSH_OP_1(o_pos, keyJmp3);
+
+ /* Implement 'not' */
+ PUSH_OP_2(o_cmp, r_x4, ARG_MAKE_VALUE(bNot ? 0 : 1));
+ PUSH_OP_1(o_push, r_fe);
+ PUSH_OP_1(o_jne, keyJmp4);
+
+ /* Now we're inside */
+ if(bOnce)
+ PUSH_OP_2(o_mov, keyOnce, ARG_MAKE_VALUE(1));
+
+
+ /* Wrap up code */
+ PUSH_ROP_1(o_pop, r_x1);
+ PUSH_ROP_1(o_pop, r_y1);
+ PUSH_ROP_1(o_pop, r_fe);
+ PUSH_ROP_1(o_pos, keyJmp4);
+
+
+ free(regexp);
+ regexp = NULL;
+ }
+ break;
+
+
+ /*//////////////////////////////////////////////////////
+ // replace
+ */
+ case s_replace:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // replace "replacetext"
+ */
+
+ uint key;
+
+ /* Check that we got data */
+ if(!ctx.curData)
+ SYNTAX_ERROR("'replace' needs text to replace");
+
+ /* OPS --------------------------------------------
+ //
+ // check x1, r_y1
+ // jne <1>
+ // repl x1, r_y1
+ // lock x1, r_y1
+ // <1>
+ */
+
+ key = ARG_MAKE_VALUE(ctx.curKey++);
+
+ PUSH_OP_2(o_check, r_x1, r_y1);
+ PUSH_OP_1(o_jne, key);
+
+ PUSH_OP(o_text);
+ pushText(&ctx, ctx.curData, ctx.lenData);
+ PUSH_OP_2(o_repl, r_x1, r_y1);
+
+ PUSH_OP_2(o_lock, r_x1, r_y1);
+ PUSH_OP_1(o_pos, key);
+ }
+ break;
+
+
+ /*//////////////////////////////////////////////////////
+ // lock
+ */
+ case s_lock:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // lock
+ */
+
+ /* OPS --------------------------------------------
+ //
+ // lock x1, r_y1
+ */
+
+ PUSH_OP_2(o_lock, r_x1, r_y1);
+ }
+ break;
+
+
+
+ /*//////////////////////////////////////////////////////
+ // variable functions
+ */
+ case s_setvar:
+ case s_clrvar:
+ case s_addvar:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // set name "value"
+ // clr name
+ // add name "value"
+ */
+
+ /* Check that we got a name */
+ if(!(ctx.curName && ctx.lenName))
+ SYNTAX_ERROR("Missing variable name");
+
+ /* Check that we got data */
+ if((ctx.curStatement == s_setvar ||
+ ctx.curStatement == s_addvar) && !ctx.curData)
+ SYNTAX_ERROR("Missing variable value");
+
+
+ /* OPS --------------------------------------------
+ */
+
+ /* If not clearing then push text */
+ if(ctx.curStatement != s_clrvar)
+ {
+ PUSH_OP(o_text);
+ pushText(&ctx, ctx.curData, ctx.lenData);
+ }
+
+ /* If not adding then clear */
+ if(ctx.curStatement != s_addvar)
+ {
+ PUSH_OP(o_clrvar);
+ pushVar(&ctx, ctx.curName, ctx.lenName);
+ }
+
+ /* If not clearing then set */
+ if(ctx.curStatement != s_clrvar)
+ {
+ PUSH_OP(o_setvar);
+ pushVar(&ctx, ctx.curName, ctx.lenName);
+ }
+ }
+ break;
+
+ /*//////////////////////////////////////////////////////
+ // else
+ */
+ case s_else:
+ {
+
+ /* SYNTAX ----------------------------------------
+ //
+ // else
+ // {
+ // .....
+ // }
+ */
+
+ uint key;
+
+ /* REMOVED the following because now else can follow any statement
+ //
+ // Make sure the previous statement was a match
+ // if(ctx.lastStatement != s_match && ctx.lastStatement != s_replace &&
+ // ctx.lastStatement != s_call)
+ // SYNTAX_ERROR("'else' must follow a match, replace or call statement");
+ */
+
+ /* Next statement must be a opening brace */
+ ctx.nextStatement = s_opbrace;
+
+ /* OPS --------------------------------------------
+ //
+ // je <1>
+ // ......
+ // <1>
+ */
+
+ key = ARG_MAKE_VALUE(ctx.curKey++);
+
+ PUSH_OP_1(o_je, key);
+ PUSH_ROP_1(o_pos, key);
+ }
+ break;
+
+ /*/////////////////////////////////////////////////////
+ // message
+ */
+ case s_message:
+ {
+ /* SYNTAX ----------------------------------------
+ //
+ // message "data"
+ */
+
+ /* Check that we got data */
+ if(!ctx.curData)
+ SYNTAX_ERROR("Missing message text.");
+
+ /* OPS --------------------------------------------
+ //
+ // text "message"
+ // msg
+ */
+
+ PUSH_OP(o_text);
+ pushText(&ctx, ctx.curData, ctx.lenData);
+ PUSH_OP(o_msg);
+ }
+ break;
+
+
+ /*/////////////////////////////////////////////////////
+ // options
+ */
+ case s_options:
+ {
+ /* SYNTAX ----------------------------------------
+ //
+ // options(case, line)
+ */
+
+ /* Save the options into the context */
+ ctx.code->curOptions = ctx.curFlags;
+ }
+ break;
+
+ default:
+ ASSERT(false);
+
+ }
+ }
+
+ /* Pop out of any implied blocks */
+ while(ctx.code->curContext & SYNTAX_IMPLIED)
+ POP_CODESTACK(&ctx);
+
+ /* copy any remaining post ops */
+ POP_CODESTACK(&ctx);
+
+ /* This is the bottom of the main loop */
+ PUSH_OP_1(o_test, r_ac);
+ PUSH_OP_1(o_je, ARG_MAKE_VALUE(0));
+
+ /* Put an end marker */
+ PUSH_OP(o_end);
+
+
+ /* Check brace syntax */
+ if(ctx.code->pPrev != NULL)
+ SYNTAX_ERROR("Not all braces matched.");
+
+ /* Put the compiled script in the script */
+ script->ops = ctx.beg;
+ script->len = ctx.cbops;
+
+ /* This fixes all the jmp and removes pos */
+ retv = compilerOptimize(script);
+
+cleanup:
+ if(retv == R_SYNTAX || retv == R_REGEXP)
+ script->errline = getLine(data, ctx.in) - 1;
+
+ /* Unwind codestack */
+ while(ctx.code)
+ ctx.code = popCodeStack(ctx.code);
+
+ if(pFunctions)
+ free(pFunctions);
+ if(regexp)
+ free(regexp);
+
+ if(retv < R_OK)
+ {
+ if(ctx.beg)
+ free(ctx.beg);
+
+ script->ops = NULL;
+ script->len = 0;
+ }
+
+ return retv;
+}
+
+
+
+/*
+ * compilerOptimize: resolve jump targets and strip the 'pos' markers.
+ *
+ * Pass 1 walks scr->ops up to the o_end marker. Every o_pos op is recorded
+ * as an (offset, key) pair in 'lands' and then cut out of the op stream,
+ * shrinking scr->len by 5 bytes each time.
+ * Pass 2 walks the stream again and, for every o_jmp / o_jne / o_je /
+ * o_call, replaces the key argument with the recorded offset of the pos
+ * that carried the same key.
+ *
+ * Returns retv, which starts as R_OK. RETURN(R_NOMEM) is used when the
+ * 'lands' array cannot be grown (presumably it sets retv and jumps to
+ * the cleanup label - confirm against the macro definition).
+ */
+int compilerOptimize(r_script* scr)
+{
+ vmop_t* op = scr->ops;
+ int retv = R_OK;
+
+ /* First find and remove all pos
+ // NOTE: land and pos are used interchangeably in this code */
+
+ /* lands holds pairs: [2*i] = offset in the op stream, [2*i + 1] = key */
+ uint* lands = NULL;
+ uint cur = 0;
+ uint alloc = 0;
+
+ while(*op != o_end)
+ {
+ ASSERT(op < scr->ops + scr->len);
+
+ switch(*op)
+ {
+ case o_pos:
+ {
+ /* Grow the table in steps of 0x40 entries (two uints per entry) */
+ if(alloc <= cur)
+ {
+ alloc += 0x40;
+ lands = (uint*)reallocf(lands, sizeof(uint) * 2 * alloc);
+ if(!lands)
+ RETURN(R_NOMEM);
+
+ }
+
+ ASSERT(ARG_TYPE(op[1]) == ARG_VAL_TYPE);
+
+ /* Position in 0 */
+ lands[cur * 2] = op - scr->ops;
+ /* key in 1 */
+ lands[(cur * 2) + 1] = *((uint*)(op + 1));
+
+ cur++;
+
+ /*
+ Okay now eat the rest of the stuff
+ total length of a pos should be 5 bytes
+ */
+ /* op is deliberately not advanced: after the memmove it already
+ points at the op that followed the removed pos */
+ scr->len -= 5;
+ memmove(op, op + 5, scr->len - (op - scr->ops));
+ }
+ break;
+
+ default:
+ opsIterate(&op);
+ break;
+ };
+ }
+
+
+ /* Now fix all jumps and calls */
+ op = scr->ops;
+
+ while(*op != o_end)
+ {
+ ASSERT(op < scr->ops + scr->len);
+
+ switch(*op)
+ {
+ case o_pos:
+ /* Shouldn't meet any pos ops after we removed them above */
+ ASSERT(false);
+ break;
+
+ case o_jmp:
+ case o_jne:
+ case o_je:
+ case o_call:
+ {
+ /* The argument following the opcode still holds the key */
+ uint* parg = ((uint*)(op + 1));
+ bool found = false;
+ size_t i;
+
+ /* find the key in our array */
+ for(i = 0; i < cur; i++)
+ {
+ if(lands[(i * 2) + 1] == *parg)
+ {
+ found = true;
+ *parg = ARG_MAKE_VALUE(lands[i * 2]);
+ break;
+ }
+ }
+
+ /* An unmatched key is only reported through ASSERT; the
+ argument is left unchanged in that case */
+ if(!found)
+ ASSERT(false && "jump without a pos");
+ }
+ /* (Note fall through) */
+
+ default:
+ opsIterate(&op);
+ break;
+ }
+ }
+
+cleanup:
+ if(lands)
+ free(lands);
+
+ return retv;
+}
+
+/*
+ * opsIterate: step *ops over one complete instruction.
+ *
+ * On entry *ops points at an opcode byte; on return it points just past
+ * that opcode, its arguments and any trailing payload (match, variable
+ * or text data). Unknown opcodes trip an ASSERT and only the opcode byte
+ * itself is skipped.
+ */
+void opsIterate(vmop_t** ops)
+{
+    vmop_t* at = *ops;
+    vmop_t code = *at;
+
+    /* Step over the opcode byte first */
+    at++;
+
+    switch(code)
+    {
+    /* Opcode only */
+    case o_end:
+    case o_nop:
+    case o_ret:
+    case o_msg:
+        break;
+
+    /* Opcode plus a single argument */
+    case o_push:
+    case o_pop:
+    case o_jmp:
+    case o_je:
+    case o_jne:
+    case o_test:
+    case o_call:
+    case o_stop:
+        INC_ARGUMENT(at);
+        break;
+
+    /* Opcode plus a pair of arguments */
+    case o_lock:
+    case o_check:
+    case o_cmp:
+    case o_add:
+    case o_sub:
+    case o_mov:
+    case o_repl:
+        INC_ARGUMENT(at);
+        INC_ARGUMENT(at);
+        break;
+
+    /* Two arguments followed by the match payload */
+    case o_match:
+        {
+            match_op* payload;
+            INC_ARGUMENT(at);
+            INC_ARGUMENT(at);
+            payload = (match_op*)at;
+            at += match_op_size(*payload);
+        }
+        break;
+
+    /* Variable payload directly after the opcode */
+    case o_setvar:
+    case o_clrvar:
+        {
+            var_op* payload = (var_op*)at;
+            at += var_op_size(*payload);
+        }
+        break;
+
+    /* Text payload directly after the opcode */
+    case o_text:
+        {
+            text_op* payload = (text_op*)at;
+            at += text_op_size(*payload);
+        }
+        break;
+
+    default:
+        ASSERT(false);
+    }
+
+    *ops = at;
+}
+
+
+/*
+ * opsFree: release a compiled op stream.
+ *
+ * Individual ops own no separate allocations (execution no longer changes
+ * them), so the whole stream is released with a single free(). The old
+ * per-op walk did nothing else, computed 'ops + ~0' when len was 0
+ * (pointer overflow) and dereferenced ops even when it was NULL.
+ *
+ * ops: start of the op stream; may be NULL.
+ * len: unused, kept so existing callers keep compiling.
+ *
+ * Always returns R_OK.
+ */
+int opsFree(vmop_t* ops, size_t len)
+{
+    (void)len;
+
+    free(ops);
+    return R_OK;
+}
+
+
+/*
+ * getOpName: map an opcode to its mnemonic for dump output.
+ *
+ * Returns the text after the o_ prefix of the matching enumerator
+ * (o_mov gives "mov"), or "<INVALID>" for a value not listed here.
+ * The returned string is a literal and must not be freed.
+ */
+static const char* getOpName(vmop_t op)
+{
+ /* Expands to: case o_<r>: return "<r>"; */
+ #define RETOPNAME(r) case o_##r: return #r;
+ switch(op)
+ {
+ RETOPNAME(end);
+ RETOPNAME(stop);
+ RETOPNAME(nop);
+ RETOPNAME(ret);
+ RETOPNAME(push);
+ RETOPNAME(pop);
+ RETOPNAME(jmp);
+ RETOPNAME(je);
+ RETOPNAME(jne);
+ RETOPNAME(test);
+ RETOPNAME(call);
+ RETOPNAME(lock);
+ RETOPNAME(check);
+ RETOPNAME(cmp);
+ RETOPNAME(add);
+ RETOPNAME(sub);
+ RETOPNAME(mov);
+ RETOPNAME(match);
+ RETOPNAME(repl);
+ RETOPNAME(setvar);
+ RETOPNAME(clrvar);
+ RETOPNAME(pos);
+ RETOPNAME(text);
+ RETOPNAME(msg);
+
+ default:
+ return "<INVALID>";
+ }
+}
+
+/*
+ * getRegisterName: map a register code to its name for dump output.
+ *
+ * Returns the text after the r_ prefix of the matching constant
+ * (r_x1 gives "x1"), or "<INVALID>" for a value not listed here.
+ * The returned string is a literal and must not be freed.
+ */
+static const char* getRegisterName(byte reg)
+{
+    /* Expands to: case r_<r>: return "<r>"; */
+    #define RETREGNAME(r) case r_##r: return #r;
+    switch(reg)
+    {
+    /* Flags, action flag, stack and base pointers */
+    RETREGNAME(fe);
+    RETREGNAME(fg);
+    RETREGNAME(fl);
+    RETREGNAME(ac);
+    RETREGNAME(sp);
+    RETREGNAME(bp);
+
+    /* Group begin registers */
+    RETREGNAME(b0);
+    RETREGNAME(b1);
+    RETREGNAME(b2);
+    RETREGNAME(b3);
+    RETREGNAME(b4);
+    RETREGNAME(b5);
+    RETREGNAME(b6);
+    RETREGNAME(b7);
+    RETREGNAME(b8);
+    RETREGNAME(b9);
+
+    /* Group end registers */
+    RETREGNAME(e0);
+    RETREGNAME(e1);
+    RETREGNAME(e2);
+    RETREGNAME(e3);
+    RETREGNAME(e4);
+    RETREGNAME(e5);
+    RETREGNAME(e6);
+    RETREGNAME(e7);
+    RETREGNAME(e8);
+    RETREGNAME(e9);
+
+    /* Group count */
+    RETREGNAME(cg);
+
+    /* General purpose x registers */
+    RETREGNAME(x0);
+    RETREGNAME(x1);
+    RETREGNAME(x2);
+    RETREGNAME(x3);
+    RETREGNAME(x4);
+    RETREGNAME(x5);
+    RETREGNAME(x6);
+    RETREGNAME(x7);
+
+    /* General purpose y registers */
+    RETREGNAME(y0);
+    RETREGNAME(y1);
+    RETREGNAME(y2);
+    RETREGNAME(y3);
+    RETREGNAME(y4);
+    RETREGNAME(y5);
+    RETREGNAME(y6);
+    RETREGNAME(y7);
+
+    default:
+        return "<INVALID>";
+    }
+    /* Keep the helper macro local to this function */
+    #undef RETREGNAME
+}
+
+/*
+ * dumpArgument: print one op argument in readable form.
+ *
+ * f:   stream to write to.
+ * ops: points at the first byte of the argument.
+ *
+ * Value, memory and stack arguments are printed as hex numbers, register
+ * arguments by name. The register name is written with fputs so that it
+ * is never interpreted as a printf format string.
+ */
+void dumpArgument(FILE* f, vmop_t* ops)
+{
+    switch(ARG_TYPE(*ops))
+    {
+    case ARG_VAL_TYPE:
+        fprintf(f, "0x%06x", ARG_GET_VALUE(*((uint*)ops)));
+        break;
+
+    case ARG_MEM_TYPE:
+        fprintf(f, "<mem:0x%06x>", ARG_GET_VALUE(*((uint*)ops)));
+        break;
+
+    case ARG_STACK_TYPE:
+        fprintf(f, "<stack:0x%06x>", ARG_GET_VALUE(*((uint*)ops)));
+        break;
+
+    case ARG_REG_TYPE:
+        fputs(getRegisterName(*ops), f);
+        break;
+
+    default:
+        ASSERT(false);
+    }
+}
+
+/*
+ * opsDump: write a readable listing of an op stream.
+ *
+ * ops: start of the stream; it is read up to the o_end marker.
+ * f:   stream to write to.
+ *
+ * Each line has the form "<offset>: <mnemonic> <arguments> <payload>".
+ * Always returns R_OK.
+ */
+int opsDump(vmop_t* ops, FILE* f)
+{
+    vmop_t* beg = ops;
+
+    while(*ops != o_end)
+    {
+        vmop_t op = *ops;
+
+        /* The pointer difference is cast to match the %x conversion */
+        fprintf(f, "%06x: %s ", (unsigned int)(ops - beg), getOpName(op));
+
+        ops++;
+
+        /* Now the arguments */
+        switch(op)
+        {
+        case o_push:
+        case o_pop:
+        case o_jmp:
+        case o_je:
+        case o_jne:
+        case o_test:
+        case o_call:
+        case o_stop:
+        case o_pos:
+            dumpArgument(f, ops);
+            INC_ARGUMENT(ops);
+            break;
+
+        case o_lock:
+        case o_check:
+        case o_cmp:
+        case o_add:
+        case o_sub:
+        case o_mov:
+        case o_match:
+        case o_repl:
+            dumpArgument(f, ops);
+            fprintf(f, ", ");
+            INC_ARGUMENT(ops);
+            dumpArgument(f, ops);
+            INC_ARGUMENT(ops);
+            break;
+
+        default:
+            break;
+        }
+
+        /* Now any additional data */
+        switch(op)
+        {
+        case o_match:
+            {
+                match_op* mop = (match_op*)ops;
+                if(mop->type & kMatchPcre)
+                {
+                    match_op_pcre* pcre = (match_op_pcre*)mop;
+                    fprintf(f, " %s", pcre->pattern);
+                }
+                else
+                {
+                    fprintf(f, " <regexp>");
+                }
+
+                ops += match_op_size(*mop);
+            }
+            break;
+
+        case o_setvar:
+        case o_clrvar:
+            {
+                var_op* vop = (var_op*)ops;
+                fprintf(f, " <%%%s>", vop->name);
+                ops += var_op_size(*vop);
+            }
+            break;
+
+        /* The text payload belongs to o_text. o_test is a plain
+           one-argument op that was already handled above. */
+        case o_text:
+            {
+                text_op* top = (text_op*)ops;
+                fprintf(f, " <%s>", top->string);
+                ops += text_op_size(*top);
+            }
+            break;
+
+        default:
+            break;
+        }
+
+        fprintf(f, "\n");
+    }
+
+    return R_OK;
+}
+
+
+
+
diff --git a/lib/execute.c b/lib/execute.c
new file mode 100644
index 0000000..3bf3c4b
--- /dev/null
+++ b/lib/execute.c
@@ -0,0 +1,1715 @@
+/*
+ * AUTHOR
+ * N. Nielsen
+ *
+ * LICENSE
+ * This software is in the public domain.
+ *
+ * The software is provided "as is", without warranty of any kind,
+ * express or implied, including but not limited to the warranties
+ * of merchantability, fitness for a particular purpose, and
+ * noninfringement. In no event shall the author(s) be liable for any
+ * claim, damages, or other liability, whether in an action of
+ * contract, tort, or otherwise, arising from, out of, or in connection
+ * with the software or the use or other dealings in the software.
+ *
+ * SUPPORT
+ * Send bug reports to: <nielsen@memberwebs.com>
+ */
+
+/* ----------------------------------------------------------------------
+ * Rep Execution Unit
+ * 2000-2001 Copyright, Nate Nielsen
+ */
+
+#include "common/usuals.h"
+#include "common/compat.h"
+#include "common/xstring.h"
+#include "lib/rlib.h"
+#include "priv.h"
+#include "execute.h"
+#include "ops.h"
+
+/* 100 million loops max! */
+#define MAX_PASSES 100000000
+
+/* NOTE: all *end* counts/pointers point to the last char of the actual data */
+
+/* Quick access macros for accessing VM registers.
+ * Both expand to an element of state->vmregs, so they can only be used
+ * where a variable named 'state' is in scope. REGISTER2 addresses the
+ * register 'off' slots after 'reg'. */
+#define REGISTER(reg) \
+ (state->vmregs[ARG_GET_REGISTER(reg)])
+#define REGISTER2(reg, off) \
+ (state->vmregs[ARG_GET_REGISTER(reg) + (off)])
+
+/* =========================================================================
+ * MEMORY: Memory blocks are used by the code and are simply a set
+ * of numeric values indexed by key.
+ */
+
+/* _memoryAllocate: ---------------------------------------------------
+ *  Make sure the block has storage with room for one more entry.
+ *  Creates the initial BLOCK_SIZE array on first use and doubles the
+ *  array once it is full. Returns false when an allocation fails.
+ */
+bool _memoryAllocate(memory* mem)
+{
+    ASSERT_PTR(mem);
+
+    /* First use: create the initial array and start out empty */
+    if(mem->thememory == NULL)
+    {
+        mem->thememory = (struct mem*)malloc(sizeof(struct mem) * BLOCK_SIZE);
+        if(mem->thememory == NULL)
+            return false;
+
+        mem->alloc = BLOCK_SIZE;
+        memoryClearAll(mem);
+    }
+
+    /* Still a free slot? Then there is nothing to do */
+    if(mem->cur < mem->alloc)
+        return true;
+
+    /* Full: grow to twice the current size */
+    mem->thememory = (struct mem*)reallocf(mem->thememory,
+                            (sizeof(struct mem) * mem->alloc) * 2);
+    if(mem->thememory == NULL)
+        return false;
+
+    mem->alloc *= 2;
+    return true;
+}
+
+
+/* _memoryFind: ----------------------------------------------------
+ *  Linear search of the used entries for one whose key equals 'key'.
+ *  Returns the entry, or NULL when the key is not stored.
+ */
+struct mem* _memoryFind(memory* mem, uint key)
+{
+    size_t idx = 0;
+    ASSERT_PTR(mem);
+
+    while(idx < mem->cur)
+    {
+        struct mem* slot = &(mem->thememory[idx]);
+        if(slot->key == key)
+            return slot;
+
+        idx++;
+    }
+
+    return NULL;
+}
+
+/* memoryInit: -----------------------------------------------------
+ *  Reset a memory block to the all-zero state. Nothing is allocated
+ *  here. Always returns true.
+ */
+bool memoryInit(memory* mem)
+{
+    zero(*mem);
+
+    return true;
+}
+
+/* memoryFree: ----------------------------------------------------
+ *  Release the entry array of a memory block. A NULL block is
+ *  ignored. The block itself is not freed.
+ */
+void memoryFree(memory* mem)
+{
+    if(!mem)
+        return;
+
+    ASSERT_PTR(mem);
+
+    if(mem->thememory != NULL)
+        free(mem->thememory);
+
+    mem->thememory = NULL;
+}
+
+/* memoryValue: --------------------------------------------------
+ *  Return a pointer to the value stored under 'key'. A key that is
+ *  not present yet is added with the value 0. Returns NULL when the
+ *  block cannot be allocated or grown.
+ */
+uint* memoryValue(memory* mem, uint key)
+{
+    struct mem* slot;
+    ASSERT_PTR(mem);
+
+    /* Guarantees a free slot in case the key has to be added */
+    if(!_memoryAllocate(mem))
+        return NULL;
+
+    slot = _memoryFind(mem, key);
+    if(slot)
+        return &(slot->value);
+
+    /* Unknown key: append a zeroed entry */
+    slot = &(mem->thememory[mem->cur]);
+    slot->key = key;
+    slot->value = 0;
+    mem->cur++;
+
+    return &(slot->value);
+}
+
+/* memoryClearAll: ---------------------------------------------------
+ *  Forget every stored entry by resetting the used count. The
+ *  allocated array is kept for reuse.
+ */
+void memoryClearAll(memory* mem)
+{
+    ASSERT_PTR(mem);
+
+    mem->cur = 0;
+}
+
+
+/* =========================================================================
+ * DATA:
+ */
+
+/* _dataAllocate: ---------------------------------------------------
+ *  Make sure the data block has storage with room for one more entry.
+ *  Creates the initial BLOCK_SIZE array on first use and doubles the
+ *  array once it is full.
+ *
+ *  Returns false when an allocation fails. In that case the block is
+ *  left exactly as it was, so the entries stored so far stay valid and
+ *  the used count never refers to an array that no longer exists.
+ */
+bool _dataAllocate(data* dat)
+{
+    ASSERT_PTR(dat);
+
+    if(!dat->thedata)
+    {
+        /* Allocate new set */
+        dat->thedata = (struct dat*)malloc(sizeof(struct dat) * BLOCK_SIZE);
+        if(!dat->thedata) return false;
+        dat->alloc = BLOCK_SIZE;
+
+        /* A fresh array holds no entries. Resetting the count directly
+           avoids walking (and freeing) uninitialised slots. */
+        dat->cur = 0;
+    }
+
+    /* If no more space ... */
+    if(dat->alloc <= dat->cur)
+    {
+        /* ...double allocation, keeping the old array if that fails */
+        size_t sz = sizeof(struct dat) * dat->alloc;
+        struct dat* grown = (struct dat*)realloc(dat->thedata, sz * 2);
+        if(!grown) return false;
+
+        dat->thedata = grown;
+        dat->alloc *= 2;
+    }
+
+    return true;
+}
+
+
+/* _dataFind: ----------------------------------------------------
+ *  Linear search of the used entries for one whose key pointer equals
+ *  'key'. Keys are compared by address. Returns the entry, or NULL
+ *  when the key is not stored.
+ */
+struct dat* _dataFind(data* dat, void* key)
+{
+    size_t idx = 0;
+    ASSERT_PTR(dat);
+
+    while(idx < dat->cur)
+    {
+        struct dat* slot = &(dat->thedata[idx]);
+        if(slot->key == key)
+            return slot;
+
+        idx++;
+    }
+
+    return NULL;
+}
+
+/* dataInit: -----------------------------------------------------
+ *  Reset a data block to the all-zero state. Nothing is allocated
+ *  here. Always returns true.
+ */
+bool dataInit(data* dat)
+{
+    zero(*dat);
+
+    return true;
+}
+
+/* dataFree: ----------------------------------------------------
+ *  Release a data block: first the stored values, then the entry
+ *  array. A NULL block is ignored. The block itself is not freed.
+ */
+void dataFree(data* dat)
+{
+    if(!dat)
+        return;
+
+    ASSERT_PTR(dat);
+
+    if(dat->thedata != NULL)
+    {
+        /* Values are released before the array that holds them */
+        dataClearAll(dat);
+        free(dat->thedata);
+    }
+
+    dat->thedata = NULL;
+}
+
+/* dataGetValue: --------------------------------------------------
+ *  Look up the value stored under 'key'. Returns NULL when the key is
+ *  not present.
+ */
+void* dataGetValue(data* dat, void* key)
+{
+    struct dat* hit;
+    ASSERT_PTR(dat);
+
+    hit = _dataFind(dat, key);
+
+    return hit ? hit->value : NULL;
+}
+
+/* dataSetValue: --------------------------------------------------
+ *  Store 'value' under 'key', adding the key when it is not present.
+ *  An existing value is overwritten without being freed here.
+ *  Returns false when the block cannot be allocated or grown.
+ */
+bool dataSetValue(data* dat, void* key, void* value)
+{
+    struct dat* slot;
+    ASSERT_PTR(dat);
+
+    /* Guarantees a free slot in case the key has to be added */
+    if(!_dataAllocate(dat))
+        return false;
+
+    slot = _dataFind(dat, key);
+    if(!slot)
+    {
+        /* Unknown key: take the next free slot */
+        slot = &(dat->thedata[dat->cur]);
+        slot->key = key;
+        dat->cur++;
+    }
+
+    slot->value = value;
+    return true;
+}
+
+/* dataClearAll: ---------------------------------------------------
+ *  Free every non-NULL stored value and mark the block as empty. The
+ *  entry array itself is kept for reuse.
+ */
+void dataClearAll(data* dat)
+{
+    ASSERT_PTR(dat);
+
+    if(dat->thedata != NULL)
+    {
+        struct dat* it = dat->thedata;
+        struct dat* stop = it + dat->cur;
+
+        for(; it != stop; it++)
+        {
+            if(it->value)
+                free(it->value);
+        }
+    }
+
+    dat->cur = 0;
+}
+
+
+/* ========================================================================
+ * VARIABLES: Variables are strings indexed by a name.
+ */
+
+/* _variablesAllocate: ----------------------------------------------
+ *  Make sure the variable set has storage with room for one more
+ *  entry. Creates the initial BLOCK_SIZE array on first use and
+ *  doubles the array once it is full.
+ *
+ *  Returns false when an allocation fails. In that case the set is
+ *  left exactly as it was, so existing variables stay reachable and
+ *  the used count never refers to an array that no longer exists.
+ */
+bool _variablesAllocate(variables* vars)
+{
+    ASSERT_PTR(vars);
+
+    if(!vars->thevars)
+    {
+        /* Allocate new set */
+        vars->thevars = (struct vari*)malloc(sizeof(struct vari) * BLOCK_SIZE);
+        if(!vars->thevars) return false;
+        vars->alloc = BLOCK_SIZE;
+        vars->cur = 0;
+    }
+
+    /* If no more space ... */
+    if(vars->alloc <= vars->cur)
+    {
+        /* ...double allocation, keeping the old array if that fails */
+        size_t sz = sizeof(struct vari) * vars->alloc;
+        struct vari* grown = (struct vari*)realloc(vars->thevars, sz * 2);
+        if(!grown) return false;
+
+        vars->thevars = grown;
+        vars->alloc *= 2;
+    }
+
+    return true;
+}
+
+/* _variablesNew: --------------------------------------------------------
+ *  Append a new variable to the set.
+ *
+ *  name: variable name; a private copy is stored.
+ *  val:  initial value, 'cnt' bytes long.
+ *
+ *  Returns the new entry, or NULL when memory runs out. On failure
+ *  nothing is added to the set and nothing is leaked.
+ */
+struct vari* _variablesNew(variables* vars, const char* name,
+                           const char* val, size_t cnt)
+{
+    struct vari* v;
+    char* newname;
+    char* newvalue;
+    ASSERT_PTR(vars);
+
+    if(!_variablesAllocate(vars))
+        return NULL;
+
+    /* Build both strings first; the slot is only touched on success */
+    newname = strdup(name);
+    if(!newname)
+        return NULL;
+
+    newvalue = (char*)malloc(sizeof(char) * (cnt + 2));
+    if(!newvalue)
+    {
+        free(newname);
+        return NULL;
+    }
+
+    starclr(newvalue);
+    starnadd(&newvalue, val, cnt);
+
+    /* The original code re-checks the value after starnadd, so it can
+       apparently come back as NULL - keep that check */
+    if(!newvalue)
+    {
+        free(newname);
+        return NULL;
+    }
+
+    /* Add new variable */
+    v = &(vars->thevars[vars->cur]);
+    v->name = newname;
+    v->value = newvalue;
+    vars->cur++;
+
+    return v;
+}
+
+/* _variablesFind: ------------------------------------------------------
+ *  Look a variable up by name, ignoring case, starting with the most
+ *  recently added entry. A name that is not in the set but exists in
+ *  the process environment is imported as a new variable. Returns
+ *  NULL when nothing is found (or the import fails).
+ */
+struct vari* _variablesFind(variables* vars, const char* name)
+{
+    size_t idx;
+    char* envval;
+
+    ASSERT_PTR(vars);
+
+    /* Newest first */
+    for(idx = vars->cur; idx > 0; idx--)
+    {
+        struct vari* cand = &(vars->thevars[idx - 1]);
+        if(strcasecmp(name, cand->name) == 0)
+            return cand;
+    }
+
+    /* Not one of ours: fall back to the environment */
+    envval = getenv(name);
+    if(envval != NULL)
+        return _variablesNew(vars, name, envval, strlen(envval));
+
+    return NULL;
+}
+
+/* variablesInit: ------------------------------------------------------
+ *  Reset a variable set to the all-zero state. Nothing is allocated
+ *  here. Always returns true.
+ */
+bool variablesInit(variables* vars)
+{
+    zero(*vars);
+
+    return true;
+}
+
+/* variablesFree: ------------------------------------------------------
+ *  Release a set of variables: first the name and value strings of
+ *  every entry, then the entry array. This matches dataFree, which
+ *  also clears its entries before freeing the array. A NULL set is
+ *  ignored. The set itself is not freed.
+ */
+void variablesFree(variables* vars)
+{
+    if(vars)
+    {
+        ASSERT_PTR(vars);
+        if(vars->thevars)
+        {
+            /* Without this the strings owned by the entries leak.
+               Safe even if the caller already cleared the set. */
+            variablesClearAll(vars);
+            free(vars->thevars);
+        }
+
+        vars->thevars = NULL;
+    }
+}
+
+/* variablesAddBytes: --------------------------------------------------
+ *  Add 'cnt' bytes of 'val' to the variable 'name'. The variable is
+ *  created when it does not exist yet. Returns false when memory
+ *  runs out.
+ */
+bool variablesAddBytes(variables* vars, const char* name,
+                       const char* val, size_t cnt)
+{
+    struct vari* existing;
+    ASSERT_PTR(vars);
+
+    existing = _variablesFind(vars, name);
+    if(existing)
+    {
+        /* Known variable: add to the value it already has */
+        ASSERT(existing->value);
+        starnadd(&(existing->value), val, cnt);
+        return existing->value != NULL;
+    }
+
+    /* Unknown variable: create it with this value */
+    return _variablesNew(vars, name, val, cnt) != NULL;
+}
+
+/* variablesAdd: -------------------------------------------------------
+ *  Add a null-terminated string to the variable 'name'. Thin wrapper
+ *  around variablesAddBytes using the string's length.
+ */
+bool variablesAdd(variables* vars, const char* name, const char* val)
+{
+    size_t cnt;
+    ASSERT_PTR(vars);
+
+    cnt = strlen(val);
+    return variablesAddBytes(vars, name, val, cnt);
+}
+
+/* variablesClear: ----------------------------------------------------
+ *  Clear the value of the variable 'name' if it can be found. Always
+ *  returns true, whether or not the variable exists.
+ */
+bool variablesClear(variables* vars, const char* name)
+{
+    struct vari* found;
+    ASSERT_PTR(vars);
+
+    found = _variablesFind(vars, name);
+    if(found != NULL)
+        starclr(found->value);
+
+    return true;
+}
+
+/* variablesClearAll: -------------------------------------------------
+ *  Remove every variable from the set, freeing each name and value
+ *  string. The entry array is kept for reuse. Always returns true.
+ */
+bool variablesClearAll(variables* vars)
+{
+    ASSERT_PTR(vars);
+
+    if(!vars->thevars)
+        return true;
+
+    /* Release from the last entry down to the first */
+    while(vars->cur > 0)
+    {
+        struct vari* last;
+
+        vars->cur--;
+        last = &(vars->thevars[vars->cur]);
+
+        ASSERT(last->name);
+        free(last->name);
+        ASSERT(last->value);
+        free(last->value);
+    }
+
+    vars->cur = 0;
+    return true;
+}
+
+/* _escapeString: -----------------------------------------------------
+ *  Return a newly allocated copy of 'string' in which every character
+ *  from the special set below is preceded by a backslash, so that the
+ *  text can be placed inside a regular expression. The caller frees
+ *  the result. Returns NULL when the allocation fails.
+ */
+static char* _escapeString(const char* string)
+{
+    const char kSpecialChars[] = "$^*(){}[]\\?+.";
+    size_t len = strlen(string);
+    size_t extra = 0;
+    size_t at;
+    char* out;
+
+    /* Pass 1: count the characters that need a backslash */
+    at = strcspn(string, kSpecialChars);
+    while(at < len)
+    {
+        extra++;
+        at += 1 + strcspn(string + at + 1, kSpecialChars);
+    }
+
+    out = (char*)malloc(sizeof(char) * (len + extra + 1));
+    if(!out)
+        return NULL;
+
+    /* Pass 2: copy, then insert a backslash in front of each special */
+    strcpy(out, string);
+    at = 0;
+
+    for(;;)
+    {
+        at += strcspn(out + at, kSpecialChars);
+        if(at >= len + extra)
+            break;
+
+        strins(out + at, "\\");
+
+        /* Step over the backslash and the character it escapes */
+        at += 2;
+    }
+
+    return out;
+}
+
+/* variablesSubstitute: -----------------------------------------------
+ *  Perform variable and register substitution on a string.
+ *
+ *  vars:   variable set used to resolve %name references.
+ *  stream: supplies the VM state (registers) and the input buffer that
+ *          register text is copied from.
+ *  script: receives the error text when a syntax error is found.
+ *  pstr:   string to rewrite in place; it may be reallocated.
+ *  mode:   true when the string is a regular expression, false when it
+ *          is replacement text.
+ *
+ *  In replacement text %N is replaced by the text of group N (or removed
+ *  when N is not a valid group) and backslash escapes are removed. In a
+ *  regular expression %N and backslash escapes are left alone, variable
+ *  values are escaped, and a multi-valued variable becomes a
+ *  non-capturing alternation.
+ *
+ *  Returns R_OK, R_NOMEM, or R_SYNTAX for an invalid variable name.
+ */
+int variablesSubstitute(variables* vars, r_stream* stream, r_script* script,
+                        char** pstr, bool mode)
+{
+    char* next = *pstr;
+    struct internal_state* state = stream->state;
+
+    ASSERT_PTR(vars);
+    ASSERT_PTR(state);
+
+    #define SYNTAX_ERROR(s) \
+        do{ \
+            scriptSetError(script, s); \
+            return R_SYNTAX; \
+        } while(0)
+
+    /*
+        If mode == true then we're substituting inside a regular expression
+        otherwise we're substituting in replaced text
+    */
+
+    /* Find next backslash or percent */
+    while(next += strcspn(next, "\\%"), next && next[0] != 0)
+    {
+        switch(*next)
+        {
+
+        /* It's a variable or a register */
+        case '%':
+
+            /* Is it a register? (cast: isdigit needs an unsigned char value) */
+            if(isdigit((unsigned char)next[1]))
+            {
+                /* Registers are only substituted in replace mode */
+                if(!mode)
+                {
+                    uint reg = next[1] - 0x30;
+
+                    /* Get register number */
+                    if(reg < REGISTER(r_cg))
+                    {
+                        /* Save offset for reallocations */
+                        size_t off = next - *pstr;
+                        size_t reglen = REGISTER2(r_e0, reg) - REGISTER2(r_b0, reg);
+
+                        /* Reallocate to fit register text */
+                        strrsrv(*pstr, strlen(*pstr) + reglen);
+                        if(!(*pstr)) return R_NOMEM;
+
+                        /* offset next properly */
+                        next = *pstr + off;
+
+                        /* Replace %N with register text */
+                        next = strnrep(next, 2,
+                            (stream->nextIn + ABS_TO_REL(REGISTER2(r_b0, reg), stream->state)),
+                            reglen);
+                    }
+                    else
+                        /* If invalid number just blow away */
+                        next = strnrep(next, 2, "", 0);
+                }
+                else
+                {
+                    /* In a regular expression %N stays as it is. Step
+                       over it, otherwise the scan finds the same '%'
+                       again and never terminates. */
+                    next += 2;
+                }
+            }
+
+
+            /* Otherwise it's a variable */
+            else
+            {
+                bool multi = false;
+                size_t len, off;
+                char temp;
+                struct vari* v;
+                const char* value;
+
+                /* Get the name */
+                len = strspn(next + 1, kValidIdentifier);
+                if(len == 0) SYNTAX_ERROR("Invalid variable name.");
+
+                /* Increment length for the % prefix */
+                len++;
+
+                /* Null terminate the variable name */
+                temp = next[len];
+                next[len] = 0;
+
+                /* Do we have this variable? */
+                if(v = _variablesFind(vars, next + 1))
+                    value = v->value;
+                else
+                    value = "\0\0";
+
+                /* Check if the variable is an array */
+                if(mode)
+                    multi = starnext(value) ? true : false;
+
+                /* Unnull-terminate it */
+                next[len] = temp;
+
+                /* Get offset for reallocations */
+                off = next - *pstr;
+
+                /* Reallocate the string to accommodate new replacement */
+                strrsrv(*pstr, strlen(*pstr) + (starend(value) - value) + 4);
+                if(!*pstr) return R_NOMEM;
+
+                /* Offset next back properly */
+                next = *pstr + off;
+
+                /* Eat variable name and add open parentheses if needed
+                   The '?:' after the opening paren denotes a non capturing
+                   group */
+                next = strrep(next, len, multi ? "(?:" : "");
+
+                do
+                {
+                    /* If in regular expression mode ... */
+                    char* escval = NULL;
+                    if(mode)
+                    {
+                        /* ... escape the value */
+                        escval = _escapeString(value);
+                        if(!escval) return R_NOMEM;
+                    }
+
+                    /* Insert the value */
+                    next = strrep(next, 0, mode ? escval : value);
+
+                    /* If in regular expression mode free escaped. */
+                    if(mode)
+                        free(escval);
+
+                    /* If not multi then only put the first one in */
+                    if(!multi)
+                        break;
+
+                    /* Add a pipe (alternation operator) if we have more values */
+                    else
+                        next = strrep(next, 0, starnext(value) ? "|" : "");
+
+                }
+                while(value = starnext(value));
+
+                /* Add closing parentheses if needed */
+                next = strrep(next, 0, multi ? ")" : "");
+            }
+            break;
+
+
+        /* It's an escaped character */
+        case '\\':
+
+            /*
+                Only unescape in replace mode
+                Regular expressions will unescape everything else
+            */
+            if(!mode)
+                next = strnrep(next, 1, "", 0);
+            else
+                next++;
+
+            /* Skip the escaped character, but never step over the
+               terminator when the backslash was the last character */
+            if(*next != 0)
+                next++;
+
+            break;
+        }
+    }
+
+    return R_OK;
+}
+
+/* variablesValidName: -------------------------------------------------
+ *  True when every character of 'name' belongs to kValidIdentifier.
+ *  An empty name is accepted.
+ */
+bool variablesValidName(const char* name)
+{
+    /* The leading run of valid characters must reach the terminator */
+    size_t valid = strspn(name, kValidIdentifier);
+
+    return name[valid] == '\0';
+}
+
+/* variablesHasVars: ----------------------------------------------------
+ * Does the given string have variables or registers that need
+ * substitution?
+ *
+ * Every '%' in the string is examined. The answer is true as soon as
+ * one is found that isEscaped() does not report as escaped, and false
+ * when the string holds no '%' or only escaped ones.
+ *
+ * string: null terminated text to scan (must not be NULL)
+ */
+bool variablesHasVars(const char* string)
+{
+    const char* cur = string;
+
+    /* Keep scanning past escaped '%' characters: an escaped one early in
+       the string must not hide a real variable further on */
+    while((cur = strchr(cur, '%')) != NULL)
+    {
+        if(!isEscaped(string, cur))
+            return true;
+
+        cur++;
+    }
+
+    return false;
+}
+
+/* =========================================================================
+ * LOCKS: Locks are used by the engine to implement the replacement locks
+ */
+
+/* _locksAllocate: -------------------------------------------------------
+ * Expand a block of locks if needed
+ *
+ * Makes sure there is room for at least one more lock after the ones
+ * in use. The first call allocates BLOCK_SIZE entries; after that the
+ * array is doubled whenever it is full.
+ *
+ * Returns false when memory could not be obtained. In that case the
+ * set is left empty (no array, zero counts) so that later scans do not
+ * index through a NULL array using stale counts.
+ */
+bool _locksAllocate(locks* lcks)
+{
+    if(!lcks->thelocks)
+    {
+        /* Allocate the first set */
+        lcks->thelocks = (struct lock*)malloc(sizeof(struct lock) * BLOCK_SIZE);
+        if(!lcks->thelocks)
+        {
+            lcks->alloc = 0;
+            lcks->cur = 0;
+            return false;
+        }
+
+        lcks->alloc = BLOCK_SIZE;
+        locksClearAll(lcks);
+    }
+
+    /* If not enough space ... */
+    if(lcks->alloc <= lcks->cur)
+    {
+        /* ... reallocate double */
+        size_t sz = sizeof(struct lock) * lcks->alloc;
+        lcks->thelocks = (struct lock*)reallocf(lcks->thelocks, sz * 2);
+        if(!lcks->thelocks)
+        {
+            /* The array pointer is NULL now, so the old counts would
+               describe memory we no longer hold */
+            lcks->alloc = 0;
+            lcks->cur = 0;
+            return false;
+        }
+
+        lcks->alloc *= 2;
+    }
+
+    return true;
+}
+
+/* locksInit: -------------------------------------------------------------
+ * Initialize and or allocate a block of locks
+ *
+ * Applies the zero() helper to the whole structure and always reports
+ * success. No lock array is allocated here; _locksAllocate creates it
+ * when it finds thelocks unset.
+ * NOTE(review): zero() is defined elsewhere; presumably it clears the
+ * object to all zero bytes - confirm.
+ */
+bool locksInit(locks* lcks)
+{
+ zero(*lcks);
+ return true;
+}
+
+/* locksFree: -------------------------------------------------------------
+ * Free a set of locks
+ *
+ * Releases the lock array and returns the set to its empty state: no
+ * array and zero counts. Resetting the counts as well as the pointer
+ * keeps a later locksCheck or locksAdd from walking a NULL array.
+ * A NULL lcks is accepted and ignored.
+ */
+void locksFree(locks* lcks)
+{
+    if(!lcks)
+        return;
+
+    ASSERT_PTR(lcks);
+
+    /* free(NULL) is a no-op, so no guard is needed */
+    free(lcks->thelocks);
+    lcks->thelocks = NULL;
+    lcks->alloc = 0;
+    lcks->cur = 0;
+}
+
+#define RANGE_ADD(bd, ed, b1, e1) \
+ (((bd) = (bd) < (b1) ? (bd) : (b1)), ((ed) = (ed) > (e1) ? (ed) : (e1))) /* Grow [bd, ed] in place so it also covers [b1, e1]; bd and ed must be lvalues and every argument is evaluated more than once */
+
+#define RANGE_INTERSECTS(b1, e1, b2, e2) \
+ ((b1) < (e2) && (e1) > (b2)) /* True only for a real overlap: ranges that merely touch, or where either one is empty, do not intersect */
+
+#define RANGE_BEFORE(b1, e1, b2, e2) \
+ ((b1) <= (b2) && (e1) <= (e2)) /* True when range 1 both starts and ends no later than range 2 */
+
+
+/* locksAdd: ---------------------------------------------------------------
+ * Add a lock to the set
+ *
+ * The set is scanned from the front. When the new range overlaps an
+ * existing lock, that lock is widened to cover both, and neighbouring
+ * locks that now overlap each other are folded together. Otherwise the
+ * range is inserted as a new lock at the position where the scan
+ * stopped.
+ *
+ * Returns false only when room for a new lock could not be allocated.
+ */
+bool locksAdd(locks* lcks, size_t beg, size_t end)
+{
+    size_t pos;
+    size_t j;
+
+    for(pos = 0; pos < lcks->cur; pos++)
+    {
+        if(RANGE_INTERSECTS(beg, end, lcks->thelocks[pos].beg, lcks->thelocks[pos].end))
+        {
+            /* Widen the existing lock instead of adding another */
+            RANGE_ADD(lcks->thelocks[pos].beg, lcks->thelocks[pos].end, beg, end);
+
+            /* Widening may have made neighbours overlap; merge each such
+               pair and look at the same slot again after a merge */
+            j = 0;
+            while(j + 1 < lcks->cur)
+            {
+                if(!RANGE_INTERSECTS(lcks->thelocks[j].beg, lcks->thelocks[j].end,
+                                     lcks->thelocks[j + 1].beg, lcks->thelocks[j + 1].end))
+                {
+                    j++;
+                    continue;
+                }
+
+                RANGE_ADD(lcks->thelocks[j].beg, lcks->thelocks[j].end,
+                          lcks->thelocks[j + 1].beg, lcks->thelocks[j + 1].end);
+
+                /* Close the gap left by the absorbed lock */
+                lcks->cur--;
+                memmove(lcks->thelocks + j + 1, lcks->thelocks + j + 2,
+                        sizeof(lcks->thelocks[0]) * (lcks->cur - (j + 1)));
+            }
+
+            return true;
+        }
+
+        /* Found the slot the new lock belongs in front of */
+        if(RANGE_BEFORE(beg, end, lcks->thelocks[pos].beg, lcks->thelocks[pos].end))
+            break;
+    }
+
+    if(!_locksAllocate(lcks))
+        return false;
+
+    /* Open a hole at pos by shifting the tail one slot down */
+    memmove(lcks->thelocks + pos + 1, lcks->thelocks + pos,
+            sizeof(lcks->thelocks[0]) * (lcks->cur - pos));
+
+    lcks->thelocks[pos].beg = beg;
+    lcks->thelocks[pos].end = end;
+    lcks->cur++;
+
+    return true;
+}
+
+#ifdef _DEBUG
+/* Debug-only sanity checks for the RANGE_INTERSECTS macro */
+static void _locksTestIntersects(void)
+{
+    /* Touching or empty ranges must not count as intersecting */
+    ASSERT(!RANGE_INTERSECTS(0x0F, 0x15, 0x15, 0x15));
+    ASSERT(!RANGE_INTERSECTS(0x15, 0x15, 0x15, 0x15));
+    ASSERT(!RANGE_INTERSECTS(0x15, 0x15, 0x15, 0x20));
+
+    /* Identical, contained and partially overlapping ranges must */
+    ASSERT(RANGE_INTERSECTS(0x0F, 0x15, 0x0F, 0x15));
+    ASSERT(RANGE_INTERSECTS(0x10, 0x15, 0x0F, 0x15));
+    ASSERT(RANGE_INTERSECTS(0x0F, 0x12, 0x0F, 0x15));
+    ASSERT(RANGE_INTERSECTS(0x10, 0x12, 0x0F, 0x15));
+    ASSERT(RANGE_INTERSECTS(0x0F, 0x15, 0x10, 0x12));
+    ASSERT(RANGE_INTERSECTS(0x10, 0x1A, 0x0F, 0x15));
+    ASSERT(RANGE_INTERSECTS(0x02, 0x12, 0x0F, 0x15));
+}
+#endif
+
+/* locksCheck: -----------------------------------------------------------
+ * Check a range against the locks
+ *
+ * Returns true when [beg, end) overlaps at least one lock in the set
+ * and false otherwise. The stream argument is not used.
+ */
+bool locksCheck(locks* lcks, r_stream* stream, size_t beg, size_t end)
+{
+    size_t idx = 0;
+    bool hit = false;
+
+#ifdef _DEBUG
+    _locksTestIntersects();
+#endif
+
+    /* Stop at the first lock that overlaps the range */
+    while(!hit && idx < lcks->cur)
+    {
+        if(RANGE_INTERSECTS(beg, end, lcks->thelocks[idx].beg, lcks->thelocks[idx].end))
+            hit = true;
+
+        idx++;
+    }
+
+    return hit;
+}
+
+
+/* ============================================================================
+ * REPLACEMENT LIST: Contains the text of any replacements. They're copied
+ * into the output later.
+ */
+
+/* replacementAlloc: ---------------------------------------------------------
+ * Prepare and allocate a replacement
+ *
+ * Builds a zeroed replacement whose text member holds a copy of the
+ * given string and hands it back through pprep. The caller fills in
+ * beg and end. On failure *pprep is NULL and R_NOMEM is returned;
+ * otherwise R_OK. The stream argument is not used.
+ */
+int replacementAlloc(r_stream* stream, const char* text, replacement** pprep)
+{
+    /* The text hangs off the end of the struct, so the string length is
+       added to the struct size */
+    size_t len = strlen(text);
+    size_t total = sizeof(replacement) + (sizeof(char) * len);
+    replacement* rep = (replacement*)malloc(total);
+
+    *pprep = rep;
+    if(!rep)
+        return R_NOMEM;
+
+    /* Start from a clean slate, then copy the string into the tail */
+    memset(rep, 0, total);
+    rep->next = NULL;
+    strcpy(rep->text, text);
+
+    return R_OK;
+}
+
+
+/* replacementAdd: -----------------------------------------------------------
+ * Insert replacement into queue in the right order (IMPORTANT!)
+ *
+ * The insertion point is found in two passes over the stream's list:
+ * first every entry whose end lies before the new end is skipped, then,
+ * continuing from there, every entry whose beg lies before the new beg.
+ * The replacement is linked in at that spot.
+ */
+void replacementAdd(replacement* repl, r_stream* stream)
+{
+    /* Points at the link to rewrite: the list head or some entry's next */
+    replacement** link = &(stream->state->replaces);
+
+    while(*link && ((*link)->end < repl->end))
+        link = &((*link)->next);
+
+    while(*link && ((*link)->beg < repl->beg))
+        link = &((*link)->next);
+
+    /* Hook in */
+    repl->next = *link;
+    *link = repl;
+}
+
+
+/* replacementPop: -------------------------------------------------------
+ * Remove and return a replacement from the queue
+ *
+ * Frees the given replacement and returns the one that followed it
+ * (NULL when it was the last). repl must not be NULL.
+ */
+replacement* replacementPop(replacement* repl)
+{
+    /* Read the successor first; repl is gone after free() */
+    replacement* following = repl->next;
+
+    free(repl);
+
+    return following;
+}
+
+/* replacementDump: ------------------------------------------------------
+ * Dump all replacements in the stream to stderr
+ *
+ * One line is written per queued replacement, showing its beg and end
+ * in hexadecimal followed by its text.
+ */
+void replacementDump(r_stream* stream)
+{
+    replacement* repl;
+
+    for(repl = stream->state->replaces; repl; repl = repl->next)
+    {
+        /* beg and end are size_t: convert explicitly so the arguments
+           match the conversion specifiers on every platform */
+        fprintf(stderr, " beg: %lx end: %lx text: \'%s\'\n",
+                (unsigned long)repl->beg, (unsigned long)repl->end, repl->text);
+    }
+}
+
+
+/* =========================================================================
+ * REGISTER HELPER FUNCTIONS
+ */
+
+/* regsSet: ----------------------------------------------------------
+ * Offset and copy an entire set of regs into the main registers
+ *
+ * pcreregs holds begin/end pairs as filled in by pcre_exec. The first
+ * REGISTER(r_cg) pairs are copied into the begin (r_b0 + i) and end
+ * (r_e0 + i) registers with offset added to each value. All remaining
+ * registers up to MAX_REGS are marked invalid (~0), both begin and end.
+ */
+static void regsSet(struct internal_state* state, int pcreregs[], size_t offset)
+{
+    size_t i;
+
+    for(i = 0; i < REGISTER(r_cg); i++)
+    {
+        REGISTER2(r_b0, i) = pcreregs[i * 2] + offset;
+        REGISTER2(r_e0, i) = pcreregs[(i * 2) + 1] + offset;
+    }
+
+    /* Set rest of registers as invalid. The end register has to be
+       reset along with the begin register, otherwise it keeps the value
+       from an earlier match */
+    for( ; i < MAX_REGS; i++)
+    {
+        REGISTER2(r_b0, i) = ~0;
+        REGISTER2(r_e0, i) = ~0;
+    }
+}
+
+
+/* ===========================================================================
+ * MAIN EXECUTION FUNCTIONS
+ */
+
+
+/* vmInit: --------------------------------------------------------------
+ * Initialize state for execution
+ *
+ * Sets up the variables, memory, locks and working data of the
+ * stream's internal state and empties the replacement list.
+ *
+ * Returns true on success and false when any of the initializers
+ * fails. (An error code such as R_NOMEM must not be returned here: it
+ * is non-zero and would read as success through the bool result.)
+ */
+bool vmInit(r_stream* stream)
+{
+    struct internal_state* state = stream->state;
+    ASSERT(stream->state);
+
+    if(!variablesInit(&(state->vars)) ||
+       !memoryInit(&(state->mem)) ||
+       !locksInit(&(state->lcks)) ||
+       !dataInit(&(state->working)))
+        return false;
+
+    /* init replaces */
+    state->replaces = NULL;
+
+    return true;
+}
+
+
+/* vmFree: --------------------------------------------------------------
+ * Undo initialization
+ *
+ * Discards every queued replacement and then releases the variables,
+ * memory, locks and working data held by the stream's state.
+ */
+void vmFree(r_stream* stream)
+{
+    struct internal_state* state = stream->state;
+    replacement* pending;
+    ASSERT(stream->state);
+
+    /* Drain the replacement queue one entry at a time */
+    for(pending = state->replaces; pending; pending = replacementPop(pending))
+        ;
+    state->replaces = NULL;
+
+    variablesFree(&(state->vars));
+    memoryFree(&(state->mem));
+    locksFree(&(state->lcks));
+    dataFree(&(state->working));
+}
+
+
+/* vmClean: -------------------------------------------------------------
+ * Prepare a VM for a totally new replacement operation
+ *
+ * Discards every queued replacement, clears (without freeing) the
+ * variables, memory, locks and working data, and resets the stream
+ * offset to zero.
+ */
+void vmClean(r_stream* stream)
+{
+    struct internal_state* state = stream->state;
+    replacement* pending;
+    ASSERT(stream->state);
+
+    /* Drain the replacement queue one entry at a time */
+    for(pending = state->replaces; pending; pending = replacementPop(pending))
+        ;
+    state->replaces = NULL;
+
+    variablesClearAll(&(state->vars));
+    memoryClearAll(&(state->mem));
+    locksClearAll(&(state->lcks));
+    dataClearAll(&(state->working));
+
+    state->offset = 0;
+}
+
+
+
+
+/* rvalue: --------------------------------------------------------------
+ * Return a value to be used on the right (value) side of an expression
+ *
+ * The argument at ops is decoded according to its type bits:
+ *   value    - the constant stored in the argument itself
+ *   register - the current contents of that register
+ *   memory   - the memory slot with the encoded key
+ *   stack    - the stack-variable slot with the encoded key (kept in
+ *              stackMem with USE_STACK_VARS, otherwise in mem)
+ * An unknown type asserts and yields 0.
+ */
+#ifdef USE_STACK_VARS
+uint rvalue(vmop_t* ops, struct internal_state* state,
+            memory* mem, memory* stackMem)
+#else
+uint rvalue(vmop_t* ops, struct internal_state* state, memory* mem)
+#endif
+{
+    uint result = 0;
+
+    switch(ARG_TYPE(*ops))
+    {
+    case ARG_VAL_TYPE:
+        result = ARG_GET_VALUE(*((uint*)ops));
+        break;
+
+    case ARG_REG_TYPE:
+        /* Register arguments are a single op wide: read only *ops */
+        result = REGISTER(*ops);
+        break;
+
+    case ARG_MEM_TYPE:
+        result = *(memoryValue(mem, ARG_GET_MEMORY(*((uint*)ops))));
+        break;
+
+    case ARG_STACK_TYPE:
+#ifdef USE_STACK_VARS
+        result = *(memoryValue(stackMem, ARG_GET_STACK(*((uint*)ops))));
+#else
+        result = *(memoryValue(mem, ARG_GET_STACK(*((uint*)ops))));
+#endif
+        break;
+
+    default:
+        ASSERT(false);
+        break;
+    }
+
+    return result;
+}
+
+
+/* lvalue: -----------------------------------------------------------------
+ * Return a value to be used on the left side (assigned) of an expression
+ *
+ * The argument at ops is decoded according to its type bits and a
+ * pointer to the storage it names is returned:
+ *   register - the register itself
+ *   memory   - the memory slot with the encoded key
+ *   stack    - the stack-variable slot with the encoded key (kept in
+ *              stackMem with USE_STACK_VARS, otherwise in mem)
+ * A constant value cannot be assigned to: that case, like an unknown
+ * type, asserts and yields NULL.
+ */
+#ifdef USE_STACK_VARS
+static uint* lvalue(vmop_t* ops, struct internal_state* state,
+                    memory* mem, memory* stackMem)
+#else
+static uint* lvalue(vmop_t* ops, struct internal_state* state, memory* mem)
+#endif
+{
+    switch(ARG_TYPE(*ops))
+    {
+    case ARG_VAL_TYPE:
+        ASSERT(false && "Can't put a value on left side.");
+        return 0;
+    case ARG_REG_TYPE:
+        return &(REGISTER(*ops));
+    case ARG_MEM_TYPE:
+        return memoryValue(mem, ARG_GET_MEMORY(*((uint*)ops)));
+    case ARG_STACK_TYPE:
+#ifdef USE_STACK_VARS
+        return memoryValue(stackMem, ARG_GET_STACK(*((uint*)ops)));
+#else
+        /* Decode with the stack accessor, exactly as rvalue does, so
+           both sides of an assignment always name the same slot */
+        return memoryValue(mem, ARG_GET_STACK(*((uint*)ops)));
+#endif
+    default:
+        ASSERT(false);
+        return 0;
+    }
+}
+
+
+/* vmExecute: -------------------------------------------------------------
+ * The main VM run loop
+ *
+ * Runs the compiled ops of the script against the data currently
+ * available in the stream, starting at the first op, until an op ends
+ * execution or an error occurs.
+ *
+ * Returns:
+ *   R_OK      the script reached its end op
+ *   R_DONE    a stop op was executed with a zero argument
+ *   R_USER    a stop op was executed with a non-zero argument
+ *   R_NOMEM   an allocation failed
+ *   R_REGEXP  a pattern failed to compile or study
+ *   R_LOOP    more than MAX_PASSES backward jumps were taken
+ *   or the negative code reported by variablesSubstitute.
+ *
+ * Every exit goes through the cleanup label, which releases the VM
+ * stack, the text buffer and (with USE_STACK_VARS) the stack variable
+ * frames.
+ */
+int vmExecute(r_stream* stream, r_script* script)
+{
+#ifdef USE_STACK_VARS
+    #define RVALUE(ops) rvalue(ops, state, &(state->mem), stackVars)
+    #define LVALUE(ops) lvalue(ops, state, &(state->mem), stackVars)
+#else
+    #define RVALUE(ops) rvalue(ops, state, &(state->mem))
+    #define LVALUE(ops) lvalue(ops, state, &(state->mem))
+#endif
+
+    #define PUSH_STACK(top, v) ((top)++[0] = (v))
+    #define POP_STACK(top) ((--top)[0])
+
+    /* Jump to cleanup label instead of return */
+    #define RETURN(r) \
+        do { \
+            retval = r; \
+            goto cleanup; \
+        } while (0)
+
+    struct internal_state* state;
+    int retval = R_OK;
+    vmop_t* ops;
+    uint passes = 0;
+
+    /* These are the registers passed to PCRE */
+    int pcreregs[MAX_REGS * 3];
+
+    /* And over here we have the stack */
+    uint* vmStack = NULL;
+    size_t allocStack = 0;
+    uint** stack;
+    #define STACK *stack
+
+#ifdef USE_STACK_VARS
+    memory* stackVars = NULL;
+#endif
+
+    /* The text buffer */
+    char* text = NULL;
+
+    /* We just setup some vars for easy access to structs */
+    state = stream->state;
+    ops = script->ops;
+    zero(state->vmregs);
+    ASSERT(script->ops);
+
+    stack = (uint**)&(state->vmregs[r_sp >> 2]);
+
+
+    /* Set the initial limits in the x1 and y1 registers */
+    REGISTER(r_x1) = REL_TO_ABS(0, state);
+    REGISTER(r_y1) = REL_TO_ABS(stream->availIn, state);
+
+    /* Stack variables */
+#ifdef USE_STACK_VARS
+    stackVars = (memory*)malloc(sizeof(memory));
+    if(!stackVars)
+        RETURN(R_NOMEM);
+
+    if(!memoryInit(stackVars))
+    {
+        /* Never hand a half-initialized frame to the cleanup loop */
+        free(stackVars);
+        stackVars = NULL;
+        RETURN(R_NOMEM);
+    }
+
+    /* The outermost frame terminates the chain walked during cleanup */
+    stackVars->prev = NULL;
+#endif
+
+    /* Preallocate some memory for the text buffer */
+    text = malloc(sizeof(char) * 256);
+    if(!text) RETURN(R_NOMEM);
+    text[0] = 0;
+
+
+    while(1)
+    {
+        vmop_t op = *ops;
+        ops++;
+
+        /* The text buffer should always be pointing to a valid
+           block of memory */
+        ASSERT(text != NULL);
+
+
+        /* Check and see if we have enough stack and allocate if not */
+        if((STACK + 0x010) > (vmStack + allocStack))
+        {
+            size_t off = STACK - vmStack;
+            vmStack = (uint*)reallocf(vmStack, (allocStack + 0x080) * sizeof(uint));
+            if(!vmStack)
+                RETURN(R_NOMEM);
+
+            allocStack += 0x080;
+            STACK = vmStack + off;
+        }
+
+
+        /* Main switch which dispatches the ops */
+        switch(op)
+        {
+
+        /* end: Finished executing script (but can come back again for more) */
+        case o_end:
+            RETURN(R_OK);
+
+
+        /* nop: Do nothing */
+        case o_nop:
+            break;
+
+
+        /* push: Push a value on the stack */
+        case o_push:
+            PUSH_STACK(STACK, RVALUE(ops));
+
+            INC_ARGUMENT(ops);
+            break;
+
+
+        /* pop: Pop a value from the stack */
+        case o_pop:
+            *(LVALUE(ops)) = POP_STACK(STACK);
+            INC_ARGUMENT(ops);
+            break;
+
+
+        /* lock: Lock the area between the selected area */
+        case o_lock:
+            {
+                uint beg = RVALUE(ops);
+                INC_ARGUMENT(ops);
+
+                if(!locksAdd(&(state->lcks), beg, RVALUE(ops)))
+                    RETURN(R_NOMEM);
+
+                INC_ARGUMENT(ops);
+
+                /* Locking is an action too */
+                REGISTER(r_ac) = 1;
+            }
+            break;
+
+
+        /* check: Check the selected area against any locks */
+        case o_check:
+            {
+                uint beg = RVALUE(ops);
+                INC_ARGUMENT(ops);
+
+                /* Check the given range against the locks; the flag is
+                   set when the range is free of locks */
+                REGISTER(r_fe) = locksCheck(&(state->lcks), stream, beg, RVALUE(ops))
+                                    ? 0 : 1;
+
+                INC_ARGUMENT(ops);
+            }
+            break;
+
+
+
+        /* match: Match a pattern buffer */
+        case o_match:
+            {
+                uint beg, end, begLimit, endLimit, begMatch, endMatch;
+                match_op* header;
+                pcre* re = NULL;
+                pcre_extra* extra = NULL;
+                bool cache = false;
+                size_t i = 0;
+                int rt;
+
+                beg = ABS_TO_REL(RVALUE(ops), state);
+                INC_ARGUMENT(ops);
+                end = ABS_TO_REL(RVALUE(ops), state);
+                INC_ARGUMENT(ops);
+
+                begLimit = ABS_TO_REL(REGISTER(r_x1), state);
+                endLimit = ABS_TO_REL(REGISTER(r_y1), state);
+
+                /* Determine what kind of a match we're talking about here */
+                header = (match_op*)ops;
+
+                /* This is the only type of regular expression we support
+                   at the moment */
+                ASSERT(header->type & kMatchPcre);
+
+
+                /*
+                 * Check if we've already cached the compiled
+                 * regular expression.
+                 * - We use the op header pointer as key to the pcre struct
+                 * - And the pcre struct pointer as the key to the pcre_extra struct
+                 */
+                if((re = (pcre*)dataGetValue(&(state->working), header)) != NULL)
+                {
+                    /* This prevents the freeing of stuff below */
+                    cache = true;
+
+                    /* Get out the pcre_extra if present */
+                    extra = (pcre_extra*)dataGetValue(&(state->working), re);
+                }
+
+                /* Otherwise we compile it */
+                else
+                {
+                    match_op_pcre* pcreop = (match_op_pcre*)header;
+                    char* pattern = strdup(pcreop->pattern);
+                    int erroroffset;
+                    const char* error = NULL;
+
+                    if(!pattern)
+                        RETURN(R_NOMEM);
+
+                    /* If there's variables then don't cache and ... */
+                    if(!(cache = !variablesHasVars(pattern)))
+                    {
+                        /* ... do variable substitution */
+                        rt = variablesSubstitute(&(state->vars), stream, script, &pattern, true);
+                        if(rt < 0)
+                        {
+                            /* pattern is either still ours or was set to
+                               NULL by a failed reallocation */
+                            free(pattern);
+                            RETURN(rt);
+                        }
+                    }
+
+                    /* Compile the pattern */
+                    re = pcre_compile(pattern, pcreop->options,
+                                      &error, &erroroffset, NULL);
+
+                    /* The pattern text is not needed past this point;
+                       releasing it here covers every exit below */
+                    free(pattern);
+                    pattern = NULL;
+
+                    if(!re)
+                    {
+                        if(error)
+                            scriptSetError(script, error);
+                        RETURN(R_REGEXP);
+                    }
+
+                    /*
+                     * If there's no variables in the regular expression
+                     * then we can cache the compiled pcre and study it
+                     */
+                    if(cache)
+                    {
+                        if(!dataSetValue(&(state->working), header, re))
+                            RETURN(R_NOMEM);
+
+                        extra = pcre_study(re, 0, &error);
+                        if(error)
+                        {
+                            scriptSetError(script, error);
+                            RETURN(R_REGEXP);
+                        }
+
+                        if(extra && !dataSetValue(&(state->working), re, extra))
+                            RETURN(R_NOMEM);
+                    }
+                }
+
+
+                begMatch = beg;
+                endMatch = end;
+
+                /* Set failed flag */
+                REGISTER(r_fe) = 0;
+
+                /* Skip the locks that end before the search range starts */
+                while(i < locksSize(&(state->lcks)) &&
+                      locksEnd(&(state->lcks), i) <= REL_TO_ABS(beg, state))
+                    i++;
+
+                do
+                {
+                    if(i < locksSize(&(state->lcks)))
+                        endMatch = ABS_TO_REL(locksBeg(&(state->lcks), i), state);
+                    else if(i >= locksSize(&(state->lcks)))
+                        endMatch = end;
+
+                    /* If that put it too high then bring back */
+                    if(endMatch > end)
+                        endMatch = end;
+
+                    /* If we haven't overstepped the bounds, search */
+                    if(begMatch <= endMatch)
+                    {
+                        int opts = 0;
+
+                        /*
+                         * We have to do a little trickery here
+                         * First we fool pcre_exec into thinking that our range start
+                         * is the beginning of the string. We fix up the registers later
+                         *
+                         * Then we fool'm into thinking that the our search end is the
+                         * end of the string, but if it's really not the end of the
+                         * range then set not_eol so that '$' doesn't match.
+                         */
+
+                        if(endMatch != endLimit)
+                            opts |= PCRE_NOTEOL;
+
+                        /* Do actual search */
+                        /* TODO: Get this ready for binary replacements */
+                        rt = pcre_exec(re, extra,
+                                       ((char*)stream->nextIn + begLimit), /* Data */
+                                       endMatch - begLimit,                /* size */
+                                       begMatch - begLimit,                /* position */
+                                       opts,                               /* options */
+                                       pcreregs,                           /* group registers */
+                                       MAX_REGS * 3);                      /* number of registers */
+
+                        /* These are programmer errors */
+                        ASSERT(rt != PCRE_ERROR_NULL);
+                        ASSERT(rt != PCRE_ERROR_BADOPTION);
+                        ASSERT(rt != PCRE_ERROR_BADMAGIC);
+                        ASSERT(rt != PCRE_ERROR_UNKNOWN_NODE);
+                        ASSERT(rt != PCRE_ERROR_NOSUBSTRING);
+
+                        if(rt == PCRE_ERROR_NOMEMORY)
+                        {
+                            /* An uncached expression belongs to this op
+                               alone, so it must not outlive it */
+                            if(!cache)
+                            {
+                                if(re)
+                                    pcre_free(re);
+                                if(extra)
+                                    pcre_free(extra);
+                            }
+
+                            RETURN(R_NOMEM);
+                        }
+
+                        if(rt >= 0)
+                        {
+                            /* Found a match! */
+
+                            /* Do group register maintainance */
+                            REGISTER(r_cg) = rt;
+                            regsSet(state, pcreregs, REL_TO_ABS(begLimit, state));
+
+                            /* Set succeeded flag */
+                            REGISTER(r_fe) = 1;
+
+                            /* Get out of here */
+                            break;
+                        }
+
+                        /* Make sure we've got all the errors */
+                        else if(rt != -1)
+                            ASSERT(0);
+
+                    }
+
+                    /* The beginning of the next search block should be
+                       the end of the current lock */
+                    if(i < locksSize(&(state->lcks)))
+                        begMatch = ABS_TO_REL(locksEnd(&(state->lcks), i), state);
+
+                    i++;
+                }
+                while(endMatch < end);
+
+                /* Blocks obtained from PCRE go back through pcre_free,
+                   the counterpart of the allocator PCRE used */
+                if(!cache)
+                {
+                    if(re)
+                        pcre_free(re);
+                    if(extra)
+                        pcre_free(extra);
+                }
+
+                ops += match_op_size(*header);
+            }
+
+            break;
+
+
+
+        /* setvar: Add selected text to a variable */
+        case o_setvar:
+            {
+                var_op* pOp = (var_op*)ops;
+
+                /* TODO: Get this ready for binary replacements */
+                if(!variablesAddBytes(&(state->vars), (char*)pOp->name, text, strlen(text)))
+                    RETURN(R_NOMEM);
+
+                ops += var_op_size(*pOp);
+            }
+            break;
+
+
+        /* clrvar: clear a variable */
+        case o_clrvar:
+            {
+                var_op* pOp = (var_op*)ops;
+
+                /* TODO: Get this ready for binary variables */
+                variablesClear(&(state->vars), (char*)pOp->name);
+                ops += var_op_size(*pOp);
+            }
+            break;
+
+
+        /* je, jne, jmp: conditional and unconditional jumps */
+        case o_je:
+        case o_jne:
+        case o_jmp:
+
+            /* Get the conditional jumps out of the way */
+            if((op == o_je && REGISTER(r_fe) == 0) ||
+               (op == o_jne && REGISTER(r_fe) > 0))
+            {
+                INC_ARGUMENT(ops);
+                break;
+            }
+            else
+            {
+                /* Now do the jump */
+                vmop_t* opOld = ops;
+
+                /* Change the op to the indicated pos */
+                ops = script->ops + RVALUE(ops);
+
+                /*
+                 * If a backwards jump then count as a "pass" thingy
+                 * we use this count to catch endless loop errors
+                 */
+
+                /* TODO: I don't think we need this anymore */
+                if(opOld > ops)
+                {
+                    passes++;
+
+                    /* Leave through cleanup so the stack and the text
+                       buffer are released */
+                    if(passes > MAX_PASSES)
+                        RETURN(R_LOOP);
+                }
+            }
+            break;
+
+
+        /* repl: Replace last match with given text */
+        case o_repl:
+            {
+                uint beg, end;
+                replacement* rep = NULL;
+                int rt;
+
+                beg = RVALUE(ops);
+                INC_ARGUMENT(ops);
+                end = RVALUE(ops);
+                INC_ARGUMENT(ops);
+
+                /* Get the formatted replacement */
+                rt = replacementAlloc(stream, text, &rep);
+                if(rt < 0)
+                    RETURN(rt);
+
+                rep->beg = beg;
+                rep->end = end;
+
+                /* If we have a confirm function then ask about replacement */
+                if(stream->fMatch)
+                {
+                    r_replace repl;
+                    repl.from = ABS_TO_REL(beg, state);
+                    repl.flen = end - beg;
+                    repl.to = (byte*)rep->text;
+                    repl.tlen = strlen(rep->text);
+                    repl.offset = beg;
+
+                    /* A false answer vetoes this replacement */
+                    if(!(stream->fMatch)(stream, &repl))
+                    {
+                        free(rep);
+                        rep = NULL;
+                    }
+                }
+
+                if(rep)
+                {
+                    /* Add it! */
+                    REGISTER(r_ac) = 1;
+                    stream->total++;
+                    replacementAdd(rep, stream);
+                }
+            }
+
+            break;
+
+
+        /* stop: Stop execution of script */
+        case o_stop:
+            {
+                uint error = RVALUE(ops);
+                INC_ARGUMENT(ops);
+
+                /* The current text buffer becomes the script's message */
+                scriptSetError(script, text);
+
+                if(error == 0)
+                    RETURN(R_DONE);
+                else
+                    RETURN(R_USER);
+            }
+            break;
+
+
+        /* cmp: compare two values and set registers accordingly */
+        case o_cmp:
+            {
+                uint arg1, arg2;
+
+                arg1 = RVALUE(ops);
+                INC_ARGUMENT(ops);
+                arg2 = RVALUE(ops);
+                INC_ARGUMENT(ops);
+
+                REGISTER(r_fe) = (arg1 == arg2) ? 1 : 0;
+                REGISTER(r_fg) = (arg1 > arg2) ? 1 : 0;
+                REGISTER(r_fl) = (arg1 < arg2) ? 1 : 0;
+            }
+            break;
+
+
+        /* test: Test whether value is 0 or not */
+        case o_test:
+            REGISTER(r_fe) = RVALUE(ops) != 0;
+            INC_ARGUMENT(ops);
+            break;
+
+
+        /* mov: Set one value to another */
+        case o_mov:
+            {
+                uint* dest = LVALUE(ops);
+                INC_ARGUMENT(ops);
+                *dest = RVALUE(ops);
+                INC_ARGUMENT(ops);
+            }
+            break;
+
+
+        /* call: Call a function */
+        case o_call:
+            {
+#ifdef USE_STACK_VARS
+                memory* newStack;
+#endif
+                uint pos = RVALUE(ops);
+                INC_ARGUMENT(ops);
+
+                /* Push current instruction pointer on the stack */
+                PUSH_STACK(STACK, ops - script->ops);
+
+                /* Change instruction pointer to function start */
+                ops = script->ops + pos;
+
+#ifdef USE_STACK_VARS
+                /* Push new set of stack variables */
+                newStack = (memory*)malloc(sizeof(memory));
+                if(!newStack)
+                    RETURN(R_NOMEM);
+
+                if(!memoryInit(newStack))
+                {
+                    /* Not linked into the chain yet, so cleanup would
+                       never see it */
+                    free(newStack);
+                    RETURN(R_NOMEM);
+                }
+
+                newStack->prev = stackVars;
+                stackVars = newStack;
+#endif
+            }
+            break;
+
+
+        /* ret: Return from a function */
+        case o_ret:
+            {
+#ifdef USE_STACK_VARS
+                memory* prev;
+#endif
+                /* Get location to return to */
+                uint pos = POP_STACK(STACK);
+
+                /* Change instruction pointer to returned location */
+                ops = script->ops + pos;
+
+#ifdef USE_STACK_VARS
+                /* Get previous set of stack vars */
+                ASSERT(stackVars->prev);
+                prev = stackVars->prev;
+                memoryFree(stackVars);
+                free(stackVars);
+                stackVars = prev;
+#endif
+            }
+            break;
+
+
+        /* add: Add 2 values */
+        case o_add:
+            {
+                /* Get first value */
+                uint* val = LVALUE(ops);
+                INC_ARGUMENT(ops);
+                *val += RVALUE(ops);
+                INC_ARGUMENT(ops);
+            }
+            break;
+
+
+        /* sub: Subtract values */
+        case o_sub:
+            {
+                /* Get first value */
+                uint* val = LVALUE(ops);
+                INC_ARGUMENT(ops);
+                *val -= RVALUE(ops);
+                INC_ARGUMENT(ops);
+            }
+            break;
+
+
+        /* text: set the text buffer */
+        case o_text:
+            {
+                text_op* top = (text_op*)ops;
+                int rt;
+
+                text = (char*)reallocf(text, top->len + 1);
+                if(!text) RETURN(R_NOMEM);
+
+                memcpy(text, top->string, top->len * sizeof(char));
+                text[top->len] = 0;
+
+                /* Replace mode: variables are inserted unescaped */
+                rt = variablesSubstitute(&(state->vars), stream, script, &text, false);
+                if(rt < 0) RETURN(rt);
+
+                ops += text_op_size(*top);
+            }
+            break;
+
+        /* msg: output text in buffer */
+        case o_msg:
+            {
+                if(stream->fMessage)
+                    (stream->fMessage)(stream, text);
+            }
+            break;
+
+        default:
+            /* Invalid Instruction! */
+            ASSERT(false);
+        }
+    }
+
+cleanup:
+    /* free(NULL) is a no-op, so neither buffer needs a guard */
+    free(vmStack);
+    free(text);
+
+#ifdef USE_STACK_VARS
+    /* Unwind whatever frames calls left behind */
+    while(stackVars)
+    {
+        memory* prev = stackVars->prev;
+        memoryFree(stackVars);
+        free(stackVars);
+        stackVars = prev;
+    }
+#endif
+
+    return retval;
+}
+
+
+
diff --git a/lib/execute.h b/lib/execute.h
new file mode 100644
index 0000000..b3cbb40
--- /dev/null
+++ b/lib/execute.h
@@ -0,0 +1,179 @@
+/*
+ * AUTHOR
+ * N. Nielsen
+ *
+ * VERSION
+ * 2.1.2b
+ *
+ * LICENSE
+ * This software is in the public domain.
+ *
+ * The software is provided "as is", without warranty of any kind,
+ * express or implied, including but not limited to the warranties
+ * of merchantability, fitness for a particular purpose, and
+ * noninfringement. In no event shall the author(s) be liable for any
+ * claim, damages, or other liability, whether in an action of
+ * contract, tort, or otherwise, arising from, out of, or in connection
+ * with the software or the use or other dealings in the software.
+ *
+ * SUPPORT
+ * Send bug reports to: <nielsen@memberwebs.com>
+ */
+
+#ifndef __EXECUTE_H__20010618
+#define __EXECUTE_H__20010618
+
+#include "ops.h"
+
+/* Allocate Locks, Memory and Variables in blocks this size */
+#define BLOCK_SIZE 0x20 /* 32 entries; the lock array starts out this large */
+
+static const size_t kNoMatch = 0xFFFFFFFF; /* NOTE(review): presumably a "nothing found" position - no use is visible in this part of the file */
+static const size_t kInfinity = 0xFFFFFFFE; /* NOTE(review): presumably an "unbounded" marker - no use is visible in this part of the file */
+
+/* Maximum amount of registers allowed */
+#define MAX_REGS 10 /* vmExecute hands PCRE an array of MAX_REGS * 3 ints */
+
+
+/* =============================================================================
+ * MEMORY: A growable set of key/value slots holding uint values.
+ *
+ * The VM keeps one of these for script memory and, when built with
+ * USE_STACK_VARS, one per function call for stack variables; those
+ * per-call sets are chained together through prev.
+ */
+
+typedef struct _memory
+{
+ /* Internal structure */
+ struct mem
+ {
+ uint key; /* identifies the slot */
+ uint value; /* contents of the slot */
+ }
+ *thememory;
+
+ struct _memory* prev; /* the caller's set of stack variables, when chained */
+ size_t alloc; /* amount allocated of above */
+ size_t cur; /* amount used */
+}
+memory;
+
+bool memoryInit(memory* mem);
+void memoryFree(memory* mem);
+uint* memoryValue(memory* mem, uint key); /* pointer to the slot for key; the VM both reads and assigns through it */
+void memoryClearAll(memory* mem);
+
+
+/* =============================================================================
+ * DATA: A set of pointer values looked up by pointer keys.
+ *
+ * The VM uses it as its working cache: a compiled pattern is stored
+ * under the address of its match op, and the study data for that
+ * pattern under the address of the compiled pattern.
+ */
+
+typedef struct _data
+{
+ /* Internal structure */
+ struct dat
+ {
+ void* key; /* lookup key */
+ void* value; /* pointer stored under that key */
+ }
+ *thedata;
+
+ size_t alloc; /* amount allocated of above */
+ size_t cur; /* amount used */
+}
+data;
+
+bool dataInit(data* dat);
+void dataFree(data* dat);
+void* dataGetValue(data* dat, void* key); /* the VM treats a NULL result as "not cached" */
+bool dataSetValue(data* dat, void* key, void* value); /* the VM treats a false result as out of memory */
+void dataClearAll(data* dat);
+
+
+/* ==========================================================================
+ * VARIABLES: Contains set of variables (and variable arrays) currently
+ * set. This is maintained across blocks.
+ *
+ * variablesSubstitute() expands the variables referenced in *pstr in
+ * place, reallocating the string as needed. With mode true (used for
+ * patterns) inserted values are escaped and array variables become
+ * a non-capturing group of alternatives; with mode false (used for
+ * replacement text) the first value is inserted as is and backslash
+ * escapes are removed.
+ */
+
+typedef struct _variables
+{
+ /* Internal structure */
+ struct vari
+ {
+ char* name; /* variable name */
+ char* value; /* NOTE(review): appears to hold one or more values back to back (see the array handling in variablesSubstitute) - confirm layout */
+ }
+ *thevars;
+
+ size_t alloc; /* Amount allocated */
+ size_t cur; /* Amount used */
+}
+variables;
+
+bool variablesInit(variables* vars);
+void variablesFree(variables* vars);
+bool variablesAdd(variables* vars, const char* name, const char* val);
+bool variablesAddBytes(variables* vars, const char* name,
+ const char* val, size_t cnt);
+bool variablesClear(variables* vars, const char* name);
+bool variablesClearAll(variables* vars);
+int variablesSubstitute(variables* vars, r_stream* stream, r_script* script,
+ char** pstr, bool mode);
+bool variablesValidName(const char* name);
+bool variablesHasVars(const char* string);
+
+
+
+/* ======================================================================
+ * REPLACEMENT: A list of replacements to be written out.
+ *
+ * Each entry is a single allocation: replacementAlloc sizes it as the
+ * structure plus the length of the text, so the string continues past
+ * the declared one-character array.
+ */
+
+typedef struct _replacement
+{
+ size_t beg; /* Beginning of text to be replaced */
+ size_t end; /* end ditto */
+ struct _replacement* next; /* next replacement in list */
+ char text[1]; /* Text to replace with (hangs off end) */
+}
+replacement;
+
+/* Allocate a replacement for the given text. */
+int replacementAlloc(r_stream* stream, const char* text, replacement** pprep);
+/* Place replacement in the replacement list in the appropriate order */
+void replacementAdd(replacement* repl, r_stream* stream);
+/* Free the given replacement and return the one that followed it */
+replacement* replacementPop(replacement* repl);
+/* Dump all replacements to stderr */
+void replacementDump(r_stream* stream);
+
+
+/* ======================================================================
+ * LOCKS: A list of locks to be checked against
+ *
+ * A lock is a range [beg, end) that has been marked off. locksAdd
+ * merges a new range into any lock it overlaps instead of storing it
+ * separately.
+ */
+
+typedef struct _locks
+{
+    /* Internal structure */
+    struct lock
+    {
+        size_t beg;     /* start of the locked range */
+        size_t end;     /* end of the locked range */
+    }
+    *thelocks;
+
+    size_t cur;         /* number of locks in use */
+    size_t alloc;       /* number of locks allocated */
+}
+locks;
+
+/* Full prototypes, so that calls are checked against the definitions */
+bool locksInit(locks* lcks);
+void locksFree(locks* lcks);
+/* Add a range, merging with overlapping locks; false when out of memory */
+bool locksAdd(locks* lcks, size_t beg, size_t end);
+/* True when the range overlaps any lock */
+bool locksCheck(locks* lcks, r_stream* stream, size_t beg, size_t end);
+/* Forget all locks but keep the allocated array */
+#define locksClearAll(lcks) ((lcks)->cur = 0)
+/* Number of locks in the set */
+#define locksSize(lcks) ((lcks)->cur)
+/* Start and end of the lock at index idx (no bounds checking) */
+#define locksBeg(lcks, idx) ((lcks)->thelocks[(idx)].beg)
+#define locksEnd(lcks, idx) ((lcks)->thelocks[(idx)].end)
+
+
+
+bool isEscaped(const char* str, const char* posi); /* variablesHasVars passes the start of the string and the position of a '%' and takes a true result to mean that character is escaped */
+
+#endif /* __EXECUTE_H__20010618 */
diff --git a/lib/ops.h b/lib/ops.h
new file mode 100644
index 0000000..86bc017
--- /dev/null
+++ b/lib/ops.h
@@ -0,0 +1,349 @@
+/*
+ * AUTHOR
+ * N. Nielsen
+ *
+ * VERSION
+ * 2.1.2b
+ *
+ * LICENSE
+ * This software is in the public domain.
+ *
+ * The software is provided "as is", without warranty of any kind,
+ * express or implied, including but not limited to the warranties
+ * of merchantability, fitness for a particular purpose, and
+ * noninfringement. In no event shall the author(s) be liable for any
+ * claim, damages, or other liability, whether in an action of
+ * contract, tort, or otherwise, arising from, out of, or in connection
+ * with the software or the use or other dealings in the software.
+ *
+ * SUPPORT
+ * Send bug reports to: <nielsen@memberwebs.com>
+ */
+
+#ifndef __OPS_H__20000616
+#define __OPS_H__20000616
+
+
+/* ----------------------------------------------------------------------
+ Op Arguments
+
+ The low two bits of an op argument are its type.
+ 00 = value type (4 bytes)
+ 01 = memory type (4 bytes)
+ 10 = register type (1 byte)
+ 11 = stack type (4 bytes)
+*/
+
+/* Type tags, kept in the low two bits of an encoded argument */
+#define ARG_VAL_TYPE 0x00
+#define ARG_MEM_TYPE 0x01
+#define ARG_REG_TYPE 0x02
+#define ARG_STACK_TYPE 0x03
+
+/* Encode: shift the payload up two bits and put the type tag below it */
+#define ARG_MAKE_STACK(v) ((((v) << 2) | ARG_STACK_TYPE))
+#define ARG_MAKE_MEMORY(v) ((((v) << 2) | ARG_MEM_TYPE))
+#define ARG_MAKE_VALUE(v) ((uint)((v) << 2))
+#define ARG_MAKE_REGISTER(v) ((byte)(((v) << 2) | ARG_REG_TYPE))
+
+/* Decode: drop the type tag and return the payload */
+#define ARG_GET_MEMORY(v) ((uint)((v) >> 2))
+#define ARG_GET_VALUE(v) ((uint)((v) >> 2))
+#define ARG_GET_REGISTER(v) ((byte)((v) >> 2))
+#define ARG_GET_STACK(v) ((uint)((v) >> 2))
+
+
+/* The type tag of an encoded argument */
+#define ARG_TYPE(v) ((v) & 0x03)
+/* Encoded size in bytes: registers take 1 byte, everything else 4 */
+#define ARG_SIZE(v) ((ARG_TYPE(v) == ARG_REG_TYPE) ? 1 : 4)
+
+/* Advance 'op' past the argument it points at.
+ NOTE(review): the type is read from *(op), so the tag bits must be in
+ the first element 'op' points to - confirm against the code that
+ writes arguments. */
+#define INC_ARGUMENT(op) ((op) += ARG_SIZE(*(op)))
+
+/* ----------------------------------------------------------------------
+ Registers (type tag is binary 10, i.e. ARG_REG_TYPE, in the low two bits)
+*/
+
+ /* General purpose flag
+ gets set by cmp, on equal, match on success, check on success
+ gets read by je and jne for jump conditions
+ */
+ #define r_fe ((byte)ARG_MAKE_REGISTER(0x00))
+ /* const byte fe = ARG_MAKE_REGISTER(0x00); */
+
+ /* Greater and less than flags
+ gets set by cmp
+ */
+ #define r_fg ((byte)ARG_MAKE_REGISTER(0x01))
+ /* const byte fg = ARG_MAKE_REGISTER(0x01); */
+ #define r_fl ((byte)ARG_MAKE_REGISTER(0x02))
+ /* const byte fl = ARG_MAKE_REGISTER(0x02); */
+
+ /* Action flag
+ gets set by match on success
+ */
+ #define r_ac ((byte)ARG_MAKE_REGISTER(0x03))
+ /* const byte ac = ARG_MAKE_REGISTER(0x03); */
+
+ /* Stack pointer */
+ #define r_sp ((byte)ARG_MAKE_REGISTER(0x0A))
+ /* const byte sp = ARG_MAKE_REGISTER(0x0A); */
+
+ /* Base pointer */
+ #define r_bp ((byte)ARG_MAKE_REGISTER(0x0B))
+ /* const byte bp = ARG_MAKE_REGISTER(0x0B); */
+
+ /* Beginning and end registers
+ gets set by match
+ */
+ #define r_b0 ((byte)ARG_MAKE_REGISTER(0x10))
+ /* const byte b0 = ARG_MAKE_REGISTER(0x10); */
+ #define r_b1 ((byte)ARG_MAKE_REGISTER(0x11))
+ /* const byte b1 = ARG_MAKE_REGISTER(0x11); */
+ #define r_b2 ((byte)ARG_MAKE_REGISTER(0x12))
+ /* const byte b2 = ARG_MAKE_REGISTER(0x12); */
+ #define r_b3 ((byte)ARG_MAKE_REGISTER(0x13))
+ /* const byte b3 = ARG_MAKE_REGISTER(0x13); */
+ #define r_b4 ((byte)ARG_MAKE_REGISTER(0x14))
+ /* const byte b4 = ARG_MAKE_REGISTER(0x14); */
+ #define r_b5 ((byte)ARG_MAKE_REGISTER(0x15))
+ /* const byte b5 = ARG_MAKE_REGISTER(0x15); */
+ #define r_b6 ((byte)ARG_MAKE_REGISTER(0x16))
+ /* const byte b6 = ARG_MAKE_REGISTER(0x16); */
+ #define r_b7 ((byte)ARG_MAKE_REGISTER(0x17))
+ /* const byte b7 = ARG_MAKE_REGISTER(0x17); */
+ #define r_b8 ((byte)ARG_MAKE_REGISTER(0x18))
+ /* const byte b8 = ARG_MAKE_REGISTER(0x18); */
+ #define r_b9 ((byte)ARG_MAKE_REGISTER(0x19))
+ /* const byte b9 = ARG_MAKE_REGISTER(0x19); */
+ #define r_e0 ((byte)ARG_MAKE_REGISTER(0x1B))
+ /* const byte e0 = ARG_MAKE_REGISTER(0x1B); */
+ #define r_e1 ((byte)ARG_MAKE_REGISTER(0x1C))
+ /* const byte e1 = ARG_MAKE_REGISTER(0x1C); */
+ #define r_e2 ((byte)ARG_MAKE_REGISTER(0x1D))
+ /* const byte e2 = ARG_MAKE_REGISTER(0x1D); */
+ #define r_e3 ((byte)ARG_MAKE_REGISTER(0x1E))
+ /* const byte e3 = ARG_MAKE_REGISTER(0x1E); */
+ #define r_e4 ((byte)ARG_MAKE_REGISTER(0x1F))
+ /* const byte e4 = ARG_MAKE_REGISTER(0x1F); */
+ #define r_e5 ((byte)ARG_MAKE_REGISTER(0x20))
+ /* const byte e5 = ARG_MAKE_REGISTER(0x20); */
+ #define r_e6 ((byte)ARG_MAKE_REGISTER(0x21))
+ /* const byte e6 = ARG_MAKE_REGISTER(0x21); */
+ #define r_e7 ((byte)ARG_MAKE_REGISTER(0x22))
+ /* const byte e7 = ARG_MAKE_REGISTER(0x22); */
+ #define r_e8 ((byte)ARG_MAKE_REGISTER(0x23))
+ /* const byte e8 = ARG_MAKE_REGISTER(0x23); */
+ #define r_e9 ((byte)ARG_MAKE_REGISTER(0x24))
+ /* const byte e9 = ARG_MAKE_REGISTER(0x24); */
+
+ /* The count of groups matched */
+ #define r_cg ((byte)ARG_MAKE_REGISTER(0x25))
+ /* const byte cg = ARG_MAKE_REGISTER(0x25); */
+
+ /* General purpose registers */
+ #define r_x0 ((byte)ARG_MAKE_REGISTER(0x30))
+ /* const byte x0 = ARG_MAKE_REGISTER(0x30); */
+ #define r_x1 ((byte)ARG_MAKE_REGISTER(0x31))
+ /* const byte x1 = ARG_MAKE_REGISTER(0x31); */
+ #define r_x2 ((byte)ARG_MAKE_REGISTER(0x32))
+ /* const byte x2 = ARG_MAKE_REGISTER(0x32); */
+ #define r_x3 ((byte)ARG_MAKE_REGISTER(0x33))
+ /* const byte x3 = ARG_MAKE_REGISTER(0x33); */
+ #define r_x4 ((byte)ARG_MAKE_REGISTER(0x34))
+ /* const byte x4 = ARG_MAKE_REGISTER(0x34); */
+ #define r_x5 ((byte)ARG_MAKE_REGISTER(0x35))
+ /* const byte x5 = ARG_MAKE_REGISTER(0x35); */
+ #define r_x6 ((byte)ARG_MAKE_REGISTER(0x36))
+ /* const byte x6 = ARG_MAKE_REGISTER(0x36); */
+ #define r_x7 ((byte)ARG_MAKE_REGISTER(0x37))
+ /* const byte x7 = ARG_MAKE_REGISTER(0x37); */
+ #define r_y0 ((byte)ARG_MAKE_REGISTER(0x38))
+ /* const byte y0 = ARG_MAKE_REGISTER(0x38); */
+ #define r_y1 ((byte)ARG_MAKE_REGISTER(0x39))
+ /* const byte y1 = ARG_MAKE_REGISTER(0x39); */
+ #define r_y2 ((byte)ARG_MAKE_REGISTER(0x3A))
+ /* const byte y2 = ARG_MAKE_REGISTER(0x3A); */
+ #define r_y3 ((byte)ARG_MAKE_REGISTER(0x3B))
+ /* const byte y3 = ARG_MAKE_REGISTER(0x3B); */
+ #define r_y4 ((byte)ARG_MAKE_REGISTER(0x3C))
+ /* const byte y4 = ARG_MAKE_REGISTER(0x3C); */
+ #define r_y5 ((byte)ARG_MAKE_REGISTER(0x3D))
+ /* const byte y5 = ARG_MAKE_REGISTER(0x3D); */
+ #define r_y6 ((byte)ARG_MAKE_REGISTER(0x3E))
+ /* const byte y6 = ARG_MAKE_REGISTER(0x3E); */
+ #define r_y7 ((byte)ARG_MAKE_REGISTER(0x3F))
+ /* const byte y7 = ARG_MAKE_REGISTER(0x3F); */
+
+ /* Well, almost all of the above are general purpose:
+ x1 and y1 are generally used for the limits
+ x0 and y0 are generally used for selecting areas
+ */
+
+ #define NUM_REGISTERS 0x40
+
+
+typedef unsigned char vmop_t;
+
+/* ----------------------------------------------------------------------
+ OP CODES:
+*/
+ /* END:
+ end of instructions! */
+ #define o_end ((vmop_t)(0x00))
+ /* const vmop_t end = 0x00; */
+
+ /* NOP:
+ Blank / space filler */
+ #define o_nop ((vmop_t)(0xD1))
+ /* const vmop_t nop = 0xD1; */
+
+ /* PUSH: (1 value param)
+ Copy new context and execute */
+ #define o_push ((vmop_t)(0xD2))
+ /* const vmop_t push = 0xD2; */
+
+ /* POP: (1 value param)
+ Remove current context and execute previous */
+ #define o_pop ((vmop_t)(0xD4))
+ /* const vmop_t pop = 0xD4; */
+
+ /* LOCK: (2 value params)
+ Lock data between the parameters */
+ #define o_lock ((vmop_t)(0xE0))
+ /* const vmop_t lock = 0xE0; */
+
+ /* CHECK: (2 value params)
+ Check data between parameters against locks */
+ #define o_check ((vmop_t)(0xE1))
+ /* const vmop_t check = 0xE1; */
+
+ /* MATCH: (2 value params, plus match structure)
+ Match the regexp against string between the limited beg and end */
+ #define o_match ((vmop_t)(0xC0))
+ /* const vmop_t match = 0xC0; */
+
+ /* SETVAR: (2 value params, plus variable name)
+ Add text between selected registers to a variable */
+ #define o_setvar ((vmop_t)(0xC2))
+ /* const vmop_t setvar = 0xC2; */
+
+ /* CLRVAR: (2 value params, plus variable name)
+ Clear a variable */
+ #define o_clrvar ((vmop_t)(0xC3))
+ /* const vmop_t clrvar = 0xC3; */
+
+ /* JMP: (1 address param)
+ Jump to the specified address */
+ #define o_jmp ((vmop_t)(0xD6))
+ /* const vmop_t jmp = 0xD6; */
+
+ /* JE: (1 address param)
+ Jump if fe flag is set */
+ #define o_je ((vmop_t)(0xD7))
+ /* const vmop_t je = 0xD7; */
+
+ /* JNE: (1 address param)
+ Jump if fe flag is not set */
+ #define o_jne ((vmop_t)(0xD8))
+ /* const vmop_t jne = 0xD8; */
+
+ /* REPL: (2 value params, plus repl sizeof(text_op))
+ Perform a replacement operation with current registers and string*/
+ #define o_repl ((vmop_t)(0xC4))
+ /* const vmop_t repl = 0xC4; */
+
+ /* STOP: (1 value param denoting error or not)
+ Plus an optional message */
+ #define o_stop ((vmop_t)(0x02))
+ /* const vmop_t stop = 0x02; */
+
+ /* CMP: (2 value params)
+ Compare the two values */
+ #define o_cmp ((vmop_t)(0xE9))
+ /* const vmop_t cmp = 0xE9; */
+ #define o_test ((vmop_t)(0xEA))
+ /* const vmop_t test = 0xEA; */
+
+ /* MOV: (2 value params)
+ set first value to be equal to second */
+ #define o_mov ((vmop_t)(0xD9))
+ /* const vmop_t mov = 0xD9; */
+
+ /* CALL: (1 address param)
+ Call function at address */
+ #define o_call ((vmop_t)(0xDA))
+ /* const vmop_t call = 0xDA; */
+
+ /* RET:
+ Return control to previous function */
+ #define o_ret ((vmop_t)(0xDB))
+ /* const vmop_t ret = 0xDB; */
+
+ /* ADD: (2 value params)
+ add second value to first */
+ #define o_add ((vmop_t)(0xDC))
+ /* const vmop_t add = 0xDC; */
+
+ /* SUB: (2 value params)
+ subtract second value from first */
+ #define o_sub ((vmop_t)(0xDD))
+ /* const vmop_t sub = 0xDD; */
+
+ /* TEXT: (text block)
+ put text in the data buffer */
+ #define o_text ((vmop_t)(0xDE))
+
+ /* MESSAGE:
+ output data buffer as a message */
+ #define o_msg ((vmop_t)(0xDF))
+
+
+/* ----------------------------------------------------------------------
+ OP STRUCTURES: for large ops structures to ease access
+*/
+
+#ifdef _WIN32
+#pragma pack(push, ops)
+#endif
+
+#pragma pack(1)
+
+/* Header shared by all match op structures */
+typedef struct _match_op
+{
+ short len; /* Length of structure (what match_op_size reports, in bytes) */
+ byte type; /* Match type (see kMatchPcre) */
+}
+match_op;
+
+/* Flag values for 'type' above (only one is defined so far) */
+static const byte kMatchPcre = 0x01;
+
+/* Match op used when header.type is kMatchPcre */
+typedef struct _match_op_pcre
+{
+ match_op header; /* Common header (length and type) */
+ short options; /* presumably the pcre options for the pattern - TODO confirm */
+ char pattern[1]; /* presumably the pattern text, extending past the end of the struct - TODO confirm */
+} match_op_pcre;
+
+#define match_op_size(op) (sizeof(byte) * (op).len)
+
+/* A block of text embedded in the op stream (total size: text_op_size) */
+typedef struct _text_op
+{
+ short len; /* Length of string */
+ byte string[1]; /* Text to put in buffer */
+} text_op;
+
+#define text_op_size(op) (sizeof(text_op) + (sizeof(char) * (op).len))
+
+/* A variable name embedded in the op stream (total size: var_op_size) */
+typedef struct _var_op
+{
+ /* NOTE(review): var_op_size adds len to sizeof(var_op), which suggests
+ len is the length of the name rather than of the entire structure -
+ confirm against the code that writes this op. */
+ short len; /* Length of entire structure */
+ byte name[1]; /* Variable name */
+} var_op;
+
+#define var_op_size(op) (sizeof(var_op) + (sizeof(char) * (op).len))
+
+#pragma pack()
+
+#ifdef _WIN32
+#pragma pack(pop, ops)
+#endif
+
+
+
+#endif /* __OPS_H__20000616 */ \ No newline at end of file
diff --git a/lib/priv.h b/lib/priv.h
new file mode 100644
index 0000000..dce25fe
--- /dev/null
+++ b/lib/priv.h
@@ -0,0 +1,95 @@
+/*
+ * AUTHOR
+ * N. Nielsen
+ *
+ * LICENSE
+ * This software is in the public domain.
+ *
+ * The software is provided "as is", without warranty of any kind,
+ * express or implied, including but not limited to the warranties
+ * of merchantability, fitness for a particular purpose, and
+ * noninfringement. In no event shall the author(s) be liable for any
+ * claim, damages, or other liability, whether in an action of
+ * contract, tort, or otherwise, arising from, out of, or in connection
+ * with the software or the use or other dealings in the software.
+ *
+ * SUPPORT
+ * Send bug reports to: <nielsen@memberwebs.com>
+ */
+
+#ifndef __PRIV_H__
+#define __PRIV_H__
+
+#include "pcre.h"
+#include "execute.h"
+
+/*
+ WARNING: There are two different sets of positions around.
+ One set is absolute. This is relative to the beginning of
+ the document. Relative positions are relative to the beginning
+ of the current buffer.
+
+ - Locks use absolute
+ - Registers use absolute
+ - match code uses relative
+ - Replacements use absolute
+*/
+
+#define REL_TO_ABS(v, s) ((v) + (s)->offset)
+#define ABS_TO_REL(v, s) ((v) - (s)->offset)
+
+
+/* Internal state of rlib. Allocated and zeroed by rlibInit, released by
+ rlibFree, and reachable through stream->state. */
+struct internal_state
+{
+ /* Set of replacements to be written out (consumed by rlibRun) */
+ replacement* replaces;
+
+ /* Set of variables currently set */
+ variables vars;
+
+ /* Set of watermarks for each regex */
+ memory mem;
+
+ /* Data for compiled expressions fastmaps etc... */
+ data working;
+
+ /* Locks */
+ locks lcks;
+
+ /* Total amount read before this point. This is what REL_TO_ABS and
+ ABS_TO_REL add or subtract; rlibRun advances it on the way out. */
+ size_t offset;
+
+ /* The virtual machine registers, NUM_REGISTERS of them (see ops.h) */
+ uint vmregs[NUM_REGISTERS];
+
+ /* Various options for library (the value given to rlibInit) */
+ long options;
+
+ /* A translate table for mixed case: rlibInit maps 'a'..'z' to
+ 'A'..'Z' and every other value to itself */
+ char caseTranslate[256];
+};
+
+
+/* Scripting functions */
+int compilerRun(r_script* script, const char* data);
+int compilerOptimize(r_script* script);
+void scriptSetError(r_script* script, const char* format, ...);
+
+/* Stream management functions */
+void opsIterate(vmop_t** ops);
+int opsFree(vmop_t* ops, size_t len);
+int opsDump(vmop_t* ops, FILE* f);
+
+
+/* Execution functions */
+int vmExecute(r_stream* stream, r_script* script);
+bool vmInit(r_stream* stream);
+void vmClean(r_stream* stream);
+void vmFree(r_stream* stream);
+
+static const char* kValidIdentifier = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
+
+
+#define USE_STACK_VARS
+
+#endif /* __PRIV_H__ */
diff --git a/lib/rep.h b/lib/rep.h
new file mode 100644
index 0000000..673b85d
--- /dev/null
+++ b/lib/rep.h
@@ -0,0 +1,107 @@
+/*
+ * AUTHOR
+ * N. Nielsen
+ *
+ * LICENSE
+ * This software is in the public domain.
+ *
+ * The software is provided "as is", without warranty of any kind,
+ * express or implied, including but not limited to the warranties
+ * of merchantability, fitness for a particular purpose, and
+ * noninfringement. In no event shall the author(s) be liable for any
+ * claim, damages, or other liability, whether in an action of
+ * contract, tort, or otherwise, arising from, out of, or in connection
+ * with the software or the use or other dealings in the software.
+ *
+ * SUPPORT
+ * Send bug reports to: <nielsen@memberwebs.com>
+ */
+
+#ifndef __REP_H__
+#define __REP_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "rlib.h"
+
+/*
+ * This file defines a higher level interface into the rlib library.
+ * See "rlib.h" for the low level interface
+ */
+
+
+/*
+ * r_context
+ * A context with all necessary information for replaces etc...
+ * Zero the structure before first use (see repLoad).
+ */
+typedef struct _r_context
+{
+ r_stream stream; /* The rlib stream */
+ r_script script; /* The rlib script */
+
+ r_uint block; /* The buffer size used for replaces (0: repFile sizes the buffer from the input file) */
+ r_uint options; /* Options to be passed to rlibInit */
+}
+r_context;
+
+
+/*
+ * repLoad
+ * Loads a rep script from a file. Call this before repInit.
+ * This will load various options from the file in addition
+ * to the raw script (see r_context).
+ *
+ * ctx: The rep context (which should be zero'd)
+ * script: A rep script file open for reading.
+ */
+int repLoad(r_context* ctx, FILE* script);
+
+
+/*
+ * repInit
+ * Called to initialize the rlib with your initialization options
+ * etc...
+ *
+ * ctx: The rep context to initialize
+ */
+int repInit(r_context* ctx);
+
+
+/*
+ * repFiles
+ * Runs a rep script on a file, writing output to an output
+ * file.
+ *
+ * ctx: The rep context.
+ * fileIn: The input file opened for reading.
+ * fileOut: The output file opened for writing.
+ */
+int repFiles(r_context* ctx, FILE* fileIn, FILE* fileOut);
+
+
+/*
+ * repFile
+ * Runs a rep script on a file, sending output to the specified
+ * callback function.
+ *
+ * ctx: The rep context.
+ * fileIn: The input file opened for reading.
+ */
+int repFile(r_context* ctx, FILE* fileIn);
+
+
+/*
+ * repFree
+ * Free a rep context.
+ *
+ * ctx: The rep context.
+ */
+void repFree(r_context* ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __REP_H__ */
diff --git a/lib/rlib.c b/lib/rlib.c
new file mode 100644
index 0000000..db84548
--- /dev/null
+++ b/lib/rlib.c
@@ -0,0 +1,591 @@
+/*
+ * AUTHOR
+ * N. Nielsen
+ *
+ * LICENSE
+ * This software is in the public domain.
+ *
+ * The software is provided "as is", without warranty of any kind,
+ * express or implied, including but not limited to the warranties
+ * of merchantability, fitness for a particular purpose, and
+ * noninfringement. In no event shall the author(s) be liable for any
+ * claim, damages, or other liability, whether in an action of
+ * contract, tort, or otherwise, arising from, out of, or in connection
+ * with the software or the use or other dealings in the software.
+ *
+ * SUPPORT
+ * Send bug reports to: <nielsen@memberwebs.com>
+ */
+
+/* =========================================================================
+ * Main RLIB
+ * 2000-2001 Copyright, Nate Nielsen
+ */
+
+#include <stdarg.h>
+#include "common/usuals.h"
+#include "common/compat.h"
+#include "lib/rlib.h"
+#include "priv.h"
+#include "rep.h"
+#include "common/binfile.h"
+#include "common/repfile.h"
+
+/* rlibInit: --------------------------------------------------------------
+ * Initialize a stream. We already expect stream to be zero'd
+ *
+ * stream: The stream to initialize; stream->state must be NULL
+ * options: Option flags, stored in the state for later calls
+ *
+ * Returns R_OK on success, R_INVARG when stream is NULL or already has
+ * a state, and R_NOMEM when the state or the vm could not be set up.
+ * When vmInit fails the state stays attached to the stream, so the
+ * caller should still call rlibFree to release it.
+ */
+
+int rlibInit(r_stream* stream, long options)
+{
+    int i;
+    struct internal_state* state;
+
+    if(stream == NULL || stream->state != NULL)
+        return R_INVARG;
+
+    /* Allocate a state */
+    state = stream->state =
+        (struct internal_state*)malloc(sizeof(struct internal_state));
+
+    if(!state)
+        return R_NOMEM;
+
+    zero(*state);
+
+    /* Init the stream for the current session */
+    if(!vmInit(stream))
+        return R_NOMEM;
+
+    state->options = options;
+
+    /* Init our case table: identity, except 'a'..'z' map to 'A'..'Z' */
+    /* we only do this once for the whole bunch */
+    for(i = 0; i < 256; i++)
+        state->caseTranslate[i] = i;
+    for(i = 'a'; i <= 'z'; i++)
+        state->caseTranslate[i] = i - ('a' - 'A');
+
+    return R_OK;
+}
+
+
+/* rlibCompile: ----------------------------------------------------
+ * Compile a script
+ *
+ * script: The script structure handed on to the compiler
+ * data: The script text (repLoad passes a null terminated buffer)
+ *
+ * Returns whatever compilerRun returns; repLoad treats a negative
+ * value as failure.
+ */
+int rlibCompile(r_script* script, const char* data)
+{
+ return compilerRun(script, data);
+}
+
+
+/* rlibClear: -------------------------------------------------------------
+ * Prepare stream for a new file. Does nothing when stream is NULL or
+ * has not been initialized.
+ */
+void rlibClear(r_stream* stream)
+{
+    if(stream != NULL && stream->state != NULL)
+        vmClean(stream);
+}
+
+
+
+/* rlibRun: ---------------------------------------------------------------
+ * The replacement coordinator: executes the script on the data currently
+ * hooked to the stream, then writes out the untouched text and the queued
+ * replacements through stream->fWrite.
+ *
+ * stream: An initialized stream (see rlibInit) with nextIn / availIn set
+ * script: The script to execute
+ * done: Non-zero when the caller has no more input to give
+ *
+ * Returns R_INVARG when the stream is missing, uninitialized or lacks the
+ * callback its mode needs, R_IOERR when the write callback fails, any
+ * negative result of vmExecute, R_IN when there is nothing to work on or
+ * more input is wanted, and R_DONE when the data was committed and 'done'
+ * was set.
+*/
+int rlibRun(r_stream* stream, r_script* script, int done)
+{
+    /* NOTE: the functions further down this file use this macro as well */
+    #define RETURN(r) do { retv = r; goto finally; } while(0)
+
+    struct internal_state* state;
+    size_t backup, curOffset;
+    int retv = R_OK;
+
+    if(!stream || !stream->state)
+        return R_INVARG;
+
+    state = stream->state;
+
+    /* Usually have to have an output function, unless just a matcher */
+    if(!stream->fWrite && !(state->options & RLIB_MODE_MATCHER))
+        return R_INVARG;
+    /* And if we are a matcher we have to have a match function */
+    if((state->options & RLIB_MODE_MATCHER) && !stream->fMatch)
+        return R_INVARG;
+
+
+    /*
+     * Keep at least 1/2 amount of backup
+     * Backup is only kept if no matches occur in backup area
+     */
+    backup = done ? 0 : stream->availIn / 2;
+
+    /*
+     * Since there are conversions (between absolute and relative offsets)
+     * below, state->offset has to stay the same for this session. So the
+     * amount consumed is counted in curOffset and only added to
+     * state->offset at the end of the function.
+     */
+    curOffset = 0;
+
+    /* Need some data to work with */
+    if(!stream->nextIn || !stream->availIn)
+        RETURN(R_IN);
+
+
+    stream->total = 0;
+
+    /* Execute the script */
+    retv = vmExecute(stream, script);
+
+    /* Error? */
+    if(retv < 0)
+        RETURN(retv);
+
+    /* Fall through to commit code; no backup is kept on R_DONE */
+    if(retv == R_DONE)
+        backup = 0;
+
+    if(!(state->options & RLIB_MODE_MATCHER))
+    {
+        /*
+         * Here we write out what we've found so far and advance the
+         * pointers
+         *
+         * On the first round we look something like this
+         *
+         * <- state->searched ->
+         * |---------------------------------------------| <- stream->nextIn
+         * |----------------------------------------| <- stream->nextOut
+         * replacements found ^- leftovers
+         */
+
+        /* block is the size of the piece currently being transferred */
+        size_t block;
+
+#ifdef VERBOSE
+        replacementDump(stream);
+#endif
+
+        while(state->replaces)
+        {
+            /* Get the amount of data till the current replace */
+            block = ABS_TO_REL(state->replaces->beg, state) - curOffset;
+
+            if(!(state->options & RLIB_MODE_PARSER))
+            {
+                /* ... write out up to current replacement... */
+                if(!(stream->fWrite)(stream, stream->nextIn, block))
+                    RETURN(R_IOERR);
+            }
+
+            stream->nextIn += block;
+            stream->availIn -= block;
+            curOffset += block;
+
+
+            /* ... the replacement text is null terminated ... */
+            block = strlen(state->replaces->text);
+
+            /* ...write out replacement... */
+            if(!(stream->fWrite)(stream, (byte*)state->replaces->text, block))
+                RETURN(R_IOERR);
+
+            /* ... and skip (!) the text that we replaced */
+            block = state->replaces->end - state->replaces->beg;
+            stream->nextIn += block;
+            stream->availIn -= block;
+            curOffset += block;
+
+#ifdef _DEBUG
+            /* Check if things are in the right order */
+            if(state->replaces->next)
+                ASSERT(state->replaces->end <= state->replaces->next->end);
+#endif
+            /* Go to the next replacement */
+            state->replaces = replacementPop(state->replaces);
+        }
+
+        /*
+         * Now we check how much data we have left and copy
+         * up to backup if we have more
+         */
+
+        /* Copy out till backup marker */
+        if((stream->availIn) >= backup)
+        {
+            /* Get block size ... */
+            block = stream->availIn - backup;
+
+            if(!(state->options & RLIB_MODE_PARSER))
+            {
+                if(!(stream->fWrite)(stream, stream->nextIn, block))
+                    RETURN(R_IOERR);
+            }
+
+            stream->nextIn += block;
+            stream->availIn -= block;
+            curOffset += block;
+        }
+    }
+
+    /*
+     * After this the search could start anew
+     * unless in flush mode or done. Any non-zero 'done' counts: callers
+     * pass the result of feof(), which is not guaranteed to be 1.
+     */
+    if(done)
+        RETURN(R_DONE);
+    else
+        RETURN(R_IN);
+
+finally:
+    state->offset += curOffset;
+
+    return retv;
+}
+
+
+/* rlibSetVar: -----------------------------------------------------------
+ * Set a variable for the script to use
+ *
+ * stream: An initialized stream
+ * var: Name of the variable
+ * val: Value to give it
+ *
+ * Returns R_INVARG when an argument is missing or the stream has no
+ * state, R_NOMEM when the variable could not be added, R_OK otherwise.
+ */
+int rlibSetVar(r_stream* stream, const char* var, const char* val)
+{
+    variables* table;
+
+    if(stream == NULL || stream->state == NULL)
+        return R_INVARG;
+
+    if(var == NULL || val == NULL)
+        return R_INVARG;
+
+    table = &(stream->state->vars);
+
+    /* Clear the variable first, then add the new value */
+    variablesClear(table, var);
+
+    if(!variablesAdd(table, var, val))
+        return R_NOMEM;
+
+    return R_OK;
+}
+
+
+/* rlibDump: --------------------------------------------------------------
+ * Dump the rep script ops for debugging
+ *
+ * script: The script whose ops are dumped (must not be NULL)
+ * f: The file handed to opsDump
+ */
+void rlibDump(r_script* script, FILE* f)
+{
+ opsDump(script->ops, f);
+}
+
+
+/* rlibFree: --------------------------------------------------------------
+ * Free associated structures data etc...
+ *
+ * stream: Stream to release, or NULL to leave streams alone
+ * script: Script to release, or NULL to leave scripts alone
+ *
+ * Both structures are zeroed once their contents have been freed.
+ */
+void rlibFree(r_stream* stream, r_script* script)
+{
+    if(stream != NULL)
+    {
+        struct internal_state* state = stream->state;
+
+        if(state != NULL)
+        {
+            /* Let execution free its stuff */
+            vmFree(stream);
+
+            /* And free the state */
+            free(state);
+            stream->state = NULL;
+        }
+
+        zero(*stream);
+    }
+
+    if(script != NULL)
+    {
+        /* free() accepts NULL, so no need to test the members first */
+        free(script->ops);
+        free(script->error);
+
+        zero(*script);
+    }
+}
+
+
+/* scriptSetError: -------------------------------------------------
+ * Replace the script's error text with a printf style formatted message.
+ * The error is set to NULL when the message cannot be formatted.
+ *
+ * script: The script that carries the error
+ * format: printf style format, followed by its arguments
+ */
+void scriptSetError(r_script* script, const char* format, ...)
+{
+    char* text;
+    va_list args;
+
+    va_start(args, format);
+
+    /* The previous message (if any) goes away before the new one is built */
+    free(script->error);
+
+    script->error = (vasprintf(&text, format, args) != -1) ? text : NULL;
+
+    va_end(args);
+}
+
+/* compileAlready: ------------------------------------------------------
+ * See if the file has already been compiled and load if so
+ *
+ * ctx: The rep context which receives the script and its options
+ * file: The script file, open for reading
+ *
+ * Returns R_OK when the file was read as a compiled script, R_IN when
+ * the header of a compiled script was not found (so the caller should
+ * compile the file instead), or R_INVARG / R_IOERR / R_NOMEM on failure.
+ */
+int compileAlready(r_context* ctx, FILE* file)
+{
+    bfval val;
+    BFILE h = NULL;
+    int retv = R_OK;
+    r_uint temp;
+
+    if(!(h = bfStartFile(file)))
+        RETURN(R_INVARG);
+
+    if(!repfReadHeader(h))
+        RETURN(R_IN);
+
+    while(bfReadValueInfo(h, &val))
+    {
+        if(ferror(file))
+            RETURN(R_IOERR);
+
+        if(feof(file))
+            RETURN(R_INVARG);
+
+        switch(val.id)
+        {
+        case REPVAL_BUFSIZE:
+            /* A block size already set on the context wins */
+            if(ctx->block == 0)
+            {
+                bfReadValueData(h, &val, &(ctx->block));
+                continue;
+            }
+            /* Not wanted: skip its data like any other unused value */
+            break;
+
+        case REPVAL_PARSEMODE:
+            if(bfReadValueData(h, &val, &temp))
+                ctx->options |= (temp != 0) ? RLIB_MODE_PARSER : 0;
+            continue;
+
+        case REPVAL_SCRIPT:
+            /* Don't leak ops loaded by an earlier script value */
+            free(ctx->script.ops);
+            ctx->script.len = 0;
+
+            ctx->script.ops = (r_byte*)malloc(val.len);
+            if(!ctx->script.ops)
+                RETURN(R_NOMEM);
+
+            /* Never leave half read ops behind marked as valid */
+            if(!bfReadValueData(h, &val, ctx->script.ops))
+            {
+                free(ctx->script.ops);
+                ctx->script.ops = NULL;
+                RETURN(ferror(file) ? R_IOERR : R_INVARG);
+            }
+
+            ctx->script.len = val.len;
+            continue;
+
+        default:
+            break;
+        }
+
+        /* Values we don't use are skipped over */
+        bfSkipValueData(h, &val);
+    }
+
+finally:
+    if(h)
+        bfClose(h);
+
+    return retv;
+}
+
+
+/* repLoad: ---------------------------------------------------------------
+ * Load a rep script from a file: either an already compiled script, or
+ * script text which gets compiled here.
+ *
+ * ctx: The rep context; any script it holds is freed first
+ * f: The script file, open for reading
+ *
+ * Returns R_OK on success, R_INVARG for missing arguments, R_IOERR when
+ * the file cannot be sized or read (this includes an empty file),
+ * R_NOMEM when out of memory, or the negative result of compileAlready
+ * or rlibCompile.
+ */
+int repLoad(r_context* ctx, FILE* f)
+{
+    int retv = R_OK;
+    char* buff = NULL;
+    size_t len;
+    long size;
+    int r;
+
+    if(!ctx || !f)
+        return R_INVARG;
+
+    rlibFree(NULL, &(ctx->script));
+
+    /* Okay now try and read header, and possibly process
+       an already executable file. */
+    switch(r = compileAlready(ctx, f))
+    {
+    /* It's already compiled */
+    case R_OK:
+        RETURN(R_OK);
+        break;
+
+    /* It's not compiled so compile */
+    case R_IN:
+        break;
+
+    /* Failed processing */
+    default:
+        if(r < 0)
+            RETURN(r);
+        break;
+    }
+
+    /* Get file size */
+    if(fseek(f, 0, SEEK_END) != 0)
+        RETURN(R_IOERR);
+
+    /* ftell gives -1 on failure; keep it signed until it's been checked */
+    size = ftell(f);
+    if(size <= 0 || fseek(f, 0, SEEK_SET) != 0)
+        RETURN(R_IOERR);
+
+    len = (size_t)size;
+
+    buff = (char*)malloc(len + 1);
+    if(!buff)
+        RETURN(R_NOMEM);
+
+    if(fread(buff, 1, len, f) != len)
+        RETURN(R_IOERR);
+
+    /* The compiler wants a null terminated string */
+    buff[len] = '\0';
+
+    /* Needs compiling */
+    r = rlibCompile(&(ctx->script), buff);
+    if(r < 0)
+        RETURN(r);
+
+finally:
+    free(buff);
+
+    return retv;
+}
+
+/* repInit: ---------------------------------------------------------------
+ * Initialize the context's stream with the context's options.
+ *
+ * ctx: The rep context to initialize
+ *
+ * Returns R_INVARG when ctx is NULL, otherwise the result of rlibInit
+ * (R_OK on success), so a failed initialization is no longer reported
+ * as success.
+ */
+int repInit(r_context* ctx)
+{
+    if(!ctx)
+        return R_INVARG;
+
+    return rlibInit(&(ctx->stream), ctx->options);
+}
+
+/* repFile: ---------------------------------------------------------------
+ * Run the context's script over a file: read the input into a buffer,
+ * hand it to rlibRun and repeat until the end of the file.
+ *
+ * ctx: The rep context
+ * fIn: The input file, open for reading
+ *
+ * Returns R_OK on success, R_NOMEM / R_IOERR on buffer or read trouble,
+ * or the negative result of rlibRun. ctx->stream.total is set to the sum
+ * of the totals of all the rlibRun calls.
+ */
+int repFile(r_context* ctx, FILE* fIn)
+{
+ r_uint batch; /* Current batch size */
+ r_uint block = ctx->block; /* Block size */
+ r_byte* buff = NULL; /* Input buffer */
+ int retv = R_OK;
+ int r = R_IN;
+ int total = 0;
+
+ /* No block size given: use the size of the file plus one, or leave
+ block at 0 when the size can't be found out */
+ if(!block)
+ {
+ if(!fseek(fIn, 0, SEEK_END) && (block = ftell(fIn)) != ~0)
+ block++;
+
+ else
+ block = 0;
+
+ fseek(fIn, 0, SEEK_SET);
+ }
+
+ /* Otherwise the buffer is twice the requested block size */
+ else
+ {
+ block *= 2;
+ }
+
+ /* Okay now if it's a sane value just allocate it */
+ if(block)
+ {
+ /* NOTE(review): a literal limit here, MAX_BUFF further down -
+ confirm whether the two are meant to agree */
+ if(block > 0x0F00000)
+ RETURN(R_NOMEM);
+
+ /* Allocate buffers */
+ buff = (r_byte*)malloc(block);
+ if(!buff)
+ RETURN(R_NOMEM);
+ }
+
+ /* Hook buffers to the stream */
+ ctx->stream.nextIn = buff;
+ ctx->stream.availIn = 0;
+
+ /* While we either have more data to put in or more output... */
+ while(!feof(fIn) || r == R_DONE)
+ {
+ /* If rlib wants data then give it */
+ if(r == R_IN)
+ {
+ /* This is a normal standard read */
+ if(buff)
+ {
+ /* Move data to front */
+ memmove(buff, ctx->stream.nextIn, ctx->stream.availIn);
+
+ /* Set pointer to data */
+ ctx->stream.nextIn = buff;
+
+ /* Read rest of block */
+ batch = fread(ctx->stream.nextIn + ctx->stream.availIn,
+ sizeof(r_byte), block - ctx->stream.availIn, fIn);
+
+ ctx->stream.availIn += batch;
+
+ if(ferror(fIn))
+ RETURN(R_IOERR);
+ }
+
+ /* Here we read as much as possible in one shot allocating as we go*/
+ else
+ {
+ batch = 0;
+ block = 0;
+
+ while(!feof(fIn))
+ {
+ if(ferror(fIn))
+ RETURN(R_IOERR);
+
+ /* Grow the buffer by one 0x4000 byte chunk per read */
+ block += 0x4000;
+
+ /* NOTE(review): reallocf is not standard C; presumably it comes
+ from the compat code and frees buff on failure - confirm */
+ if(block > MAX_BUFF ||
+ !(buff = reallocf(buff, block)))
+ RETURN(R_NOMEM);
+
+ batch += fread(buff + batch, sizeof(r_byte),
+ 0x4000, fIn);
+ }
+
+ ctx->stream.nextIn = buff;
+ ctx->stream.availIn += batch;
+ }
+ }
+
+ /* call rlib, letting it know whether the input has run out */
+ r = rlibRun(&(ctx->stream), &(ctx->script), feof(fIn));
+
+ /* Oops! */
+ if(r < 0)
+ RETURN(r);
+
+ total += ctx->stream.total;
+ }
+
+finally:
+ if(buff)
+ free(buff);
+
+ ctx->stream.total = total;
+
+ return retv;
+}
+
+/* fileOutput: ------------------------------------------------------------
+ * Write callback used by repFiles: writes the data to the FILE* kept
+ * in stream->arg. The signature matches the r_write typedef exactly, as
+ * the function is called through a pointer of that type.
+ *
+ * stream: rlib stream, with the output FILE* in stream->arg
+ * data: The data
+ * len: The number of bytes to write from data
+ *
+ * Returns non-zero when everything was written, 0 on failure.
+ */
+int fileOutput(struct r_stream* stream, r_byte* data, r_uint len)
+{
+    FILE* f = (FILE*)stream->arg;
+    return (fwrite(data, 1, len, f) == len) && (!ferror(f));
+}
+
+/* repFiles: --------------------------------------------------------------
+ * Run the context's script on fIn and write the output to fOut. The
+ * stream's own write callback and argument are put back afterwards.
+ *
+ * ctx: The rep context
+ * fIn: The input file, open for reading
+ * fOut: The output file, open for writing
+ *
+ * Returns the result of repFile.
+ */
+int repFiles(r_context* ctx, FILE* fIn, FILE* fOut)
+{
+    r_write savedWrite = ctx->stream.fWrite;
+    void* savedArg = ctx->stream.arg;
+    int result;
+
+    /* Send the output to the file for the duration of the run */
+    ctx->stream.fWrite = fileOutput;
+    ctx->stream.arg = fOut;
+
+    result = repFile(ctx, fIn);
+
+    ctx->stream.fWrite = savedWrite;
+    ctx->stream.arg = savedArg;
+
+    return result;
+}
+
+/* repFree: ---------------------------------------------------------------
+ * Free a rep context: hands the context's stream and script to rlibFree.
+ *
+ * ctx: The rep context (must not be NULL)
+ */
+void repFree(r_context* ctx)
+{
+ rlibFree(&(ctx->stream), &(ctx->script));
+}
diff --git a/lib/rlib.dsp b/lib/rlib.dsp
new file mode 100644
index 0000000..b3b149a
--- /dev/null
+++ b/lib/rlib.dsp
@@ -0,0 +1,140 @@
+# Microsoft Developer Studio Project File - Name="rlib" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Static Library" 0x0104
+
+CFG=rlib - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "rlib.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE
+!MESSAGE NMAKE /f "rlib.mak" CFG="rlib - Win32 Debug"
+!MESSAGE
+!MESSAGE Possible choices for configuration are:
+!MESSAGE
+!MESSAGE "rlib - Win32 Release" (based on "Win32 (x86) Static Library")
+!MESSAGE "rlib - Win32 Debug" (based on "Win32 (x86) Static Library")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF "$(CFG)" == "rlib - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "release"
+# PROP Intermediate_Dir "release"
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
+# ADD CPP /nologo /W3 /GX /O2 /I ".." /I "..\win32\pcre\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LIB32=link.exe -lib
+# ADD BASE LIB32 /nologo
+# ADD LIB32 /nologo /out:"..\win32\release\rlib.lib"
+
+!ELSEIF "$(CFG)" == "rlib - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "debug"
+# PROP Intermediate_Dir "debug"
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
+# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I ".." /I "..\win32\pcre\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LIB32=link.exe -lib
+# ADD BASE LIB32 /nologo
+# ADD LIB32 /nologo /out:"..\win32\debug\rlibd.lib"
+
+!ENDIF
+
+# Begin Target
+
+# Name "rlib - Win32 Release"
+# Name "rlib - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=..\common\binfile.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\common\compat.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\compile.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\execute.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\common\repfile.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\rlib.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\common\xstring.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\execute.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\ops.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\priv.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\rep.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\rlib.h
+# End Source File
+# End Group
+# End Target
+# End Project
diff --git a/lib/rlib.h b/lib/rlib.h
new file mode 100644
index 0000000..b5f8b68
--- /dev/null
+++ b/lib/rlib.h
@@ -0,0 +1,261 @@
+/*
+ * AUTHOR
+ * N. Nielsen
+ *
+ * LICENSE
+ * This software is in the public domain.
+ *
+ * The software is provided "as is", without warranty of any kind,
+ * express or implied, including but not limited to the warranties
+ * of merchantability, fitness for a particular purpose, and
+ * noninfringement. In no event shall the author(s) be liable for any
+ * claim, damages, or other liability, whether in an action of
+ * contract, tort, or otherwise, arising from, out of, or in connection
+ * with the software or the use or other dealings in the software.
+ *
+ * SUPPORT
+ * Send bug reports to: <nielsen@memberwebs.com>
+ */
+
+#ifndef __RLIB_H__
+#define __RLIB_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct internal_state;
+struct r_stream;
+
+typedef unsigned char r_byte;
+typedef unsigned int r_uint;
+
+
+/*
+ * r_replace:
+ * Represents a replacement which was found. This structure is passed
+ * to the r_match matching callback function below.
+ */
+typedef struct _r_replace
+{
+ r_uint offset; /* The offset from the beginning of the file */
+ r_uint from; /* The offset from the beginning of the current buffer */
+ r_uint flen; /* The length of the data to be replaced */
+ r_byte* to; /* The data to replace with */
+ r_uint tlen; /* The length of the replacement data */
+}
+r_replace;
+
+
+
+
+/*
+ * r_match
+ * Callback function which receives replacements made by the script.
+ *
+ * stream: rlib stream
+ * replace: The replacement.
+ *
+ * return 1 to accept this match, or 0 to skip.
+ */
+typedef int (*r_match)(struct r_stream* stream, r_replace* replace);
+
+/*
+ * r_write
+ * Callback used by rlibRun to output processed data.
+ *
+ * stream: rlib stream
+ * data: The data
+ * len: The number of bytes to write from data.
+ */
+typedef int (*r_write)(struct r_stream* stream, r_byte* data, r_uint len);
+
+/*
+ * r_message
+ * Callback which is sent messages from the 'message' command
+ * in a rep script.
+ *
+ * stream: rlib stream
+ * message: Null terminated message
+ */
+typedef void (*r_message)(struct r_stream* stream, const char* message);
+
+
+
+
+
+
+/*
+ * r_stream
+ * The basic data interface into rlib.
+ */
+typedef struct r_stream
+{
+ /* ------ Data you supply -------------- */
+ r_byte* nextIn; /* The next byte to be read by rlib */
+ r_uint availIn; /* The number of bytes at nextIn */
+
+ r_write fWrite; /* Callback used for output */
+ r_match fMatch; /* Callback for confirmation or matching */
+ r_message fMessage; /* Callback for messages from script */
+ void* arg; /* Optional data for the above functions */
+
+ /* -------- Data returned -------------- */
+ r_uint total; /* Total replaces */
+
+ /* -------- Internal ------------------- */
+ struct internal_state* state;
+}
+r_stream;
+
+
+
+
+
+/*
+ * r_script
+ * Represents a loaded rep script along with syntax error
+ * information.
+ */
+typedef struct _r_script
+{
+ r_byte* ops; /* Compiled script */
+ r_uint len; /* The length of the script */
+ char* error; /* Syntax error details */
+ r_uint errline; /* Line number of syntax error */
+}
+r_script;
+
+
+
+
+
+/*
+ * rlibCompile
+ * Call this function to compile a script. It will be compiled into the
+ * r_script structure. Be sure to zero the script structure passed.
+ *
+ * script: rlib script
+ * data: rep script text. Must be null terminated.
+ */
+int rlibCompile(r_script* script, const char* data);
+
+
+
+/*
+ * rlibDump
+ * Write out byte codes for a script
+ *
+ * script: rlib script
+ * f: Output stream to write to (include <stdio.h> before this header for FILE)
+ */
+void rlibDump(r_script* script, FILE* f);
+
+
+
+/*
+ * rlibFree
+ * Free internal variables associated with this stream and/or
+ * script structure.
+ *
+ * stream: rlib stream
+ * script: rlib script
+ */
+void rlibFree(r_stream* stream, r_script* script);
+
+
+
+/*
+ * rlibInit
+ * Call this function to initialize a rlib stream.
+ * For any of the following functions to work a stream must have been
+ * initialized.
+ *
+ * stream: Pointer to a zero'd stream structure.
+ * options: Can be any combination of the following mode values.
+ */
+int rlibInit(r_stream* stream, long options);
+
+/* Causes rlib to only output replacing text */
+#define RLIB_MODE_PARSER 0x00000100
+
+/* Causes rlib not to output anything */
+#define RLIB_MODE_MATCHER 0x00000200
+
+
+
+/*
+ * rlibSetVar
+ * Set a variable for use in the script.
+ *
+ * stream: rlib stream
+ * var: Variable name
+ * val: Variable value
+ */
+int rlibSetVar(r_stream* stream, const char* var, const char* val);
+
+
+
+/*
+ * rlibRun
+ * Run the script. rlibRun will only "eat" nextIn up to where the
+ * last replace was found or up to half, whichever is more, unless
+ * it's the last buffer, in which case it finishes up.
+ *
+ * stream: rlib stream
+ * script: rlib script
+ * done: flag whether this is the last buffer
+ */
+int rlibRun(r_stream* stream, r_script* script, int done);
+
+
+
+/* rlibClear: -----------------------------------------------------------
+ * Prepare and clean up a context/stream after it has been run, so that
+ * it can be run a second time.
+ *
+ * stream: rlib context/stream
+ */
+void rlibClear(r_stream* stream);
+
+
+
+
+
+/* ERROR CODES */
+
+/* OK */
+#define R_OK 0
+
+/* Need more input */
+#define R_IN 1
+
+/* Finished processing script */
+#define R_DONE 3
+
+/* Not enough memory */
+#define R_NOMEM -1
+
+/* Syntax error in the script */
+#define R_SYNTAX -2
+
+/* Regular expression error in the script */
+#define R_REGEXP -3
+
+/* Endless loop encountered */
+#define R_LOOP -4
+
+/* User defined error from script */
+#define R_USER -5
+
+/* Read or write error */
+#define R_IOERR -6
+
+/* Invalid argument or stream data member */
+#define R_INVARG -10
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RLIB_H__ */