diff options
author | Stef <stef@ws.local> | 2003-09-17 18:34:42 +0000 |
---|---|---|
committer | Stef <stef@ws.local> | 2003-09-17 18:34:42 +0000 |
commit | 53914f770f1e1dc1ab4342c64846fd995825b7e6 (patch) | |
tree | 63d14dacbd3d81363fcbea1036c47a0210b0f397 | |
parent | 15f3015d2e8305b729d7996faad410b3378497da (diff) |
Initial Import
37 files changed, 8574 insertions, 0 deletions
diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 0000000..1692634 --- /dev/null +++ b/.cvsignore @@ -0,0 +1,5 @@ +*.plg +*.opt +*.ncb +Debug +Release
\ No newline at end of file diff --git a/ReadMe.txt b/ReadMe.txt new file mode 100644 index 0000000..81827d7 --- /dev/null +++ b/ReadMe.txt @@ -0,0 +1,34 @@ +======================================================================== + CONSOLE APPLICATION : rtfm +======================================================================== + + +AppWizard has created this rtfm application for you. + +This file contains a summary of what you will find in each of the files that +make up your rtfm application. + +rtfm.dsp + This file (the project file) contains information at the project level and + is used to build a single project or subproject. Other users can share the + project (.dsp) file, but they should export the makefiles locally. + +rtfm.cpp + This is the main application source file. + + +///////////////////////////////////////////////////////////////////////////// +Other standard files: + +StdAfx.h, StdAfx.cpp + These files are used to build a precompiled header (PCH) file + named rtfm.pch and a precompiled types file named StdAfx.obj. + + +///////////////////////////////////////////////////////////////////////////// +Other notes: + +AppWizard uses "TODO:" to indicate parts of the source code you +should add to or customize. + +///////////////////////////////////////////////////////////////////////////// diff --git a/src/basehandler.cpp b/src/basehandler.cpp new file mode 100644 index 0000000..02c79c1 --- /dev/null +++ b/src/basehandler.cpp @@ -0,0 +1,59 @@ +// BaseHandler.cpp: implementation of the BaseHandler class. 
+// +////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" +#include "BaseHandler.h" + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +BaseHandler::BaseHandler() +{ + m_reader = NULL; +} + +BaseHandler::~BaseHandler() +{ + clear(); +} + +void BaseHandler::clear() +{ + m_reader = NULL; +} + +void BaseHandler::startDocument(RtfReader* reader) +{ + clear(); + m_reader = reader; +} + +void BaseHandler::endDocument() +{ + // We leave document and levels here so it they + // can be accessed later + m_reader = NULL; +} + +void BaseHandler::controlWord(const string& cw, int flags, int param) +{ + +} + +void BaseHandler::groupStart() +{ + +} + +void BaseHandler::groupEnd() +{ + +} + +void BaseHandler::charData(wstring data) +{ + +} + diff --git a/src/basehandler.h b/src/basehandler.h new file mode 100644 index 0000000..33cfe61 --- /dev/null +++ b/src/basehandler.h @@ -0,0 +1,29 @@ +// BaseHandler +// Implements an RtfHandler for other classes (LevelHandler +// and RtfParser) to override. + +#ifndef _BASEHANDLER_H_ +#define _BASEHANDLER_H_ + +#include "RtfReader.h" + +class BaseHandler : + public RtfHandler +{ +public: + BaseHandler(); + virtual ~BaseHandler(); + + virtual void startDocument(RtfReader* reader); + virtual void endDocument(); + virtual void controlWord(const string& cw, int flags, int param); + virtual void groupStart(); + virtual void groupEnd(); + virtual void charData(wstring data); + +protected: + virtual void clear(); + RtfReader* m_reader; +}; + +#endif // _BASEHANDLER_H_ diff --git a/src/levelhandler.cpp b/src/levelhandler.cpp new file mode 100644 index 0000000..1c55e9c --- /dev/null +++ b/src/levelhandler.cpp @@ -0,0 +1,98 @@ +// LevelHandler.cpp: implementation of the LevelHandler class. 
+// +////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" +#include "LevelHandler.h" + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +LevelHandler::LevelHandler() +{ + +} + +LevelHandler::~LevelHandler() +{ + clear(); +} + +void LevelHandler::clear() +{ + m_curLevel.release(); + m_topLevel.release(); + + BaseHandler::clear(); +} + +void LevelHandler::startDocument(RtfReader* reader) +{ + BaseHandler::startDocument(reader); + + m_topLevel = new Level; + m_curLevel = m_topLevel; +} + +void LevelHandler::endDocument() +{ + BaseHandler::endDocument(); +} + +void LevelHandler::groupStart() +{ + BaseHandler::groupStart(); + ASSERT(m_curLevel); + pushLevel(); +} + +void LevelHandler::groupEnd() +{ + ASSERT(m_curLevel); + popLevel(); + BaseHandler::groupEnd(); +} + +DOM::Element LevelHandler::getElement() +{ + ASSERT(m_curLevel); + return m_curLevel->getElement(); +} + +void LevelHandler::pushLevel() +{ + // Push a level on the stack + m_curLevel = m_curLevel->pushLevel(); +} + +void LevelHandler::popLevel() +{ + // Pull a level off the stack + LevelPtr level = m_curLevel->getPrevious(); + + // TODO: report errors here + if(level) + m_curLevel = level; +} + +void LevelHandler::rewindLevel(LevelPtr ptr) +{ + ASSERT(ptr != NULL); + + LevelPtr prev = ptr->getPrevious(); + + if(prev != NULL) + m_curLevel = prev; + else + m_curLevel = ptr; +} + +LevelPtr LevelHandler::getLevel() +{ + return m_curLevel; +} + + + + diff --git a/src/levelhandler.h b/src/levelhandler.h new file mode 100644 index 0000000..533fdfe --- /dev/null +++ b/src/levelhandler.h @@ -0,0 +1,41 @@ +// LevelHandler +// Keeps track of levels (braces) in the RTF file. 
+// + +#ifndef _LEVELHANDLER_H_ +#define _LEVELHANDLER_H_ + +#include "RtfReader.h" +#include "BaseHandler.h" +#include "RtfParseHelpers.h" +#include "Reference.h" + +class LevelHandler + : public BaseHandler +{ +public: + LevelHandler(); + virtual ~LevelHandler(); + + virtual void startDocument(RtfReader* reader); + virtual void endDocument(); + virtual void groupStart(); + virtual void groupEnd(); + + virtual DOM::Element getElement(); + + void pushLevel(); + void popLevel(); + void rewindLevel(LevelPtr ptr); + LevelPtr getLevel(); + + +protected: + virtual void clear(); + + LevelPtr m_topLevel; + LevelPtr m_curLevel; +}; + + +#endif // _LEVELHANDLER_H_ diff --git a/src/reference.h b/src/reference.h new file mode 100644 index 0000000..8b82a61 --- /dev/null +++ b/src/reference.h @@ -0,0 +1,112 @@ +#ifndef __REFERENCE_H__ +#define __REFERENCE_H__ + +template<typename C> +class Reference +{ +public: + Reference() + { m_ptr = NULL; } + + Reference(C* ptr) + { + m_ptr = ptr; + addref(); + } + + Reference(C* ptr, bool addref) + { + m_ptr = ptr; + if(addref) + addref(); + } + + ~Reference() + { + release(); + } + + Reference(const Reference& orig) + { + m_ptr = orig.m_ptr; + addref(); + } + + Reference& operator=(const C* ptr) + { + C* old = m_ptr; + m_ptr = (C*)ptr; + addref(); + if(old) + old->release(); + return *this; + } + + Reference& operator=(const Reference& orig) + { return operator=(orig.m_ptr); } + + void attach(C* ptr) + { + release(); + m_ptr = ptr; + } + + C* detach() + { + C* ptr = m_ptr; + m_ptr = NULL; + return ptr; + } + + operator C*() const + { return m_ptr; } + + C* operator->() const + { return m_ptr; } + +#if 0 + operator bool() const + { + return m_ptr != NULL; + } +#endif + + void release() + { + if(m_ptr) + m_ptr->release(); + m_ptr = NULL; + } + + void addref() + { + if(m_ptr) + m_ptr->addRef(); + } + +private: + C* m_ptr; +}; + +class Instance +{ +public: + Instance() + { m_x = 0; } + virtual ~Instance() + { } + void addRef() + { m_x++; } + 
void release() + { + if((--m_x) <= 0) + delete this; + } + +private: + // The reference count + int m_x; +}; + + +#endif //__REFERENCE_H__
// ---- src/rtfformatting.h ----
// RtfFormatting.h: interface for the RtfFormatting class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_RTFTEXTPROPERTIES_H__719D85C9_69D9_4499_BE5E_7A9A7F6F9C38__INCLUDED_)
#define AFX_RTFTEXTPROPERTIES_H__719D85C9_69D9_4499_BE5E_7A9A7F6F9C38__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

// Value type holding the character ("text") and paragraph ("para")
// formatting in effect at some point of an RTF document.
//
// Defaults: every text flag off, colour -1, style -1, list -1,
// not inside a table.
class RtfFormatting
{
public:
    RtfFormatting()
    {
        resetText();
        resetPara();
    }

    RtfFormatting(const RtfFormatting& format)
    {
        copy(format);
    }

    // True when all character properties match those of 'format'.
    bool textEquals(const RtfFormatting& format) const
    {
        return m_bold == format.m_bold &&
               m_italic == format.m_italic &&
               // strike must be compared with strike; it used to be
               // compared with the other object's italic flag
               m_strike == format.m_strike &&
               m_hidden == format.m_hidden &&
               m_underline == format.m_underline &&
               m_color == format.m_color;
    }

    // True when all paragraph properties match those of 'format'.
    // Takes a const reference so that const objects can be compared.
    bool paraEquals(const RtfFormatting& format) const
    {
        return m_style == format.m_style &&
               m_list == format.m_list &&
               m_inTbl == format.m_inTbl;
    }

    // Copies every text and paragraph property from 'format'.
    void copy(const RtfFormatting& format)
    {
        m_bold = format.m_bold;
        m_italic = format.m_italic;
        // used to be taken from format.m_italic
        m_strike = format.m_strike;
        m_hidden = format.m_hidden;
        m_underline = format.m_underline;
        m_color = format.m_color;

        m_style = format.m_style;
        m_list = format.m_list;
        m_inTbl = format.m_inTbl;
    }

    // Restores the character properties to their defaults.
    void resetText()
    {
        m_bold = m_italic = m_strike =
            m_underline = m_hidden = false;
        m_color = -1;
    }

    // Restores the paragraph properties to their defaults.
    void resetPara()
    {
        m_style = m_list = -1;
        m_inTbl = false;
    }

    // Character property getters
    bool textIsBold() const
        { return m_bold; }
    bool textIsItalic() const
        { return m_italic; }
    bool textIsStrike() const
        { return m_strike; }
    bool textIsUnderline() const
        { return m_underline; }
    bool textIsHidden() const
        { return m_hidden; }
    int textColor() const
        { return m_color; }

    // Paragraph property getters
    int paraStyle() const
        { return m_style; }
    int paraList() const
        { return m_list; }
    bool paraInTable() const
        { return m_inTbl; }

    // Character property setters
    void textSetBold(bool bold)
        { m_bold = bold; }
    void textSetItalic(bool italic)
        { m_italic = italic; }
    void textSetStrike(bool strike)
        { m_strike = strike; }
    void textSetUnderline(bool underline)
        { m_underline = underline; }
    void textSetHidden(bool hidden)
        { m_hidden = hidden; }
    void textSetColor(int color)
        { m_color = color; }

    // Paragraph property setters
    void paraSetStyle(int style)
        { m_style = style; }
    void paraSetList(int list)
        { m_list = list; }
    void paraSetTable(bool inTable)
        { m_inTbl = inTable; }

protected:
    // Character properties
    bool m_bold;
    bool m_italic;
    bool m_strike;
    bool m_underline;
    bool m_hidden;
    int m_color;        // -1 when unset

    // Paragraph properties
    int m_style;        // -1 when unset
    int m_list;         // -1 when unset
    bool m_inTbl;
    // TODO: Character styles
};

#endif // !defined(AFX_RTFTEXTPROPERTIES_H__719D85C9_69D9_4499_BE5E_7A9A7F6F9C38__INCLUDED_)

// ---- src/rtfparser.cpp ----
// RtfReader.cpp: implementation of the RtfReader class.
+// +////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" + +#include <stdlib.h> +#include <stdio.h> +#include "RtfReader.h" + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +const int RtfHandler::kAsterisk = 0x00000001; +const int RtfHandler::kHasParam = 0x00000002; +const int RtfHandler::kIsEncoded = 0x00000004; + +RtfReader::RtfReader() +{ + m_handler = NULL; + m_depth = 0; + m_parseHex = true; + m_parseUnicode = false; + m_uniEat = 0; + m_uniEatStack.push(0); +} + +RtfReader::~RtfReader() +{ + +} + +bool RtfReader::parse(string fileName) +{ + FILE* file = fopen(fileName, "r"); + if(!file) + return false; + + bool ret = parse(file); + + fclose(file); + + return ret; +} + +void RtfReader::emptyData(RtfContext& cx) +{ + if(!cx.data.empty()) + { + if(m_handler) + m_handler->charData(cx.data); + cx.data.resize(0); + } +} + +void RtfReader::sendData(RtfContext& cx, wchar_t ch) +{ + if(m_uniEat > 0) + m_uniEat--; + else + cx.data.append(1, ch); +} + +void RtfReader::sendData(RtfContext& cx, wstring data) +{ + if(m_uniEat > 0) + { + int len = __min(data.size(), m_uniEat); + cx.data.append(data.substr(len)); + m_uniEat -= len; + } + else + { + cx.data.append(data); + } +} + +void RtfReader::sendControlWord(RtfContext& cx, string cw, int flags, int param) +{ + emptyData(cx); + if(m_handler) + m_handler->controlWord(cw, flags, param); +} + +bool RtfReader::parseHexChar(RtfContext& cx, int num) +{ + string data; + for(int i = 0; i < num; i++) + { + char ch = fgetc(cx.file); + + if(ch == -1) + return false; + + if((ch >= 'A' && ch <= 'F') || + (ch >= 'a' && ch <= 'f') || + (ch >= '0' && ch <= '9')) + { + data.append(1, ch); + } + else + { + m_parseErrors.append((string)"invalid hex char: " + ch + "\n"); + } + } + + if(m_parseHex) + { + char* end = NULL; + int val = strtol(data.c_str(), &end, 16); + if(end == 
data.c_str() + data.size() && m_parseHex) + sendData(cx, val); + else + m_parseErrors.append("invalid hex char: " + data + "\n"); + } + else + { + sendControlWord(cx, data, RtfHandler::kIsEncoded, -1); + } + + return true; +} + +bool RtfReader::parseControlWord(RtfContext& cx) +{ + bool isAsterisk = false; + string controlword; + string param; + + while(1) + { + int ch = fgetc(cx.file); + if(ch == WEOF) + return false; + + bool empty = controlword.empty(); + + // Part of the name of a control word + // NOTE: Although the RTF specification prohibits upercase + // control words, MS Word uses them :-/ + if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') + controlword.append(1, ch); + + // Part of the parameter of a control word + else if(ch >= '0' && ch <= '9') + param.append(1, ch); + + // Now handle escapes and other special types of + // control words. These are all only valid at beginning + // of the "control word" + + // hex spelled out character + else if(empty && ch == '\'') + { + parseHexChar(cx, 2); + break; + } + + // Asterisk type destination + else if(empty && ch == '*') + { + isAsterisk = true; + + ch = fgetc(cx.file); + while(strchr("\r\n", ch)) + ch = fgetc(cx.file); + + if(ch != '\\') + ungetc(ch, cx.file); + } + + // Escaped backslash + else if(empty && ch == '\\') + { + sendData(cx, L'\\'); + break; + } + + // Non breaking space + else if(empty && ch == '~') + { + sendData(cx, 0x00A0); + break; + } + + // Optional hyphen + else if(empty && ch == '-') + { + sendData(cx, 0x00AD); + break; + } + + // a hyphen right after control word is part of number + else if(!empty && param.empty() && ch == '-') + { + param.append(1, ch); + } + + // TODO: This looks real hokey and acts that + // way too +#if 0 + // An enter as the first character of a control word + // makes a paragraph + else if(strchr("\n\r", ch)) + { + controlword = "par"; + break; + } +#endif + // Space end a rtf code (but get eaten) + else if(strchr(" ", ch)) + break; + + // Anything else 
(including a backslash ends a control word) + else + { + ungetc(ch, cx.file); + break; + } + } + + // Empty out the control word buffers + if(!controlword.empty()) + { + int flags = isAsterisk ? RtfHandler::kAsterisk : 0; + int numPar = -1; + + if(!param.empty()) + { + char* end = NULL; + numPar = strtol(param.c_str(), &end, 10); + if(end == param.c_str() + param.size()) + flags += RtfHandler::kHasParam; + } + + // Here we check for common characters + if(controlword == "emdash") + sendData(cx, 0x2014); + else if(controlword == "endash") + sendData(cx, 0x2013); + else if(controlword == "emspace") + sendData(cx, 0x2003); + else if(controlword == "enspace") + sendData(cx, 0x2002); + else if(controlword == "bullet") + sendData(cx, 0x2022); + else if(controlword == "lquote") + sendData(cx, 0x2018); + else if(controlword == "rquote") + sendData(cx, 0x2019); + else if(controlword == "ldblquote") + sendData(cx, 0x201C); + else if(controlword == "rdblquote") + sendData(cx, 0x201D); + + // Unicode values get sent through + else if(m_parseUnicode && flags & RtfHandler::kHasParam && + controlword == "u" ) + { + sendData(cx, numPar); + m_uniEat = m_uniEatStack.top(); + } + + // Unicode destination + else if(m_parseUnicode && controlword == "ud") + { + + } + + // Skip value for unicode characters + else if(m_parseUnicode && controlword == "uc") + { + m_uniEatStack.pop(); + m_uniEatStack.push(numPar); + } + + // Otherwise we send the control word + else + { + if(m_handler) + sendControlWord(cx, controlword, flags, numPar); + } + } + + return true; +} + +bool RtfReader::parse(FILE* file) +{ + m_depth = 0; + m_parseErrors = ""; + + int ch = 0; + + RtfContext cx; + cx.isData = false; + cx.file = file; + cx.data = L""; + + if(m_handler) + m_handler->startDocument(this); + + while(1) + { + ch = fgetc(file); + if(ch == EOF) + goto done; + + // Type is undetermined so we figure it out here + if(!cx.isData) + { + switch(ch) + { + case '\\': + if(!parseControlWord(cx)) + goto done; + 
break; + + case '{': + { + emptyData(cx); + + m_uniEatStack.push(m_uniEatStack.top()); + + if(m_handler) + m_handler->groupStart(); + + m_depth++; + } + break; + + case '}': + { + emptyData(cx); + + if(m_handler) + m_handler->groupEnd(); + + if(!m_uniEatStack.empty()) + m_uniEatStack.pop(); + + m_depth--; + } + break; + + default: + cx.isData = true; + break; + } + } + + if(cx.isData) + { + // We translate tabs into the appropriate control + // word + if(ch == '\t') + sendControlWord(cx, "tab", 0, -1); + +// Don't need this code, the XML outputter +// Takes care of it for us +#if 0 + if(ch == '&') + sendData(cx, L"&"); + + else if(ch == '\'') + sendData(cx, L"'"); + + else if(ch == '"') + sendData(cx, L"""); + + else if(ch == '<') + sendData(cx, L"<"); + + else if(ch == '>') + sendData(cx, L">"); +#endif + + // enters a + else if(!strchr("\r\n", ch)) + sendData(cx, ch); + + cx.isData = false; + } + } + +done: + + if(m_depth != 0) + m_parseErrors.append("unmatched braces\n"); + + // TODO: Check depth and give errors if screwy + if(m_handler) + m_handler->endDocument(); + + return m_parseErrors.empty(); +} + diff --git a/src/rtfparser.h b/src/rtfparser.h new file mode 100644 index 0000000..f28150e --- /dev/null +++ b/src/rtfparser.h @@ -0,0 +1,80 @@ +// RtfReader.h: interface for the RtfReader class. 
+// +////////////////////////////////////////////////////////////////////// + +#if !defined(AFX_RTFREADER_H__2C77784F_5333_4E16_B0E0_B56E211C2D82__INCLUDED_) +#define AFX_RTFREADER_H__2C77784F_5333_4E16_B0E0_B56E211C2D82__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +#include <mystring.h> +#include <stack> +class RtfReader; + +class RtfHandler +{ +public: + virtual void startDocument(RtfReader* reader) = 0; + virtual void endDocument() = 0; + virtual void controlWord(const string& cw, int flags, int param) = 0; + virtual void groupStart() = 0; + virtual void groupEnd() = 0; + virtual void charData(wstring data) = 0; + + static const int kAsterisk; + static const int kHasParam; + static const int kIsEncoded; +}; + +class RtfReader +{ +public: + RtfReader(); + virtual ~RtfReader(); + + + bool parse(string fileName); + bool parse(FILE* file); + + void setHandler(RtfHandler* handler) + { m_handler = handler; } + string getParseErrors() const + { return m_parseErrors; } + int getDepth() const + { return m_depth; } + void setHexParse(bool parse) + { m_parseHex = parse; } + void setUnicode(bool unicode); + +protected: + RtfHandler* m_handler; + int m_depth; + bool m_parseHex; + string m_parseErrors; + + // Unicode handling + bool m_parseUnicode; + typedef std::stack<int> StackInt; + StackInt m_uniEatStack; + int m_uniEat; + +private: + + struct RtfContext + { + FILE* file; + bool isData; + wstring data; + }; + + bool parseControlWord(RtfContext& cx); + bool parseHexChar(RtfContext& cx, int num); + void sendControlWord(RtfContext& cx, string cw, int flags, int param); + void sendData(RtfContext& cx, wchar_t ch); + void sendData(RtfContext& cx, wstring data); + void emptyData(RtfContext& cx); +}; + +#endif // !defined(AFX_RTFREADER_H__2C77784F_5333_4E16_B0E0_B56E211C2D82__INCLUDED_) diff --git a/src/rtfx.cpp b/src/rtfx.cpp new file mode 100644 index 0000000..6a68a28 --- /dev/null +++ b/src/rtfx.cpp @@ -0,0 +1,68 @@ +// rtfm.cpp : Defines the 
entry point for the console application. +// + +#include "stdafx.h" +#include <stdlib.h> +#include <stdio.h> + +#include "RtfReader.h" +#include "RtfAnalyser.h" + +int usage() +{ + fprintf(stderr, "usage: rtfm inrtf outxml\n"); + return 2; +} + +int main(int argc, char* argv[]) +{ + if(argc < 3) + return usage(); + + try + { + FILE* file = fopen(argv[1], "rb"); + + if(!file) + { + fprintf(stderr, "rtfm: couldn't open file: %s: %s\n", argv[1], strerror(errno)); + return 1; + } + + RtfParserOptions options; + + RtfParser handler(options); + RtfReader rtf; + rtf.setHandler(&handler); + bool ret = rtf.parse(file); + fclose(file); + + if(!ret) + { + fprintf(stderr, "rtfm: rtf parse failed: %s\n", rtf.getParseErrors().c_str()); + return 1; + } + + + DOM::Document doc = handler.getDocument(); + string xml = doc.serialize(); + + FILE* out = fopen(argv[2], "wb"); + if(!out) + { + fprintf(stderr, "rtfm: couldn't open file: %s: %s\n", argv[2], strerror(errno)); + return 1; + } + + fwrite(xml.c_str(), 1, xml.length(), out); + fclose(out); + return 0; + + } + catch(DOM::DOMException& e) + { + fprintf(stderr, "rtfm: xml dom error: %s\n", e.getMessage()); + } + + return 1; +} diff --git a/src/sablo.h b/src/sablo.h new file mode 100644 index 0000000..99cfc40 --- /dev/null +++ b/src/sablo.h @@ -0,0 +1,2139 @@ +// +// AUTHOR +// N. Nielsen +// +// LICENSE +// This software is in the public domain. +// +// The software is provided "as is", without warranty of any kind, +// express or implied, including but not limited to the warranties +// of merchantability, fitness for a particular purpose, and +// noninfringement. In no event shall the author(s) be liable for any +// claim, damages, or other liability, whether in an action of +// contract, tort, or otherwise, arising from, out of, or in connection +// with the software or the use or other dealings in the software. 
+// +// SUPPORT +// Send bug reports to: <nielsen@memberwebs.com> +// +// SITE +// http://memberwebs.com/nielsen/ +// + +// SABLOTRON C++ WRAPPER +// +// This wrapper tries to emulate the W3C DOM as much as possible. +// Objects returned can be copied liberally. When copied they still +// refer to the original Sablotron classes. +// Almost all classes are "light" wrappers, and shouldn't be more +// than 4 to 8 bytes apiece, with no additional memory allocation. +// Certain objects (NodeList, NamedNodeMap implementations) are +// heavier, and are reference counted. +// +// Sablotron uses UTF8 exclusively internally. This wrapper +// supports on-the-fly conversions from UTF16. Define the following +// constants to enable conversions: +// +// USE_UTF16 +// Convert all data values from UTF16 to UTF8. +// +// DOM_UTF16 +// Convert all DOM names from UTF16 to UTF8. +// +// If either of the above two constants is enabled you must include +// salbotr.cpp in your build. +// +// Everything is in the DOM namespace. +// +// Objects of type Document and DOMImplementation must be manually +// freed with their 'release' member function. 
+// +// + +#ifndef __SABLO_H__ +#define __SABLO_H__ + +#define USE_UTF16 +// #define DOM_UTF16 + +#include <stdio.h> +#include <wchar.h> +#include <exception> +#include <string> + +namespace DOM +{ + namespace S + { + extern "C" + { + #include <sablot.h> + #include <sdom.h> + } + }; + + bool transcode16to8(const std::basic_string<wchar_t>& data, + std::basic_string<char>& ret); + bool transcode8to16(const std::basic_string<char>& data, + std::basic_string<wchar_t>& ret); + + namespace INT + { + typedef std::basic_string<char> _8str; + typedef std::basic_string<wchar_t> _16str; + } + +#ifdef ASSERT + #define DOM_ASSERT ASSERT +#else + #include "assert.h" + #define ASSERT assert +#endif + +#ifdef USE_UTF16 + #ifdef DOM_UTF16 + typedef INT::_16str data_str; + typedef INT::_16str dom_str; + #define FROM_V(v) _16to8(v) + #define FROM_D(v) _16to8(v) + #define TO_V(v) _8to16(v) + #define TO_D(v) _8to16(v) + #else + typedef INT::_16str data_str; + typedef INT::_8str dom_str; + #define FROM_V(v) _16to8(v) + #define FROM_D(v) v + #define TO_V(v) _8to16(v) + #define TO_D(v) v + #endif +#else + typedef INT::_8str data_str; + typedef INT::_8str dom_str; + #define FROM_V(v) v + #define FROM_D(v) v + #define TO_V(v) v + #define TO_D(v) v +#endif + + namespace INT + { + template<typename C> + class Ref + { + public: + Ref() + { m_ptr = NULL; } + + Ref(C* ptr) + { + m_ptr = ptr; + addref(); + } + + Ref(C* ptr, bool addref) + { + m_ptr = ptr; + if(addref) + addref(); + } + + ~Ref() + { + release(); + } + + Ref(const Ref& orig) + { + m_ptr = orig.m_ptr; + addref(); + } + + Ref& operator=(const C* ptr) + { + C* old = m_ptr; + m_ptr = (C*)ptr; + addref(); + if(old) + old->release(); + return *this; + } + + Ref& operator=(const Ref& orig) + { return operator=(orig.m_ptr); } + +#ifdef COMPARE_REF + bool operator==(const C* ptr) + { + if(m_ptr == NULL && ptr == NULL) + return true; + else if(m_ptr == NULL || ptr == NULL) + return false; + + return *ptr == *m_ptr; + } + + bool 
operator==(const Ref& orig) + { return operator==(orig.m_ptr); } +#else + bool operator==(const C* ptr) + { + ASSERT(ptr == NULL); + return m_ptr == NULL; + } +#endif + operator C*() const + { return m_ptr; } + + operator C&() const + { return &m_ptr; } + + C* operator->() const + { return m_ptr; } + + protected: + void release() + { + if(m_ptr) + m_ptr->release(); + m_ptr = NULL; + } + + void addref() + { + if(m_ptr) + m_ptr->addref(); + } + + private: + C* m_ptr; + }; + + class Inst + { + public: + Inst() + { m_x = 0; } + virtual ~Inst() + { } + void addref() + { m_x++; } + void release() + { + if((--m_x) <= 0) + delete this; + } + + private: + int m_x; + }; + + class Base; + }; + + #define ASSERT_VALID() \ + ASSERT(isValid()); + #define ASSERT_VALID_NODE(node) \ + ASSERT(node.isValid()); + #define ASSERT_TYPE(t) \ + ASSERT(getNodeType() == t) + #define ASSERT_NODE_TYPE(n, t) \ + ASSERT(n.getNodeType() == t) + + class DOMException + { + public: + typedef enum + { + INDEX_SIZE_ERR = S::SDOM_INDEX_SIZE_ERR, + DOMSTRING_SIZE_ERR = S::SDOM_DOMSTRING_SIZE_ERR, + HIERARCHY_REQUEST_ERR = S::SDOM_HIERARCHY_REQUEST_ERR, + WRONG_DOCUMENT_ERR = S::SDOM_WRONG_DOCUMENT_ERR, + INVALID_CHARACTER_ERR = S::SDOM_INVALID_CHARACTER_ERR, + NO_DATA_ALLOWED_ERR = S::SDOM_NO_DATA_ALLOWED_ERR, + NO_MODIFICATION_ALLOWED_ERR = S::SDOM_NO_MODIFICATION_ALLOWED_ERR, + NOT_FOUND_ERR = S::SDOM_NOT_FOUND_ERR, + NOT_SUPPORTED_ERR = S::SDOM_NOT_SUPPORTED_ERR, + INUSE_ATTRIBUTE_ERR = S::SDOM_INUSE_ATTRIBUTE_ERR, + INVALID_STATE_ERR = S::SDOM_INVALID_STATE_ERR, + SYNTAX_ERR = S::SDOM_SYNTAX_ERR, + INVALID_MODIFICATION_ERR = S::SDOM_INVALID_MODIFICATION_ERR, + NAMESPACE_ERR = S::SDOM_NAMESPACE_ERR, + INVALID_ACCESS_ERR = S::SDOM_INVALID_ACCESS_ERR, + /* not in spec below this point: */ + INVALID_NODE_TYPE = S::SDOM_INVALID_NODE_TYPE, + QUERY_PARSE_ERR = S::SDOM_QUERY_PARSE_ERR, + QUERY_EXECUTION_ERR = S::SDOM_QUERY_EXECUTION_ERR, + NOT_OK = S::SDOM_NOT_OK + } CODES; + + int getCode() + { return code; 
} + char* getMessage() + { return S::SDOM_getExceptionMessage(m_sit); } + void getDetails(int* code, char** message, + char** documentUri, int* fileLine) + { S::SDOM_getExceptionDetails(m_sit, code, message, documentUri, fileLine); } + + short code; + + protected: + DOMException(S::SDOM_Exception e, S::SablotSituation s) + { + code = e; + m_sit = s; + } + + S::SablotSituation m_sit; + + friend class INT::Base; + }; + + namespace INT + { + /** + * The base class that keeps references to sablo + */ + class Base + { + public: + bool operator==(const Base& other) const + { return m_sit == other.m_sit; } + bool operator==(const void* null) const + { ASSERT(null == NULL); return m_sit == NULL; }; + bool operator!=(const Base& other) const + { return !operator==(other); } + bool operator!=(const void* null) const + { return !operator==(null); } + + protected: + Base(S::SablotSituation sit) + { m_sit = sit; } + Base(const Base& base) + { m_sit = base.m_sit; } + Base& operator=(const Base& other) + { m_sit = other.m_sit; return *this; } + Base& operator=(const void* null) + { ASSERT(null == NULL); m_sit = NULL; return *this; } + inline bool isValid() const + { return m_sit != NULL; } + + inline S::SDOM_Exception _try_(S::SDOM_Exception e) const + throw(DOMException) + { + if(e != S::SDOM_OK) + throw DOMException(e, m_sit); + return e; + } + +#ifdef USE_UTF16 + inline _16str _8to16(const _8str& d) const + throw(DOMException) + { + _16str s; + if(!transcode8to16(d, s)) + throw DOMException(S::SDOM_INVALID_CHARACTER_ERR, m_sit); + return s; + } + + inline _8str _16to8(const _16str& d) const + throw(DOMException) + { + _8str s; + if(!transcode16to8(d, s)) + throw DOMException(S::SDOM_INVALID_CHARACTER_ERR, m_sit); + return s; + } +#endif + + S::SablotSituation m_sit; + }; + + class NamedNodeMap; + class NodeList; + class ChildNodeList; + class AttrNodeList; + class DOMNodeList; + class AttrNamedNodeMap; + } + + class Element; + class Document; + class DOMImplementation; + + 
typedef INT::Ref<INT::NamedNodeMap> NamedNodeMap; + typedef INT::Ref<INT::NodeList> NodeList; + + /** + * Thin wrapper class for a DOM Node + */ + class Node : + public INT::Base + { + public: + enum TYPES + { + ELEMENT_NODE = S::SDOM_ELEMENT_NODE, + ATTRIBUTE_NODE = S::SDOM_ATTRIBUTE_NODE, + TEXT_NODE = S::SDOM_TEXT_NODE, + CDATA_SECTION_NODE = S::SDOM_CDATA_SECTION_NODE, + ENTITY_REFERENCE_NODE = S::SDOM_ENTITY_REFERENCE_NODE, + ENTITY_NODE = S::SDOM_ENTITY_NODE, + PROCESSING_INSTRUCTION_NODE = S::SDOM_PROCESSING_INSTRUCTION_NODE, + COMMENT_NODE = S::SDOM_COMMENT_NODE, + DOCUMENT_NODE = S::SDOM_DOCUMENT_NODE, + DOCUMENT_TYPE_NODE = S::SDOM_DOCUMENT_TYPE_NODE, + DOCUMENT_FRAGMENT_NODE = S::SDOM_DOCUMENT_FRAGMENT_NODE, + NOTATION_NODE = S::SDOM_NOTATION_NODE + }; + + Node() : INT::Base(NULL) + { + m_node = NULL; + } + + Node(const Node& node) + : INT::Base(node) + { + m_node = node.m_node; + } + + Node& operator=(const Node& other) + { + Base::operator=(other); + m_node = other.m_node; + return *this; + } + + Node& operator=(const void* null) + { + ASSERT(null == NULL); + Base::operator=(null); + m_node = NULL; + return *this; + } + + bool operator==(const Node& other) const + { + return Base::operator==(other) && + m_node == other.m_node; + } + + bool operator==(const void* null) const + { + ASSERT(null == NULL); + return Base::operator==(null) || + m_node == NULL; + } + + bool operator!=(const Node& other) const + { return !operator==(other); } + + bool operator!=(const void* null) const + { return !operator==(null); } + + const Node* operator->() const + { return (const Node*)this; } + Node* operator->() + { return this; } + + dom_str getNodeName() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* name; + _try_(S::SDOM_getNodeName(m_sit, m_node, &name)); + return TO_D(INT::_8str(name)); + } + + data_str getNodeValue() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* value; + _try_(S::SDOM_getNodeValue(m_sit, m_node, &value)); + 
return TO_V(INT::_8str(value)); + } + + void setNodeValue(const data_str& value) + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_setNodeValue(m_sit, m_node, FROM_V(value).c_str())); + } + + short getNodeType() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_NodeType type; + _try_(S::SDOM_getNodeType(m_sit, m_node, &type)); + return (short)type; + } + + Node getParentNode() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node parent; + _try_(S::SDOM_getParentNode(m_sit, m_node, &parent)); + return Node(m_sit, parent); + } + + NodeList getChildNodes() const + throw(DOMException); + + Node getFirstChild() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node child; + _try_(S::SDOM_getFirstChild(m_sit, m_node, &child)); + return Node(m_sit, child); + } + + Node getLastChild() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node child; + _try_(S::SDOM_getLastChild(m_sit, m_node, &child)); + return Node(m_sit, child); + } + + Node getPreviousSibling() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node sib; + _try_(S::SDOM_getPreviousSibling(m_sit, m_node, &sib)); + return Node(m_sit, sib); + } + + Node getNextSibling() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node sib; + _try_(S::SDOM_getNextSibling(m_sit, m_node, &sib)); + return Node(m_sit, sib); + } + + NamedNodeMap getAttributes() const + throw(DOMException); + + Document getOwnerDocument() const + throw(DOMException); + + Node insertBefore(const Node& newChild, const Node& refChild) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(newChild); + ASSERT_VALID_NODE(refChild); + _try_(S::SDOM_insertBefore(m_sit, m_node, newChild.m_node, refChild.m_node)); + return newChild; + } + + Node replaceChild(const Node& newChild, const Node& refChild) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(newChild); + ASSERT_VALID_NODE(refChild); + _try_(S::SDOM_replaceChild(m_sit, m_node, newChild.m_node, 
refChild.m_node)); + return newChild; + } + + Node removeChild(const Node& oldChild) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(oldChild); + _try_(S::SDOM_removeChild(m_sit, m_node, oldChild.m_node)); + return oldChild; + } + + Node appendChild(const Node& newChild) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(newChild); + _try_(S::SDOM_appendChild(m_sit, m_node, newChild.m_node)); + return newChild; + } + + bool hasChildNodes() const + throw(DOMException) + { + ASSERT_VALID(); + int count = 0; + _try_(S::SDOM_getChildNodeCount(m_sit, m_node, &count)); + return count != 0; + } + + Node cloneNode(bool deep) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node node; + _try_(S::SDOM_cloneNode(m_sit, m_node, deep ? 1 : 0, &node)); + return Node(m_sit, node); + } + + void normalize() + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + } + + bool isSupported(const dom_str& feature, + const dom_str& version) const + { + ASSERT_VALID(); + return false; + } + + dom_str getNamespaceURI() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* uri; + _try_(S::SDOM_getNodeNSUri(m_sit, m_node, &uri)); + return TO_D(INT::_8str(uri)); + } + + dom_str getPrefix() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* prefix; + _try_(S::SDOM_getNodePrefix(m_sit, m_node, &prefix)); + return TO_D(INT::_8str(prefix)); + } + + void setPrefix(const dom_str& prefix) + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + } + + dom_str getLocalName() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* name; + _try_(S::SDOM_getNodeLocalName(m_sit, m_node, &name)); + return TO_D(INT::_8str(name)); + } + + bool hasAttributes() const + throw (DOMException) + { + ASSERT_VALID(); + + if(getNodeType() != ELEMENT_NODE) + return false; + + int count = 0; + _try_(S::SDOM_getAttributeNodeCount(m_sit, m_node, &count)); + return count != 0; + } + + void* 
setUserData(void* data) + throw(DOMException) + { + ASSERT_VALID(); + void* old = S::SDOM_getNodeInstanceData(m_node); + S::SDOM_setNodeInstanceData(m_node, data); + return old; + } + + void* getUserData() const + throw(DOMException) + { + ASSERT_VALID(); + return S::SDOM_getNodeInstanceData(m_node); + } + + std::string serialize() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Document doc; + _try_(S::SDOM_getOwnerDocument(m_sit, m_node, &doc)); + S::SDOM_char* serialized; + _try_(S::SDOM_nodeToString(m_sit, doc, m_node, &serialized)); + std::string ret(serialized); + S::SablotFree(serialized); + return ret; + } + + bool isValid() const + { + return Base::isValid() && + m_node != NULL; + } + + protected: + Node(S::SablotSituation sit, S::SDOM_Node node) : + INT::Base(sit) { m_node = node; } + + protected: + S::SDOM_Node m_node; + + friend class Document; + friend class INT::ChildNodeList; + friend class INT::AttrNodeList; + friend class INT::DOMNodeList; + friend class INT::AttrNamedNodeMap; + }; + + class Attr : + public Node + { + public: + Attr() { } + Attr(const Attr& node) : + Node(node) { } + + Attr& operator=(const Attr& other) + { Node::operator=(other); return *this; } + Attr& operator=(const void* null) + { Node::operator=(null); return *this; } + const Attr* operator->() const + { return (const Attr*)this; } + Attr* operator->() + { return this; } + + dom_str getName() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ATTRIBUTE_NODE); + return getNodeName(); + } + + Element getOwnerElement() const + throw(DOMException); + + bool getSpecified() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ATTRIBUTE_NODE); + return true; + } + + data_str getValue() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ATTRIBUTE_NODE); + return getNodeValue(); + } + + void setValue(const data_str& value) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ATTRIBUTE_NODE); + setNodeValue(value); + } + + 
protected: + Attr(S::SablotSituation sit, S::SDOM_Node node) : + Node(sit, node) { } + + friend class Element; + friend class Document; + }; + + /** + * This wrapper class for an element + */ + class Element : + public Node + { + public: + Element() { } + Element(const Element& node) : + Node(node) {} + + Element& operator=(const Element& other) + { Node::operator=(other); return *this; } + Element& operator=(const void* null) + { Node::operator=(null); return *this; } + const Element* operator->() const + { return (const Element*)this; } + Element* operator->() + { return this; } + + dom_str getTagName() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + return getNodeName(); + } + + data_str getAttribute(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_char* value; + _try_(S::SDOM_getAttribute(m_sit, m_node, FROM_D(name).c_str(), &value)); + return TO_V(INT::_8str(value)); + } + + void setAttribute(const dom_str& name, const data_str& value) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + _try_(S::SDOM_setAttribute(m_sit, m_node, FROM_D(name).c_str(), + FROM_V(value).c_str())); + } + + void removeAttribute(const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + _try_(S::SDOM_removeAttribute(m_sit, m_node, FROM_D(name).c_str())); + } + + Attr getAttributeNode(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node attr; + _try_(S::SDOM_getAttributeNode(m_sit, m_node, FROM_D(name).c_str(), &attr)); + return Attr(m_sit, attr); + } + + Attr setAttributeNode(const Attr& attr) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(attr); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node repl; + _try_(S::SDOM_setAttributeNode(m_sit, m_node, attr.m_node, &repl)); + return Attr(m_sit, repl); + } + + Attr removeAttributeNode(const Attr& attr) + 
throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(attr); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node rem; + _try_(S::SDOM_removeAttributeNode(m_sit, m_node, attr.m_node, &rem)); + return Attr(m_sit, rem); + } + + NodeList getElementsByTagName(const dom_str& name) const + throw(DOMException); + + data_str getAttributeNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_char* value; + _try_(S::SDOM_getAttributeNS(m_sit, m_node, (char*)FROM_D(uri).c_str(), + (char*)FROM_D(name).c_str(), &value)); + return TO_V(INT::_8str(value)); + } + + void setAttributeNS(const dom_str& uri, const dom_str& name, + const data_str& value) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + _try_(S::SDOM_setAttributeNS(m_sit, m_node, FROM_D(uri).c_str(), + FROM_D(name).c_str(), FROM_V(value).c_str())); + } + + void removeAttributeNS(const dom_str& uri, const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + DOM::Attr attr = getAttributeNodeNS(uri, name); + if(attr != NULL) + removeAttributeNode(attr); + } + + Attr getAttributeNodeNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node attr; + _try_(S::SDOM_getAttributeNodeNS(m_sit, m_node, (char*)FROM_D(uri).c_str(), + (char*)FROM_D(name).c_str(), &attr)); + return Attr(m_sit, attr); + } + + Attr setAttributeNodeNS(const Attr& attr) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(attr); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node repl; + _try_(S::SDOM_setAttributeNodeNS(m_sit, m_node, attr.m_node, &repl)); + return Attr(m_sit, repl); + } + + NodeList getElementsByTagNameNS(const dom_str& uri, + const dom_str& name) const + throw(DOMException); + + bool hasAttribute(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + return 
getAttributeNode(name) != NULL; + } + + bool hasAttributeNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + return getAttributeNodeNS(uri, name) != NULL; + } + + protected: + Element(S::SablotSituation sit, S::SDOM_Node node) : + Node(sit, node) { } + + friend class Attr; + friend class Document; + }; + + + class CharacterData : + public Node + { + public: + CharacterData() { } + CharacterData(const Node& node) : + Node(node) { } + + CharacterData& operator=(const CharacterData& other) + { Node::operator=(other); return *this; } + CharacterData& operator=(const void* null) + { Node::operator=(null); return *this; } + const CharacterData* operator->() const + { return (const CharacterData*)this; } + CharacterData* operator->() + { return this; } + + void appendData(const data_str& data) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + val.append(data); + setNodeValue(val); + } + + void deleteData(int offset, int count) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + val.erase(offset, count); + setNodeValue(val); + } + + data_str getData() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + return getNodeValue(); + } + + int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + return getNodeValue().size(); + } + + void insertData(int offset, const data_str& data) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == 
CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + val.insert(offset, data); + setNodeValue(val); + } + + void replaceData(int offset, int count, const data_str& arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + val.erase(offset, count); + val.insert(offset, arg); + setNodeValue(val); + } + + void setData(const data_str& data) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + setNodeValue(data); + } + + data_str substringData(int offset, int count) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + return val.substr(offset, count); + } + + protected: + CharacterData(S::SablotSituation sit, S::SDOM_Node node) : + Node(sit, node) { } + }; + + class Text : + public CharacterData + { + public: + Text() { } + Text(const Node& node) : + CharacterData(node) { } + + Text& operator=(const Text& other) + { CharacterData::operator=(other); return *this; } + Text& operator=(const void* null) + { CharacterData::operator=(null); return *this; } + const Text* operator->() const + { return (const Text*)this; } + Text* operator->() + { return this; } + + Text splitText(int offset) + throw(DOMException); + + protected: + Text(S::SablotSituation sit, S::SDOM_Node node) : + CharacterData(sit, node) { } + + friend class Document; + }; + + class CDATASection : + public Text + { + public: + CDATASection() { } + CDATASection(const CDATASection& node) : + Text(node) { } + + CDATASection& operator=(const CDATASection& other) + { Text::operator=(other); return *this; } + CDATASection& operator=(void* null) + { Text::operator=(null); return *this; } + 
const CDATASection* operator->() const + { return (const CDATASection*)this; } + CDATASection* operator->() + { return this; } + + protected: + CDATASection(S::SablotSituation sit, S::SDOM_Node node) : + Text(sit, node) { } + + friend class Document; + }; + + class Comment : + public CharacterData + { + public: + Comment() { } + Comment(const Comment& node) : + CharacterData(node) { } + + Comment& operator=(const Comment& other) + { CharacterData::operator=(other); return *this; } + Comment& operator=(void* null) + { CharacterData::operator=(null); return *this; } + const Comment* operator->() const + { return (const Comment*)this; } + Comment* operator->() + { return this; } + + protected: + Comment(S::SablotSituation sit, S::SDOM_Node node) : + CharacterData(sit, node) { } + + friend class Document; + }; + + class ProcessingInstruction : + public Node + { + public: + ProcessingInstruction() { } + ProcessingInstruction(const ProcessingInstruction& node) : + Node(node) { } + + ProcessingInstruction& operator=(const ProcessingInstruction& other) + { Node::operator=(other); return *this; } + ProcessingInstruction& operator=(void* null) + { Node::operator=(null); return *this; } + const ProcessingInstruction* operator->() const + { return (const ProcessingInstruction*)this; } + ProcessingInstruction* operator->() + { return this; } + + data_str getData() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(PROCESSING_INSTRUCTION_NODE); + return getNodeValue(); + } + + dom_str getTarget() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(PROCESSING_INSTRUCTION_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + void setData(const data_str& data) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(PROCESSING_INSTRUCTION_NODE); + setNodeValue(data); + } + + protected: + ProcessingInstruction(S::SablotSituation sit, S::SDOM_Node node) : + Node(sit, node) { } + + friend class Document; + }; + + class DocumentFragment : + 
public Node + { + public: + DocumentFragment() { } + DocumentFragment(const DocumentFragment& node) : + Node(node) { } + + DocumentFragment& operator=(const DocumentFragment& other) + { Node::operator=(other); return *this; } + DocumentFragment& operator=(void* null) + { Node::operator=(null); return *this; } + const DocumentFragment* operator->() const + { return (const DocumentFragment*)this; } + DocumentFragment* operator->() + { return this; } + }; + + class Entity : + public Node + { + public: + Entity() { } + Entity(const Entity& node) : + Node(node) { } + + Entity& operator=(const Entity& other) + { Node::operator=(other); return *this; } + Entity& operator=(void* null) + { Node::operator=(null); return *this; } + const Entity* operator->() const + { return (const Entity*)this; } + Entity* operator->() + { return this; } + + dom_str getNotationName() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ENTITY_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + dom_str getPublicId() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ENTITY_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + dom_str getSystemId() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ENTITY_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + }; + + class EntityReference : + public Node + { + public: + EntityReference() { } + EntityReference(const EntityReference& node) : + Node(node) { } + + EntityReference& operator=(const EntityReference& other) + { Node::operator=(other); return *this; } + EntityReference& operator=(void* null) + { Node::operator=(null); return *this; } + const EntityReference* operator->() const + { return (const EntityReference*)this; } + EntityReference* operator->() + { return this; } + }; + + class Notation : + public Node + { + public: + Notation() { } + Notation(const Notation& node) : + Node(node) { } + + Notation& operator=(const Notation& other) + { 
Node::operator=(other); return *this; } + Notation& operator=(void* null) + { Node::operator=(null); return *this; } + const Notation* operator->() const + { return (const Notation*)this; } + Notation* operator->() + { return this; } + + dom_str getPublicId() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(NOTATION_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + dom_str getSystemId() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(NOTATION_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + }; + + class DocumentType : + public Node + { + public: + DocumentType() { } + DocumentType(const DocumentType& node) : + Node(node) { } + + DocumentType& operator=(const DocumentType& other) + { Node::operator=(other); return *this; } + DocumentType& operator=(void* null) + { Node::operator=(null); return *this; } + const DocumentType* operator->() const + { return (const DocumentType*)this; } + DocumentType* operator->() + { return this; } + + NamedNodeMap getEntities() const + throw(DOMException); + + dom_str getInternalSubset() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + dom_str getName() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + NamedNodeMap getNotations() const + throw(DOMException); + }; + + + class Document : + public Node + { + public: + Document() { } + Document(const Document& doc) : + Node(doc) { } + Document(S::SablotSituation sit, S::SDOM_Document doc) : + Node(sit, NULL) { m_node = doc; } + + Document& operator=(const Document& other) + { Node::operator=(other); return *this; } + Document& operator=(void* null) + { Node::operator=(null); return *this; } + const Document* operator->() const + { return (const Document*)this; } + Document* operator->() + { return this; } + + DocumentType getDocType() const + throw(DOMException) + { + ASSERT_VALID(); + 
ASSERT_TYPE(DOCUMENT_NODE); + return DocumentType(); + } + + DOMImplementation getImplementation() const + throw(DOMException); + + Element getDocumentElement() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node child; + _try_(S::SDOM_getFirstChild(m_sit, m_node, &child)); + return Element(m_sit, child); + } + + Element createElement(const dom_str& tag) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node el; + _try_(S::SDOM_createElement(m_sit, (S::SDOM_Document)m_node, + &el, FROM_D(tag).c_str())); + return Element(m_sit, el); + } + + DocumentFragment createDocumentFragment() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return DocumentFragment(); + } + + Text createTextNode(const data_str& data) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node text; + _try_(S::SDOM_createTextNode(m_sit, (S::SDOM_Document)m_node, + &text, FROM_V(data).c_str())); + return Text(m_sit, text); + } + + Comment createComment(const data_str& data) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node comment; + _try_(S::SDOM_createComment(m_sit, (S::SDOM_Document)m_node, + &comment, FROM_V(data).c_str())); + return Comment(m_sit, comment); + } + + CDATASection createCDATASection(const data_str& data) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node cdata; + _try_(S::SDOM_createCDATASection(m_sit, (S::SDOM_Document)m_node, + &cdata, FROM_V(data).c_str())); + return CDATASection(m_sit, cdata); + } + + ProcessingInstruction createProcessingInstruction(const dom_str& targ, + const data_str& data) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node pi; + _try_(S::SDOM_createProcessingInstruction(m_sit, + (S::SDOM_Document)m_node, + &pi, FROM_D(targ).c_str(), + 
FROM_V(data).c_str())); + return ProcessingInstruction(m_sit, pi); + } + + Attr createAttribute(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node attr; + _try_(S::SDOM_createAttribute(m_sit, (S::SDOM_Document)m_node, + &attr, FROM_D(name).c_str())); + return Attr(m_sit, attr); + } + + EntityReference createEntityReference() + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return EntityReference(); + } + + NodeList getElementsByTagName(const dom_str& name) const + throw(DOMException); + + Node importNode(const Node& import, bool deep) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + ASSERT_VALID_NODE(import); + S::SDOM_Node imported; + _try_(S::SDOM_cloneForeignNode(m_sit, (S::SDOM_Document)m_node, + import.m_node, deep ? 1 : 0, &imported)); + return Node(m_sit, imported); + } + + Element createElementNS(const dom_str& uri, const dom_str& tag) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node el; + _try_(S::SDOM_createElementNS(m_sit, (S::SDOM_Document)m_node, + &el, FROM_D(uri).c_str(), FROM_D(tag).c_str())); + return Element(m_sit, el); + } + + Attr createAttributeNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node attr; + _try_(S::SDOM_createAttributeNS(m_sit, (S::SDOM_Document)m_node, + &attr, FROM_D(uri).c_str(), FROM_D(name).c_str())); + return Attr(m_sit, attr); + } + + NodeList getElementsByTagNameNS(const dom_str& uri, + const dom_str& name) const + throw(DOMException); + + Element getElementById(const dom_str& id) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + dom_str query = "//*[id('" + id + "')]"; + S::SDOM_NodeList result; + _try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result)); + + int length; + 
_try_(S::SDOM_getNodeListLength(m_sit, result, &length)); + + Element ret; + if(length != 1) + { + ret = Element(); + } + else + { + S::SDOM_Node el; + _try_(S::SDOM_getNodeListItem(m_sit, result, 0, &el)); + ret = Element(m_sit, el); + } + + S::SDOM_disposeNodeList(m_sit, result); + return ret; + } + + std::string serialize() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_char* serialized; + _try_(S::SDOM_docToString(m_sit, (S::SDOM_Document)m_node, &serialized)); + std::string ret(serialized); + S::SablotFree(serialized); + return ret; + } + + void release() + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_tmpListDump((S::SDOM_Document)m_node, 0); + if(S::SablotDestroyDocument(m_sit, (S::SDOM_Document)m_node)) + _try_(S::SDOM_NOT_OK); + *this = NULL; + } + + protected: + + friend class Node; + friend class DOMImplementation; + }; + + + class DOMImplementation : + public INT::Base + { + public: + DOMImplementation() + throw(DOMException) : INT::Base(NULL) + { + if(S::SablotCreateSituation(&m_sit)) + _try_(S::SDOM_NOT_OK); + } + DOMImplementation(S::SablotSituation sit) + throw(DOMException) : INT::Base(sit) { } + DOMImplementation(const DOMImplementation& impl) : + INT::Base(impl) { } + + DOMImplementation& operator=(const DOMImplementation& other) + { Base::operator=(other); return *this; } + DOMImplementation& operator=(void* null) + { Base::operator=(null); return *this; } + const DOMImplementation* operator->() const + { return (const DOMImplementation*)this; } + DOMImplementation* operator->() + { return this; } + + Document createDocument(const dom_str& uri, const dom_str& qname, + const DocumentType& type) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Document doc; + if(S::SablotCreateDocument(m_sit, &doc)) + _try_(S::SDOM_NOT_OK); + + Document document(m_sit, doc); + + if(!qname.empty()) + { + if(!uri.empty()) + document.appendChild(document.createElementNS(uri, 
qname)); + else + document.appendChild(document.createElement(qname)); + } + + return document; + } + + DocumentType createDocumentType(const dom_str& qname, + const dom_str& publicId, + const dom_str& systemId) const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return DocumentType(); + } + + bool hasFeature(const dom_str& feature, const dom_str& version) const + throw(DOMException) + { + ASSERT_VALID(); + return false; + } + + void release() + throw(DOMException) + { + ASSERT_VALID(); + if(S::SablotDestroySituation(m_sit)) + _try_(S::SDOM_NOT_OK); + } + }; + + namespace INT + { + class NodeList : + public INT::Base, + public INT::Inst + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NodeList& other) const + { return Base::operator==(other); } +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return 0; + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + protected: + NodeList(S::SablotSituation sit) : + INT::Base(sit) { } + + virtual bool isValid() const + { + return false; + } + + private: + NodeList(const NodeList& list) : INT::Base(list) { } + }; + + class ChildNodeList : + public NodeList + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NodeList& other) const + { return m_el == ((ChildNodeList)other).m_el; } +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + int length; + _try_(S::SDOM_getChildNodeCount(m_sit, m_el, &length)); + return length; + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node attr; + _try_(S::SDOM_getChildNodeIndex(m_sit, m_el, index, &attr)); + return Node(m_sit, attr); + } + + protected: + ChildNodeList(S::SablotSituation sit, S::SDOM_Node node) : + NodeList(sit) { m_el = node; } + + virtual bool isValid() const + { + 
return m_el != NULL; + } + + protected: + S::SDOM_Node m_el; + + friend class Node; + }; + + class AttrNodeList : + public NodeList + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NodeList& other) const + { return m_el == ((AttrNodeList)other).m_el; } +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + int length; + _try_(S::SDOM_getAttributeNodeCount(m_sit, m_el, &length)); + return length; + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node attr; + _try_(S::SDOM_getAttributeNodeIndex(m_sit, m_el, index, &attr)); + return Node(m_sit, attr); + } + + protected: + AttrNodeList(S::SablotSituation sit, S::SDOM_Node el) : + NodeList(sit) { m_el = el; } + + virtual bool isValid() const + { + return m_el != NULL; + } + + protected: + S::SDOM_Node m_el; + }; + + + class DOMNodeList : + public NodeList + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NodeList& other) const + { return m_list == ((DOMNodeList&)other).m_list; } +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + int length; + _try_(S::SDOM_getNodeListLength(m_sit, m_list, &length)); + return length; + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node it; + _try_(S::SDOM_getNodeListItem(m_sit, m_list, index, &it)); + return Node(m_sit, it); + } + + protected: + DOMNodeList(S::SablotSituation sit, S::SDOM_NodeList list) : + NodeList(sit) + { + m_list = list; + } + + ~DOMNodeList() + { + if(m_list != NULL) + S::SDOM_disposeNodeList(m_sit, m_list); + m_list = NULL; + } + + virtual bool isValid() const + { + return m_list != NULL; + } + + protected: + S::SDOM_NodeList m_list; + + friend class Element; + friend class Document; + }; + + + class NamedNodeMap : + public INT::Base, + public INT::Inst + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NamedNodeMap& other) const + { Base::operator==(other); 
} +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return 0; + } + + virtual Node getNamedItem(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node getNamedItemNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node removeNamedItem(const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node removeNamedItemNS(const dom_str& uri, const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node setNamedItem(const Node& arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(arg); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node setNamedItemNS(const Node& arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(arg); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + protected: + NamedNodeMap(S::SablotSituation sit) : + INT::Base(sit) { } + + virtual bool isValid() const + { + return false; + } + + private: + NamedNodeMap(const NamedNodeMap& map) : INT::Base(map) { } + }; + + class AttrNamedNodeMap : + public NamedNodeMap + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NamedNodeMap& other) const + { return m_el == ((AttrNamedNodeMap)other).m_el; } +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + int length; + _try_(S::SDOM_getAttributeNodeCount(m_sit, m_el.m_node, &length)); + return length; + } + + virtual Node getNamedItem(const dom_str& name) const + 
throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + return m_el.getAttributeNode(name); + } + + virtual Node getNamedItemNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + return m_el.getAttributeNodeNS(uri, name); + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node attr; + _try_(S::SDOM_getAttributeNodeIndex(m_sit, m_el.m_node, index, &attr)); + return Node(m_sit, attr); + } + + virtual Node removeNamedItem(const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + Node node = getNamedItem(name); + if(node != NULL) + m_el.removeAttributeNode((Attr&)node); + return node; + } + + virtual Node removeNamedItemNS(const dom_str& uri, const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + Node node = getNamedItemNS(uri, name); + if(node != NULL) + m_el.removeAttributeNode((Attr&)node); + return node; + } + + virtual Node setNamedItem(const Node& arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + return m_el.setAttributeNode((Attr&)arg); + } + + virtual Node setNamedItemNS(const Node& arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + return m_el.setAttributeNodeNS((Attr&)arg); + } + + protected: + AttrNamedNodeMap(S::SablotSituation sit, const Node& el) : + NamedNodeMap(sit) + { + ASSERT_VALID_NODE(el); + ASSERT_NODE_TYPE(el, Node::ELEMENT_NODE); + m_el = (Element&)el; + } + + virtual bool isValid() const + { + return m_el.isValid(); + } + + + protected: + Element m_el; + + friend class Node; + }; + }; + + + inline NodeList Node::getChildNodes() const + throw(DOMException) + { + ASSERT_VALID(); + return NodeList(new INT::ChildNodeList(m_sit, m_node)); + } + + inline NamedNodeMap Node::getAttributes() const + throw(DOMException) + { + ASSERT_VALID(); + if(getNodeType() != ELEMENT_NODE) + return 
NamedNodeMap(NULL); + + return NamedNodeMap(new INT::AttrNamedNodeMap(m_sit, *this)); + } + + inline Document Node::getOwnerDocument() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Document doc; + _try_(S::SDOM_getOwnerDocument(m_sit, m_node, &doc)); + return Document(m_sit, doc); + } + + inline Element Attr::getOwnerElement() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ATTRIBUTE_NODE); + S::SDOM_Node el; + _try_(S::SDOM_getAttributeElement(m_sit, m_node, &el)); + return Element(m_sit, el); + } + + inline NodeList Element::getElementsByTagName(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + dom_str query = "descendant::" + name; + S::SDOM_NodeList result; + _try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result)); + return NodeList(new INT::DOMNodeList(m_sit, result)); + } + + inline NodeList Element::getElementsByTagNameNS(const dom_str& uri, + const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + dom_str query = "descendant::*[namespace-uri()='" + uri + "'"; + if(name != "*") + query += " and local-name()='" + name + "']"; + S::SDOM_NodeList result; + _try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result)); + return NodeList(new INT::DOMNodeList(m_sit, result)); + } + + inline NodeList Document::getElementsByTagName(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + dom_str query = "descendant::" + name; + S::SDOM_NodeList result; + _try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result)); + return NodeList(new INT::DOMNodeList(m_sit, result)); + } + + inline NodeList Document::getElementsByTagNameNS(const dom_str& uri, + const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + dom_str query = "descendant::*[namespace-uri()='" + uri + "'"; + if(name != "*") + query += " and local-name()='" + name + "']"; + 
S::SDOM_NodeList result; + _try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result)); + return NodeList(new INT::DOMNodeList(m_sit, result)); + } + + inline DOMImplementation Document::getImplementation() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + return DOMImplementation(m_sit); + } + + inline Text Text::splitText(int offset) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE); + + data_str val = getNodeValue(); + setNodeValue(val.substr(0, offset)); + + Document doc = getOwnerDocument(); + ASSERT(doc != NULL); + + Text split(m_sit, NULL); + val = val.substr(0, offset); + + switch(getNodeType()) + { + case TEXT_NODE: + split = doc.createTextNode(val); + break; + + case CDATA_SECTION_NODE: + split = doc.createCDATASection(val); + break; + + default: + ASSERT(false); + }; + + Node parent = getParentNode(); + if(parent != NULL) + { + Node next = getNextSibling(); + if(next != NULL) + parent.insertBefore(split, next); + else + parent.appendChild(split); + } + + return split; + } + + inline NamedNodeMap DocumentType::getEntities() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return NamedNodeMap(NULL); + } + + inline NamedNodeMap DocumentType::getNotations() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return NamedNodeMap(NULL); + } + +}; // namespace DOM + + +#endif //__SABLO_H__
\ No newline at end of file diff --git a/src/sablo.txt b/src/sablo.txt new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/sablo.txt diff --git a/src/sablotr.cpp b/src/sablotr.cpp new file mode 100644 index 0000000..465648a --- /dev/null +++ b/src/sablotr.cpp @@ -0,0 +1,121 @@ +// +// AUTHOR +// N. Nielsen +// +// LICENSE +// This software is in the public domain. +// +// The software is provided "as is", without warranty of any kind, +// express or implied, including but not limited to the warranties +// of merchantability, fitness for a particular purpose, and +// noninfringement. In no event shall the author(s) be liable for any +// claim, damages, or other liability, whether in an action of +// contract, tort, or otherwise, arising from, out of, or in connection +// with the software or the use or other dealings in the software. +// +// SUPPORT +// Send bug reports to: <nielsen@memberwebs.com> +// +// SITE +// http://memberwebs.com/nielsen/ +// + +// SABLOTRON C++ WRAPPER CONVERSION FUNCTIONS +// + +#include "wchar.h" +#include "sablo.h" + +bool DOM::transcode16to8(const std::basic_string<wchar_t>& data, + std::basic_string<char>& ret) +{ + ret.resize(0); + ret.reserve(data.length() + (data.length() / 2)); + + // basic_string.c_str doesn't work properly everywhere + // most notably not in the g++ std library + + const wchar_t* c = data.length() ? 
data.data() : L""; + const wchar_t* e = c + data.length(); + + for( ; c != e; c++) + { + if(*c <= 0x007F) + { + ret.append(1, (char)*c); + } + else if(*c <= 0x07FF) + { + ret.append(1, (char)(192 | (*c >> 6))); + ret.append(1, (char)(128 | (*c & 63))); + } + else + { + ret.append(1, (char)(224 | (*c >> 12))); + ret.append(1, (char)(128 | ((*c >> 6) & 63))); + ret.append(1, (char)(128 | (*c & 63)) ); + } + } + + return true; +} + +bool DOM::transcode8to16(const std::basic_string<char>& data, + std::basic_string<wchar_t>& ret) +{ + ret.resize(0); + ret.reserve(data.length()); + + // basic_string.c_str doesn't work properly everywhere + // most notably not in the g++ std library + + const char* c = data.length() ? data.data() : ""; + const char* e = c + data.length(); + + for( ; c != e; c++) + { + // First 4 bits set + if((c[0] & 0xF8) == 0xF0 && + (c[1] & 0xC0) == 0x80 && + (c[2] & 0xC0) == 0x80 && + (c[3] & 0xC0) == 0x80) + { + ret.append(1, ((wchar_t)c[0] & 7) << 18 | + ((wchar_t)c[1] & 63) << 12 | + ((wchar_t)c[2] & 63) << 6 | + ((wchar_t)c[3] & 63)); + c += 3; + } + + // First 3 bits set + else if((c[0] & 0xF0) == 0xE0 && + (c[1] & 0xC0) == 0x80 && + (c[2] & 0xC0) == 0x80) + { + ret.append(1, ((wchar_t)c[0] & 15) << 12 | + ((wchar_t)c[1] & 63) << 6 | + ((wchar_t)c[2] & 63)); + c += 2; + } + + // First 2 bits set + else if((c[0] & 0xE0) == 0xC0 && + (c[1] & 0xC0) == 0x80) + { + ret.append(1, ((wchar_t)c[0] & 31) << 6 | + ((wchar_t)c[1] & 63)); + c += 1; + } + + // First bit set + else if(!(c[0] & 0x80)) + { + ret.append(1, (wchar_t)c[0]); + } + + else + return false; + } + + return true; +} diff --git a/src/stdafx.cpp b/src/stdafx.cpp new file mode 100644 index 0000000..1679211 --- /dev/null +++ b/src/stdafx.cpp @@ -0,0 +1,8 @@ +// stdafx.cpp : source file that includes just the standard includes +// rtfm.pch will be the pre-compiled header +// stdafx.obj will contain the pre-compiled type information + +#include "stdafx.h" + +// TODO: reference any additional 
headers you need in STDAFX.H +// and not in this file diff --git a/src/stdafx.h b/src/stdafx.h new file mode 100644 index 0000000..4876942 --- /dev/null +++ b/src/stdafx.h @@ -0,0 +1,21 @@ +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// + +#if !defined(AFX_STDAFX_H__AB96EF4C_5FF8_4004_96FA_747CCDFE3E7C__INCLUDED_) +#define AFX_STDAFX_H__AB96EF4C_5FF8_4004_96FA_747CCDFE3E7C__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + + +// TODO: reference additional headers your program requires here +#include <stdlib.h> +#include <debug.h> + +//{{AFX_INSERT_LOCATION}} +// Microsoft Visual C++ will insert additional declarations immediately before the previous line. + +#endif // !defined(AFX_STDAFX_H__AB96EF4C_5FF8_4004_96FA_747CCDFE3E7C__INCLUDED_) diff --git a/src/usuals.h b/src/usuals.h new file mode 100644 index 0000000..8cb7526 --- /dev/null +++ b/src/usuals.h @@ -0,0 +1,8 @@ +#ifndef __USUALS_H__ +#define __USUALS_H__ + +#ifndef NULL +#define NULL ((void*)0) +#endif + +#endif // __USUALS_H__
\ No newline at end of file diff --git a/src/xmlcomposehelpers.cpp b/src/xmlcomposehelpers.cpp new file mode 100644 index 0000000..4f6f106 --- /dev/null +++ b/src/xmlcomposehelpers.cpp @@ -0,0 +1,103 @@ + +#include "stdafx.h" +#include "RtfParseHelpers.h" + +Level::Level() +{ + m_previous = NULL; + m_element = NULL; + m_destination = NULL; + m_analyser = NULL; +} + +Level::Level(const Level& level) : + m_text(level.m_text) +{ + m_element = NULL; + m_destination = NULL; + m_analyser = NULL; + m_previous = &level; +} + +Level::~Level() +{ + +} + +LevelPtr Level::pushLevel() +{ + LevelPtr level = new Level(*this); + return level; +} + +LevelPtr Level::getPrevious() +{ + return m_previous; +} + +DOM::Element Level::getElement() +{ + if(m_element != NULL) + return m_element; + else if(m_previous) + return m_previous->getElement(); + else + ASSERT(0 && "should always have an element"); + + return DOM::Element(); +} + +void Level::setElement(DOM::Element element, bool deep) +{ + if(deep && m_previous && m_element == NULL) + m_previous->setElement(element, deep); + else + m_element = element; +} + +AnalyserPtr Level::getAnalyser() +{ + if(m_analyser) + return m_analyser; + else if(m_previous) + return m_previous->getAnalyser(); + else + return NULL; +} + +void Level::setAnalyser(AnalyserPtr analyser, bool deep) +{ + if(deep && m_previous && !m_analyser) + m_previous->setAnalyser(analyser, deep); + else + m_analyser = analyser; +} + +DestinationPtr Level::getDestination() +{ + if(m_destination) + return m_destination; + else if(m_previous) + return m_previous->getDestination(); + else + return NULL; +} + +void Level::setDestination(DestinationPtr destination, bool deep) +{ + if(deep && m_previous && !m_destination) + m_previous->setDestination(destination, deep); + else + m_destination = destination; +} + +RtfFormatting& Level::getFormatting() +{ + return m_text; +} + +void Level::setTextProperties(RtfFormatting& formatting) +{ + m_text.copy(formatting); +} + diff --git 
a/src/xmlcomposehelpers.h b/src/xmlcomposehelpers.h new file mode 100644 index 0000000..f6c06e7 --- /dev/null +++ b/src/xmlcomposehelpers.h @@ -0,0 +1,84 @@ +#ifndef __RTFPARSEHELPERS_H__ +#define __RTFPARSEHELPERS_H__ + +#include "Reference.h" +#include <mystring.h> +#include "sablo.h" +#include "RtfFormatting.h" + +class RtfParser; + +class Destination : + public Instance +{ +public: + // This is called when the Destination is first used + virtual void initialize() {}; + // Called when data arrives at destination + virtual void charData(wstring data) {}; + // Called when the Destination goes out of scope + virtual void done() {}; + +protected: + RtfParser* m_parser; + friend class RtfParser; +}; + + +class Analyser : + public Instance +{ +public: + // This is called when the Analyser is first used + virtual void initialize() {}; + // Called when a control word is seen + virtual void controlWord(const string& cw, int flags, int param) {}; + // Called when a group is seen within this scope + virtual void groupStart() {}; + // Called when a group ends within this scope + virtual void groupEnd() {}; + // Called when when this analyser goes out of scope + virtual void done() {}; + +protected: + RtfParser* m_parser; + friend class RtfParser; +}; + +class Level; + +typedef Reference<Destination> DestinationPtr; +typedef Reference<Analyser> AnalyserPtr; +typedef Reference<Level> LevelPtr; + +class Level : + public Instance +{ +public: + Level(); + virtual ~Level(); + + LevelPtr getPrevious(); + LevelPtr pushLevel(); + + DOM::Element getElement(); + void setElement(DOM::Element element, bool deep = false); + AnalyserPtr getAnalyser(); + void setAnalyser(AnalyserPtr analyser, bool deep = false); + DestinationPtr getDestination(); + void setDestination(DestinationPtr destination, bool deep = false); + RtfFormatting& getFormatting(); + void setTextProperties(RtfFormatting& textProperties); + +protected: + Level(const Level& level); + + LevelPtr m_previous; + DOM::Element 
m_element; + RtfFormatting m_text; + DestinationPtr m_destination; + AnalyserPtr m_analyser; +}; + + +#endif //__RTFPARSEHELPERS_H__
\ No newline at end of file diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp new file mode 100644 index 0000000..1bfeb30 --- /dev/null +++ b/src/xmlcomposer.cpp @@ -0,0 +1,1811 @@ +// RtfParser.cpp: implementation of the RtfParser class. +// +////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" +#include "RtfAnalyser.h" + +const char* kElDest = "i_dest"; +const char* kElBlock = "i_block"; +const char* kAtFix = "i_fix"; +const char* kAtCell = "i_cell"; +const char* kElListtable = "i_listtable"; +const char* kElListdef = "i_listdef"; + +const char* kElPara = "para"; +const char* kElDoc = "document"; +const char* kElTab = "tab"; +const char* kElSect = "sect"; +const char* kElPage = "page"; +const char* kElStyle = "style"; +const char* kElLine = "line"; +const char* kElList = "list"; +const char* kElStylesheet = "stylesheet"; +const char* kElInfo = "info"; +const char* kElTitle = "title"; +const char* kElAuthor = "author"; +const char* kElOperator = "operator"; +const char* kElB = "b"; +const char* kElHide = "hide"; +const char* kElI = "i"; +const char* kElStrike = "strike"; +const char* kElU = "u"; +const char* kElColor = "color"; +const char* kElCell = "cell"; +const char* kElRow = "row"; +const char* kElTable = "table"; + +const char* kAtList = "list"; +const char* kAtName = "name"; +const char* kAtBold = "bold"; +const char* kAtHidden = "hide"; +const char* kAtItalic = "italic"; +const char* kAtStrike = "strike"; +const char* kAtUnderline = "underline"; +const char* kAtColor = "color"; +const char* kAtType = "type"; +const char* kAtOrdered = "ordered"; +const char* kAtStart = "start"; +const char* kAtId = "id"; +const char* kAtIndex = "id"; + +const wchar_t* kValDisc = L"disc"; +const wchar_t* kValLowerAlpha = L"lower-alpha"; +const wchar_t* kValUpperAlpha = L"upper-alpha"; +const wchar_t* kValLowerRoman = L"lower-roman"; +const wchar_t* kValUpperRoman = L"upper-roman"; +const wchar_t* kValArabic = L"arabic"; +const 
wchar_t* kValNull = L""; + +const wchar_t* kValList = L"list"; +const wchar_t* kValPara = L"para"; +const wchar_t* kValTable = L"table"; + +const char* kNoDuplicates[] = + { kElB, kElU, kElI, kElColor, kElHide, kElColor, NULL }; + +const char* kRemoveTags[] = + { kElDest, kElListdef, kElListtable, NULL }; + +const char* kBlockTags[] = + { kElTable, kElPara, NULL }; + +const char* kHideList[] = + { kAtId, kAtList, NULL }; + +const char* kNSPrefix = "xmlns"; + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +RtfParser::RtfParser(const RtfParserOptions& options) +{ + m_document = NULL; + memcpy(&m_options, &options, sizeof(options)); +} + +RtfParser::~RtfParser() +{ + clear(); + + if(m_impl != NULL) + m_impl.release(); +} + +void RtfParser::clear() +{ + if(m_document != NULL) + { + try + { + m_document.release(); + } + catch(...) { } + + m_document = NULL; + } + LevelHandler::clear(); +} + +void RtfParser::startDocument(RtfReader* reader) +{ + LevelHandler::startDocument(reader); + + // Create a new document + m_document = m_impl.createDocument("", kElDoc, DOM::DocumentType()); + + // TODO: Throw error if document is null + ASSERT(m_document != NULL); + + ASSERT(m_curLevel != NULL); + m_curLevel->setElement(m_document.getDocumentElement(), true); + + // Set the attributes on the top level + setAnalyser(AnalyserPtr(new Root)); + setDestination(DestinationPtr(new Content)); + getTextFormatting().resetPara(); + getTextFormatting().resetText(); +} + +void RtfParser::endDocument() +{ + LevelHandler::endDocument(); + + // Cleanup the tree + removeDuplicates(m_document); + breakTables(m_document); + breakTags(m_document, kElTable, kElRow); + breakTags(m_document, kElRow, kElCell); + wrapTags(m_document, kElCell, kElDest); + breakBlocks(m_document); + breakLists(m_document); + fixLists(m_document); + fixStyles(m_document); + fixBlocks(m_document); + 
removeTags(m_document); + breakBreak(m_document, kElDoc, kElPage); + breakBreak(m_document, kElDoc, kElSect); + return; +} + + + + + +// ----------------------------------------------------------------------- +// Helper functions + +DOM::Element RtfParser::createElement(const string& name) +{ + ASSERT(name.length() > 0); + return m_document.createElement(name); + + // TODO: Throw exception here if necessary +} + +void RtfParser::replaceElement(const DOM::Element& element) +{ + ASSERT(m_curLevel != NULL); + m_curLevel->setElement(element, true); +} + +void RtfParser::pushElement(const DOM::Element& element) +{ + ASSERT(m_curLevel != NULL); + getElement().appendChild(element); + m_curLevel->setElement(element); +} + +DOM::Element RtfParser::popElement() +{ + DOM::Element element = getElement(); + ASSERT(m_curLevel != NULL); + + DOM::Node parent = element.getParentNode(); + ASSERT(parent.getNodeType() == DOM::Node::ELEMENT_NODE); + + // Set it deep so it replaces the current element + m_curLevel->setElement((DOM::Element&)parent, true); + return element; +} + +void RtfParser::setAttribute(const string& name, const wstring& value, DOM::Element el) +{ + ASSERT(name.length() > 0); + if(el == NULL) + el = getElement(); + el.setAttribute(name, value); +} + +void RtfParser::setDestination(DestinationPtr dest) +{ + ASSERT(m_curLevel); + + m_curLevel->setDestination(dest); + dest->m_parser = this; + dest->initialize(); +} + +DestinationPtr RtfParser::replaceDestination(DestinationPtr dest) +{ + ASSERT(m_curLevel); + + DestinationPtr old = m_curLevel->getDestination(); + m_curLevel->setDestination(dest, true); + dest->m_parser = this; + dest->initialize(); + + return old; +} + + +void RtfParser::setAnalyser(AnalyserPtr analy) +{ + ASSERT(m_curLevel); + ASSERT(analy != NULL); + + analy->m_parser = this; + m_curLevel->setAnalyser(analy); + analy->initialize(); +} + +AnalyserPtr RtfParser::getAnalyser() +{ + ASSERT(m_curLevel); + return m_curLevel->getAnalyser(); +} + 
+DestinationPtr RtfParser::getDestination() +{ + ASSERT(m_curLevel); + return m_curLevel->getDestination(); +} + +RtfFormatting& RtfParser::getTextFormatting() +{ + ASSERT(m_curLevel); + return m_curLevel->getFormatting(); +} + + +// --------------------------------------------------------------------------------- +// Pass this stuff on through to the appropriate analysers etc... + +void RtfParser::charData(wstring data) +{ + ASSERT(m_curLevel != NULL); + DestinationPtr destination = m_curLevel->getDestination(); + if(destination) + { + destination->charData(data); + } + else + { + destination = DestinationPtr(new Content); + setDestination(destination); + } + +} + +void RtfParser::controlWord(const string& cw, int flags, int param) +{ + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->controlWord(cw, flags, param); +} + +void RtfParser::groupStart() +{ + LevelHandler::groupStart(); + + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->groupStart(); +} + +void RtfParser::groupEnd() +{ + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->groupEnd(); + + LevelHandler::groupEnd(); +} + +#define ON_INITIALIZE(cls) \ + void RtfParser::cls::initialize() +#define ON_CONTROLWORD(cls) \ + void RtfParser::cls::controlWord(const string& cw, int flags, int param) +#define ON_CHARDATA(cls) \ + void RtfParser::cls::charData(wstring data) +#define ON_GROUPSTART(cls) \ + void RtfParser::cls::groupStart() +#define ON_GROUPEND(cls) \ + void RtfParser::cls::groupEnd() +#define ON_DONE(cls) \ + void RtfParser::cls::done() +#define AN_ELEMENT(name) \ + m_parser->pushElement(m_parser->createElement(name)) +#define AN_POP_ELEMENT() \ + m_parser->popElement() +#define AN_ATTRIBUTE(name, value) \ + m_parser->setAttribute(name, value) +#define AN_DESTINATION_ATTR(name) \ + m_parser->setDestination(new Attribute(name)) 
+#define AN_DESTINATION(cls) \ + m_parser->setDestination(new cls) +#define AN_ANALYSER(cls) \ + m_parser->setAnalyser(AnalyserPtr(new cls)) +#define AN_SET_ANALYSER(cls) \ + m_parser->setAnalyser(AnalyserPtr(cls)) +#define HAS_PARAM (flags & kHasParam) +#define DEFAULT_CONTROLWORD processDefault(cw, flags, param) +#define DUMMY 1 == 1 +#define NUM_ATTR(n) m_parser->formatInt(n) + +bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int param) +{ + if(cw == "upr") + { + AnalyserPtr analy = m_parser->getAnalyser(); + ASSERT(analy != NULL); + AN_SET_ANALYSER(new Upr(analy)); + return true; + } + + return false; +} + +void RtfParser::ParseAnalyser::applyParaFormatting(RtfFormatting* format, + DOM::Element& el) +{ + if(format == NULL) + format = &(m_parser->getTextFormatting()); + + wstring fix = kValPara; + + int list = format->paraList(); + if(list != -1) + { + el.setAttribute(kAtList, NUM_ATTR(list)); + } + else + { + el.removeAttribute(kAtList); + } + + if(format->paraInTable()) + el.setAttribute(kAtCell, L"1"); + else + el.removeAttribute(kAtCell); + + int style = format->paraStyle(); + if(style != -1) + el.setAttribute(kElStyle, NUM_ATTR(style)); + else + el.removeAttribute(kElStyle); + + el.setAttribute(kAtFix, fix); +} + +DOM::Element RtfParser::ParseAnalyser::getCurrentBlock() +{ + DOM::Node node = m_parser->getElement(); + + if(node.hasChildNodes()) + node = node.getLastChild(); + + return m_parser->getPriorElement(node, kElBlock); + +} + +bool RtfParser::ParseAnalyser::processTextContent(const string& cw, int flags, int param) +{ + DOM::Element el; + bool process = false; + + RtfFormatting& format = m_parser->getTextFormatting(); + + if(cw == "par") + { + el = getCurrentBlock(); + if(el != NULL) + applyParaFormatting(&format, el); + + el = m_parser->createElement(kElBlock); + applyParaFormatting(&format, el); + } + + else if(cw == "intbl") + format.paraSetTable(true); + + else if(cw == "cell") + { + el = getCurrentBlock(); + if(el != 
NULL) + applyParaFormatting(&format, el); + + el = m_parser->createElement(kElCell); + m_parser->pushElement(el); + m_parser->popElement(); + el = m_parser->createElement(kElBlock); + applyParaFormatting(&format, el); + } + + else if(cw == "trowd") + el = m_parser->createElement(kElRow); + + else if(cw == "tab") + el = m_parser->createElement(kElTab); + + else if(cw == "sect") + el = m_parser->createElement(kElSect); + + else if(cw == "page") + el = m_parser->createElement(kElPage); + + else if(cw == "s" && HAS_PARAM) + format.paraSetStyle(param); + + else if(cw == "line") + el = m_parser->createElement(kElLine); + + else if(cw == "header") + AN_ANALYSER(Skip); + else if(cw == "footer") + AN_ANALYSER(Skip); + else if(cw == "bkmkstart") + AN_ANALYSER(Skip); + else if(cw == "listtext") + AN_ANALYSER(Skip); + + else if(cw == "ls" && HAS_PARAM) + format.paraSetList(param); + + if(el != NULL) + { + // This ensures that our content destination is open and ready + DestinationPtr dest = m_parser->getDestination(); + ASSERT(dest != NULL); + dest->charData(kValNull); + + m_parser->pushElement(el); + m_parser->popElement(); + } + + return (el != NULL) || process; + + /* TODO: cell, row, intbl, cellx, trowd*/ +} + +bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, + int param, RtfFormatting& format) +{ + bool on = true; + if(flags & HAS_PARAM && param == 0) + on = false; + + if(cw == "pard") + { + format.resetPara(); +// applyParaFormatting(); + } + else if(cw == "plain") + format.resetText(); + else if(cw == "b") + format.textSetBold(on); + else if(cw == "i") + format.textSetItalic(on); + else if(cw == "v") + format.textSetHidden(on); + else if(cw == "ul") + format.textSetUnderline(on); + else if(cw == "cf" && HAS_PARAM) + format.textSetColor(param); + else + return false; + + return true; +} + +bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, int param) +{ + return processTextFormatting(cw, flags, param, 
m_parser->getTextFormatting()); +} + + +ON_INITIALIZE(Skip) + { AN_DESTINATION(Null); } +ON_GROUPSTART(Skip) + { AN_ANALYSER(Skip); } + + +RtfParser::Upr::Upr(AnalyserPtr prv) +{ + ASSERT(prv); + prev = prv; +} +ON_GROUPSTART(Upr) + { AN_ANALYSER(Skip); } +ON_GROUPEND(Upr) +{ + ASSERT(prev); + m_parser->setAnalyser(prev); + prev = NULL; +} + + +ON_INITIALIZE(Stylesheet) +{ + AN_ELEMENT(kElStylesheet); +} +ON_GROUPSTART(Stylesheet) +{ + AN_ANALYSER(Style); + AN_DESTINATION(Null); +} + + + +ON_INITIALIZE(Style) +{ + // Were not sure if this element is really something + // so we can't always create + haveStyle = false; +} +ON_CONTROLWORD(Style) +{ + // Get the style id + if(flags & kAsterisk) + { + AN_ANALYSER(Skip); + return; + } + + if(!haveStyle) + { + AN_ELEMENT(kElStyle); + AN_DESTINATION_ATTR(kAtName); + haveStyle = true; + } + + if(cw == "s" && flags & kHasParam) + { + AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); + } + + // Otherwise get as much formatting out of the tag as possible + else if(processTextFormatting(cw, flags, param)) + DUMMY; + + else + DEFAULT_CONTROLWORD; +} +ON_GROUPSTART(Style) +{ + AN_ANALYSER(Skip); +} +ON_GROUPEND(Style) +{ + RtfFormatting& props = m_parser->getTextFormatting(); + if(props.textIsBold()) + AN_ATTRIBUTE(kAtBold, L"1"); + if(props.textIsHidden()) + AN_ATTRIBUTE(kAtHidden, L"1"); + if(props.textIsItalic()) + AN_ATTRIBUTE(kAtItalic, L"1"); + if(props.textIsStrike()) + AN_ATTRIBUTE(kAtStrike, L"1"); + if(props.textIsUnderline()) + AN_ATTRIBUTE(kAtUnderline, L"1"); + if(props.textColor() != -1 && m_parser->getOptions().doColors) + AN_ATTRIBUTE(kAtColor, NUM_ATTR(props.textColor())); +} + + + +ON_INITIALIZE(ListTable) +{ + AN_ELEMENT(kElListtable); +} +ON_GROUPSTART(ListTable) +{ + AN_ANALYSER(List); + AN_DESTINATION(Null); +} + + + +ON_INITIALIZE(List) +{ + AN_ELEMENT(kElListdef); + AN_ATTRIBUTE(kAtType, kValDisc); + AN_ATTRIBUTE(kAtOrdered, L"0"); + levelsSeen = 0; +} +ON_CONTROLWORD(List) +{ + if(cw == "listname") + 
AN_DESTINATION_ATTR(kAtName); + else if(cw == "listid" && HAS_PARAM) + AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); + + // We let listlevel in here too + else if(cw == "levelstartat" && HAS_PARAM) + AN_ATTRIBUTE(kAtStart, NUM_ATTR(param)); + + else if(cw == "levelnfc" && HAS_PARAM) + { + switch(param) + { + case 0: // 1, 2, 3 + case 5: // 1st, 2nd, 3rd + case 6: // One, Two, Three + case 7: // First, Second, Third + case 22: // 01, 02, 03 + AN_ATTRIBUTE(kAtType, kValArabic); + break; + case 1: // I, II, III + AN_ATTRIBUTE(kAtType, kValUpperRoman); + break; + case 2: // i, ii, iii + AN_ATTRIBUTE(kAtType, kValLowerRoman); + break; + case 3: // A, B, C + AN_ATTRIBUTE(kAtType, kValUpperAlpha); + break; + case 4: // a, b, c + AN_ATTRIBUTE(kAtType, kValLowerAlpha); + break; + default: + AN_ATTRIBUTE(kAtType, kValDisc); + break; + } + + switch(param) + { + case 0: case 5: case 6: case 7: case 22: + case 1: case 2: case 3: case 4: + AN_ATTRIBUTE(kAtOrdered, L"1"); + break; + default: + AN_ATTRIBUTE(kAtOrdered, L"0"); + } + } + + else + DEFAULT_CONTROLWORD; +} +ON_GROUPSTART(List) +{ + if(levelsSeen > 0) + AN_ANALYSER(Skip); + levelsSeen++; +} + + + + +ON_INITIALIZE(ListOverrideTable) +{ + DOM::Document document = m_parser->getDocument(); + lists = document.getElementsByTagName(kElListdef); + curList = NULL; + lsId = -1; +} +ON_GROUPSTART(ListOverrideTable) +{ + AN_DESTINATION(Null); +} +ON_CONTROLWORD(ListOverrideTable) +{ + // New list override clear + if(cw == "listoverride") + curList = NULL; + + // List id for current listoverride + else if(cw == "listid" && HAS_PARAM) + { + wstring id = NUM_ATTR(param); + + if(lists != NULL) + { + for(int i = 0; i < lists->getLength(); i++) + { + DOM::Node node = lists->item(i); + if(node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE) + { + DOM::Element element = (DOM::Element&)node; + if(element.getAttribute(kAtId) == id) + { + curList = element; + break; + } + } + } + } + } + + // The actual list code + else if(cw == "ls" && 
HAS_PARAM) + lsId = param; + + // Override the starting level for the node + else if(cw == "levelstartat" && HAS_PARAM) + { + if(curList != NULL) + curList.setAttribute(kAtStart, NUM_ATTR(param)); + } + + else + DEFAULT_CONTROLWORD; + + + // Okay before any overrides take effect we need to duplicate + // the list node for overriding, using the 'listid' and 'ls' we gathered + if(curList != NULL && lsId != -1) + { + DOM::Element parent = (DOM::Element&)curList.getParentNode(); + if(parent != NULL) + { + curList = (DOM::Element&)curList.cloneNode(true); + if(curList != NULL) + { + parent.appendChild(curList); + curList.setAttribute(kAtList, NUM_ATTR(lsId)); + } + } + + lsId = -1; + } + +} +ON_GROUPEND(ListOverrideTable) +{ + +} + + + + +ON_INITIALIZE(Info) +{ + // Create a new element + AN_ELEMENT(kElInfo); + AN_DESTINATION(Null); +} +ON_CONTROLWORD(Info) +{ + // The title + if(cw == "title") + { + AN_ELEMENT(kElTitle); + AN_DESTINATION(Raw); + } + else if(cw == "author") + { + AN_ELEMENT(kElAuthor); + AN_DESTINATION(Raw); + } + else if(cw == "operator") + { + AN_ELEMENT(kElOperator); + AN_DESTINATION(Raw); + } + else if(flags & kAsterisk) + AN_ANALYSER(Skip); + else + DEFAULT_CONTROLWORD; +} + + + + +ON_INITIALIZE(Root) +{ + +} +ON_CONTROLWORD(Root) +{ + if(cw == "stylesheet") + AN_ANALYSER(Stylesheet); + else if(cw == "listtable") + AN_ANALYSER(ListTable); + else if(cw == "listoverridetable") + AN_ANALYSER(ListOverrideTable); + else if(cw == "info") + AN_ANALYSER(Info); + else if(cw == "fonttbl") + AN_ANALYSER(Skip); + else if(cw == "colortbl") + AN_ANALYSER(Skip); + else if(cw == "pict") + { + AN_ANALYSER(Skip); + AN_DESTINATION(Null); + } + else if(flags & kAsterisk) + AN_ANALYSER(Skip); + else if(processTextContent(cw, flags, param)) + DUMMY; + else if(processTextFormatting(cw, flags, param)) + DUMMY; + else + DEFAULT_CONTROLWORD; +} + + +ON_INITIALIZE(Content) +{ + parent = m_parser->getElement(); + created = false; +} +ON_CHARDATA(Content) +{ + // Create the 
first time we get content + if(!created) + { + DOM::Element dest = m_parser->createElement(kElDest); + parent.appendChild(dest); + m_parser->replaceElement(dest); + + DOM::Element el = m_parser->createElement(kElBlock); + m_parser->pushElement(el); + m_parser->popElement(); + + created = true; + } + + if(data.length() == 0) + return; + + int elements = 0; + RtfFormatting& format = m_parser->getTextFormatting(); + + // Now do text Properties if necessary + if(format.textIsBold()) + { + AN_ELEMENT(kElB); + elements++; + } + if(format.textIsHidden()) + { + AN_ELEMENT(kElHide); + elements++; + } + if(format.textIsItalic()) + { + AN_ELEMENT(kElI); + elements++; + } + if(format.textIsStrike()) + { + AN_ELEMENT(kElStrike); + elements++; + } + if(format.textIsUnderline()) + { + AN_ELEMENT(kElU); + elements++; + } + if(format.textColor() != -1 && m_parser->getOptions().doColors) + { + AN_ELEMENT(kElColor); + AN_ATTRIBUTE(kAtIndex, NUM_ATTR(format.textColor())); + elements++; + } + + // Write the data to the element + m_parser->getElement().appendChild( + m_parser->getDocument().createTextNode(data)); + + // Now drop out of all the above formatting + while(elements-- > 0) + AN_POP_ELEMENT(); +} + +#if 0 +ON_INITIALIZE(Table) +{ + stack = 0; + level = m_parser->getLevel(); + AN_ELEMENT(kElTable); + AN_DESTINATION(Content); +} + +ON_CONTROLWORD(Table) +{ + ASSERT(stack >= 0); + ASSERT(level != NULL); + + if(cw == "trowd") + { + stack++; + } + else if(cw == "row") + { + stack--; + if(stack <= 0) + m_parser->rewindLevel(level); + } + + else if(processTextContent(cw, flags, param)) + DUMMY; + else if(processTextFormatting(cw, flags, param)) + DUMMY; + else + DEFAULT_CONTROLWORD; + + if(!m_parser->getTextFormatting().paraInTable()) + { + m_parser->rewindLevel(level); + } + +} +#endif + + + + +ON_CHARDATA(Raw) +{ + // Write the data to the element + m_parser->getElement().appendChild( + m_parser->getDocument().createTextNode(data)); +} + + + + +ON_INITIALIZE(Attribute) +{ + element 
= m_parser->getElement(); + ASSERT(element != NULL); +} + +ON_CHARDATA(Attribute) +{ + // Get the current value + wstring cur = element.getAttribute(name); + + if(data.at(data.size() - 1) == L';') + data.resize(data.size() - 1); + + // Append data + cur.append(data); + + // Write it back + element.setAttribute(name, cur); +} + + + + + +/** + * A quick check to see if a node is an element of a certain + * name + */ +bool RtfParser::isElement(const DOM::Node& node, const string& name) +{ + return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && + node.getNodeName() == name; +} + +bool RtfParser::isEqualElement(const DOM::Element& el1, const DOM::Element& el2) +{ + if(el1.getNodeName() == el2.getNodeName()) + return false; + + DOM::NamedNodeMap at1 = el1.getAttributes(); + DOM::NamedNodeMap at2 = el2.getAttributes(); + + if(at1 == NULL && at2 == NULL) + return true; + + if(at1 == NULL || at2 == NULL || + at1->getLength() != at2->getLength()) + return false; + + for(int i = 0; i < at1->getLength(); i++) + { + DOM::Attr attr1 = (DOM::Attr&)at1->item(0); + if(attr1 != NULL) + return false; + + DOM::Attr attr2 = (DOM::Attr&)at2->getNamedItem(attr1.getNodeName()); + if(attr2 != NULL) + return false; + + if(attr1.getNodeValue() == attr2.getNodeValue()) + return false; + } + + return true; +} + +wstring RtfParser::formatInt(int num) +{ + wstring n; + n.format(L"%d", num); + return n; +} + +/** + * Gets the pertinent ancestor of this node, or returns null + * if not found. 
+ */ +DOM::Element RtfParser::getContainingElement(const DOM::Node& node, const string& name) +{ + DOM::Node n = node; + + while(true) + { + n = n.getParentNode(); + if(n == NULL) + break; + + if(isElement(n, name)) + return (DOM::Element&)n; + } + + return DOM::Element(); +} + +DOM::Element RtfParser::getPriorElement(const DOM::Node& node, const string& name) +{ + DOM::Node n = node; + + while(n != NULL) + { + if(isElement(n, name)) + return (DOM::Element&)n; + + n = n.getPreviousSibling(); + } + + DOM::Node parent = node.getParentNode(); + + if(parent == NULL) + return DOM::Element(); + else + return getPriorElement(parent, name); +} + +bool isNsAttr(const string& name) +{ + return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true; +} + +void RtfParser::copyAttributes(const DOM::Element& src, DOM::Element& dest, + const char** hideList) +{ + // Now get both sets of attributes + DOM::NamedNodeMap srcMap = src.getAttributes(); + DOM::NamedNodeMap destMap = dest.getAttributes(); + + if(srcMap == NULL || destMap == NULL) + return; + + // And copy them from one to the other + for(int j = 0; j < srcMap->getLength(); j++) + { + DOM::Node attr = srcMap->item(j); + if(attr != NULL) + { + // BUG: Sablotron seems to have a bug in it's + // setAttributeNode implementation. 
It always + // adds a blank namespace + // attr = attr.cloneNode(false); + // if(attr != NULL) + // destMap.setNamedItem(attr); + + string name = attr.getNodeName(); + + if(hideList) + { + + for(const char** t = hideList; *t != NULL; t++) + { + if(name == *t) + name.clear(); + } + } + + if(name.length() > 0 && !isNsAttr(name)) + dest.setAttribute(attr.getNodeName(), attr.getNodeValue()); + } + } +} + + +void RtfParser::breakBreak(DOM::Document& doc, const string& contain, + const string& tag) +{ + DOM::NodeList els = doc.getElementsByTagName(tag); + if(els != NULL) + { + for(int i = 0; i < els->getLength(); i++) + { + DOM::Element el = (DOM::Element&)els->item(i); +#if 0 + // See if parent node only has this break tag + // in it. If so then replace parent with this + + DOM::Node parent = el.getParentNode(); + + if(parent != NULL) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && + el.getPreviousSibling() == NULL && + el.getNextSibling() == NULL) + { + grandparent.replaceChild(parent.removeChild(el), parent); + } + } +#endif + + breakElement(el, contain); + } + } +} + +/** + * Breaks a paragraph up through a previous level. Calls itself + * recursively to break paragraphs totally free up to containing + * destination. + * + * For example: + * + * <dest> + * This is <b> a <block fix="para"/> + * test of </b> your concentration. + * </dest> + * + * Becomes: + * + * <dest> + * This is <b> a </b><block fix="para"/> + * <b>test of </b> your concentration. 
+ * </dest> + */ +bool RtfParser::breakElement(DOM::Element& el, const string& contain) +{ + ASSERT(el != NULL); + + DOM::Element parent = (DOM::Element&)el.getParentNode(); + DOM::Element grandparent; + + // Get the parent node + if(parent != NULL) + grandparent = (DOM::Element&)parent.getParentNode(); + + // Make sure we have something to work with before continuing + if(grandparent == NULL || parent == NULL || + isElement(parent, contain)) + return true; + + DOM::Node e; + + // Check to see if this is the first node in the parent. + // If so then just move out to before + if(el.getPreviousSibling() == NULL) + { + e = grandparent.insertBefore(parent.removeChild(el), parent); + } + + + // Check to see if this is the last node in the parent. + // If so then just move out to after the parent + else if(el.getNextSibling() == NULL) + { + DOM::Node next = parent.getNextSibling(); + if(next == NULL) + e = grandparent.appendChild(parent.removeChild(el)); + else + e = grandparent.insertBefore(parent.removeChild(el), next); + } + + + // Otherwise it's in the middle so split the parent + // element etc... + else + { + // Clone it but not deep + DOM::Element parent2 = (DOM::Element&)parent.cloneNode(false); + + if(parent2 == NULL) + return false; + + // Flag that tells us whether we moved anything up to parent + bool moved = false; + + // Now move all nodes after this one to the second parent. + while((e = el.getNextSibling()) != NULL) + { + parent2.appendChild(parent.removeChild(e)); + moved = true; + } + + // Remove the element from its parent + e = parent.removeChild(el); + + // Okay now we move the paragraph up to the parent + insertAfter(grandparent, e, parent); + if(moved) + insertAfter(grandparent, parent2, e); + } + + // Now call it again with the paragraph in the new position + // until everything's cut through!
+ return breakElement((DOM::Element&)e, contain); +} + +/** + * Changes from a marker based paragraph system to a contained + * paragraph system. Also applies paragraph attributes to the + * appropriate paragraph. + * + * For example: + * + * <dest> + * This <blockattr style="10"> is <b> a <block fix="para"/> + * test of </b> your concentration. + * </dest> + * + * Becomes: + * + * <para style="10"> This is <b> a </b></para> + * <para><b>test of </b> your concentration.</para> + */ +void RtfParser::breakBlocks(DOM::Document& document) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = document.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (DOM::Element&)blocks->item(i); + + // If it's the single closed style para then break it + if(block != NULL && !block.hasChildNodes()) + breakElement(block, kElDest); + } + } + + + // Now group stuff in destinations into paras or other blocks + DOM::NodeList destinations = document.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL || !dest.hasChildNodes()) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element block; + + while(child != NULL) + { + // If it's a block + if(isElement(child, kElBlock)) + { + block = (DOM::Element&)child; + child = child.getNextSibling(); + continue; + } + + // If it's already a real block element + for(const char** t = kBlockTags; *t != NULL; t++) + { + if(isElement(child, *t)) + { + block = NULL; + break; + } + } + + // If there's a block then add to it + if(block != NULL) + { + block.appendChild(dest.removeChild(child)); + child = block; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfParser::wrapTags(DOM::Document& doc, const 
string& tagName, + const string& wrapName) +{ + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (DOM::Element&)tags->item(i); + + DOM::Element wrap = doc.createElement(wrapName); + while(tag.hasChildNodes()) + wrap.appendChild(tag.removeChild(tag.getFirstChild())); + + tag.appendChild(wrap); + } + } +} + +void RtfParser::breakTags(DOM::Document& doc, const string& parentName, + const string& tagName) +{ + DOM::NodeList parents = doc.getElementsByTagName(parentName); + if(parents != NULL) + { + for(int i = 0; i < parents->getLength(); i++) + { + DOM::Element parent = (DOM::Element&)parents->item(i); + + if(!parent.hasChildNodes()) + continue; + + DOM::NodeList tags = parent.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + breakElement((DOM::Element&)tags->item(i), parentName); + } + + DOM::Node tag = doc.createElement(tagName); + parent.insertBefore(tag, parent.getFirstChild()); + + DOM::Node child = tag; + + while(child != NULL && (child = child.getNextSibling()) != NULL) + { + if(isElement(child, kElBlock)) + { + DOM::Node next = child.getNextSibling(); + if(next == NULL) + { + parent.removeChild(child); + continue; + } + + if(isElement(next, tagName)) + { + DOM::Node twodown = next.getNextSibling(); + if(!isElement(twodown, kElBlock)) + { + child = parent.insertBefore(parent.removeChild(next), child); + } + else + { + parent.removeChild(child); + child = next; + } + } + } + + if(isElement(child, tagName)) + { + if(!tag.hasChildNodes()) + parent.removeChild(tag); + tag = child; + } + else + { + tag.appendChild(parent.removeChild(child)); + child = tag; + } + } + + if(!tag.hasChildNodes()) + parent.removeChild(tag); + } + } + + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (DOM::Element&)tags->item(i); + DOM::Node 
parent = tag.getParentNode(); + + if(parent != NULL && !isElement(parent, parentName)) + parent.removeChild(tag); + +#if 0 + else if(tag.hasChildNodes()) + { + DOM::NodeList children = tag.getChildNodes(); + if(children != NULL && children->getLength() == 1) + { + DOM::Node child = children->item(0); + if(child != NULL && !child.hasChildNodes() && + isElement(child, kElBlock)) + parent.removeChild(tag); + } + } +#endif + + } + } +} + +void RtfParser::breakLists(DOM::Document& doc) +{ + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element list; + DOM::Element e; + + wstring previd; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + wstring listid = e.getAttribute(kAtList); + if(listid.length() > 0) + { + e.removeAttribute(kAtList); + + if(list == NULL || previd != listid) + { + list = doc.createElement(kElList); + list.setAttribute(kAtList, listid); + dest.insertBefore(list, child); + previd = listid; + } + } + else + { + list = NULL; + previd.clear(); + } + } + + // It's not a block + if(list != NULL) + { + list.appendChild(dest.removeChild(child)); + child = list; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfParser::fixStyles(const DOM::Document doc) +{ + DOM::NodeList styles = doc.getElementsByTagName(kElStyle); + if(styles != NULL) + { + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (DOM::Element&)blocks->item(i); + + if(block == NULL || 
!block.hasAttribute(kElStyle)) + continue; + + for(int j = 0; j < styles->getLength(); j++) + { + DOM::Element style = (DOM::Element&)styles->item(j); + if(style != NULL) + { + if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) + { + wstring name = style.getAttribute(kAtName); + if(name.length() > 0) + block.setAttribute(kElStyle, name); + } + } + } + } + } + + for(int i = 0; i < styles->getLength(); i++) + { + DOM::Element style = (DOM::Element&)styles->item(i); + if(style != NULL) + style.removeAttribute(kAtId); + } + } + + +} + + +void RtfParser::breakTables(DOM::Document& doc) +{ + DOM::NodeList rows = doc.getElementsByTagName(kElRow); + if(rows != NULL) + { + for(int i = 0; i < rows->getLength(); i++) + { + DOM::Element row = (DOM::Element&)rows->item(i); + DOM::Node parent = row.getParentNode(); + + if(parent == NULL) + continue; + + if(isElement(parent, kElBlock)) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && !row.hasChildNodes()) + { + if(row.getPreviousSibling() == NULL) + grandparent.insertBefore(parent.removeChild(row), parent); + else if(row.getNextSibling() == NULL) + insertAfter(grandparent, parent.removeChild(row), parent); + } + } + + breakElement(row, kElDest); + } + } + + + + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element table; + DOM::Element e; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + if(e.getAttribute(kAtCell).length() > 0) + { + e.removeAttribute(kAtCell); + + if(table == NULL) + { + table = 
doc.createElement(kElTable); + dest.insertBefore(table, child); + } + } + else + { + table = NULL; + } + } + + // It's not a block + if(table != NULL) + { + table.appendChild(dest.removeChild(child)); + child = table; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfParser::insertAfter(DOM::Node& parent, const DOM::Node& node, + const DOM::Node& ref) +{ + DOM::Node sibling = ref.getNextSibling(); + if(sibling == NULL) + parent.appendChild(node); + else + parent.insertBefore(node, sibling); +} + +void RtfParser::removeTags(const DOM::Document& doc) +{ + // Go through the list of nodes + for(const char** t = kRemoveTags; *t != NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + for(int j = 0; j < elements->getLength(); j++) + { + DOM::Element el = (DOM::Element&)elements->item(j); + DOM::Node parent = el->getParentNode(); + + if(parent == NULL) + continue; + + while(el.hasChildNodes()) + parent.insertBefore(el.removeChild(el.getFirstChild()), el); + + parent.removeChild(el); + } + } + } +} + +void RtfParser::fixLists(const DOM::Document doc) +{ + DOM::NodeList lists = doc.getElementsByTagName(kElList); + if(lists != NULL) + { + DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); + if(listdefs != NULL) + { + for(int i = 0; i < listdefs->getLength(); i++) + { + DOM::Element listdef = (DOM::Element&)listdefs->item(i); + + if(listdef == NULL || !listdef.hasAttribute(kAtList)) + continue; + + for(int j = 0; j < lists->getLength(); j++) + { + DOM::Element list = (DOM::Element&)lists->item(j); + if(list != NULL) + { + if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) + { + copyAttributes(listdef, list, kHideList); + list.removeAttribute(kAtList); + } + } + } + } + } + } +} + +void RtfParser::fixBlocks(const DOM::Document doc) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + 
string fix; + wstring val; + + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (DOM::Element&)blocks->item(i); + DOM::Node parent = block.getParentNode(); + + if(parent == NULL) + continue; + + fix.resize(0); + val.resize(0); + + val = block.getAttribute(kAtFix); + if(val.length() > 0) + block.removeAttribute(kAtFix); + + + // Nothing found under the plain name, so fall back to the + // empty-namespace lookup (never overwrite a value we already have) + if(val.length() == 0) + { + val = block.getAttributeNS("", kAtFix); + if(val.length() > 0) + block.removeAttributeNS("", kAtFix); + } + + if(val.length() > 0) + DOM::transcode16to8(val, fix); + + if(fix.length() == 0) + fix = kElPara; + + DOM::Element el = doc.createElement(fix); + copyAttributes(block, el, NULL); + + while(block.hasChildNodes()) + el.appendChild(block.removeChild(block.getFirstChild())); + + parent.replaceChild(el, block); + } + } +} + + +/** + * Removes adjacent duplicate nodes of certain names + */ +void RtfParser::removeDuplicates(const DOM::Document& doc) +{ + // Go through the NULL-terminated list of tag names. The test must + // be a comparison: an assignment here would clear the first entry + // and the loop body would never run + for(const char** t = kNoDuplicates; *t != NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + for(int j = 0; j < elements->getLength(); j++) + { + + // Make sure it's a valid element + DOM::Element element = (DOM::Element&)elements->item(j); + if(element == NULL) + continue; + + // Get neighbors + DOM::Node previous = element.getPreviousSibling(); + DOM::Node next = element.getNextSibling(); + + // Make sure it's still in the document, as we may have + // removed it on a previous loop + DOM::Node parent = element.getParentNode(); + if(parent == NULL) + continue; + + // Combine previous if valid + if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE && + isEqualElement((DOM::Element&)previous, element)) + { + while(previous.hasChildNodes()) + { + DOM::Node child = previous.removeChild(previous.getLastChild()); + if(child != NULL) + { + if(element.hasChildNodes()) + element.insertBefore(child,
element.getFirstChild()); + else + element.appendChild(child); + } + } + + // Remove duplicate node + parent.removeChild(previous); + } + + // Combine next if valid + if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE && + isEqualElement((DOM::Element&)next, element)) + { + while(next.hasChildNodes()) + { + DOM::Node child = next.removeChild(next.getFirstChild()); + if(child != NULL) + element.appendChild(child); + } + + // Remove duplicate node + parent.removeChild(next); + } + } + } + } +}
\ No newline at end of file diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h new file mode 100644 index 0000000..9463318 --- /dev/null +++ b/src/xmlcomposer.h @@ -0,0 +1,228 @@ +// RtfParser.h: interface for the RtfParser class. +// +////////////////////////////////////////////////////////////////////// + +#if !defined(AFX_RTFANALYSER_H__0243FE38_142A_4BDD_BA2C_1D122965444B__INCLUDED_) +#define AFX_RTFANALYSER_H__0243FE38_142A_4BDD_BA2C_1D122965444B__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +#include "LevelHandler.h" + +struct RtfParserOptions +{ + RtfParserOptions() + { memset(this, 0, sizeof(*this)); } + + bool doColors; +}; + +class RtfParser : public LevelHandler +{ +public: + RtfParser(const RtfParserOptions& options); + virtual ~RtfParser(); + + virtual void startDocument(RtfReader* reader); + virtual void endDocument(); + virtual void controlWord(const string& cw, int flags, int param); + virtual void groupStart(); + virtual void groupEnd(); + virtual void charData(wstring data); + + + // Element management functions + DOM::Element createElement(const string& name); + void pushElement(const DOM::Element& element); + void replaceElement(const DOM::Element& element); + DOM::Element popElement(); + void setAttribute(const string& name, const wstring& value, DOM::Element el = DOM::Element()); + + // Changing the current parser functions + void setAnalyser(AnalyserPtr analy); + void setDestination(DestinationPtr dest); + DestinationPtr replaceDestination(DestinationPtr dest); + + // Current status functions + RtfFormatting& getTextFormatting(); + AnalyserPtr getAnalyser(); + DestinationPtr getDestination(); + DOM::Document getDocument() + { return m_document; } + + static bool isElement(const DOM::Node& node, const string& name); + static bool isEqualElement(const DOM::Element& el1, const DOM::Element& el2); + static void copyAttributes(const DOM::Element& src, DOM::Element& dest, const char** hideList); + static void 
insertAfter(DOM::Node& parent, const DOM::Node& node, const DOM::Node& ref); + static DOM::Element getContainingElement(const DOM::Node& node, const string& name); + static DOM::Element getPriorElement(const DOM::Node& node, const string& name); + + static wstring formatInt(int num); + + const RtfParserOptions& getOptions() + { return m_options; } + +protected: + // Cleanup Functions + void fixBlocks(DOM::Document doc); + void fixLists(const DOM::Document doc); + void fixStyles(const DOM::Document doc); + bool breakElement(DOM::Element& el, const string& contain); + + void breakBreak(DOM::Document& doc, const string& contain, + const string& tag); + void breakLists(DOM::Document& document); + void breakTables(DOM::Document& document); + void breakTags(DOM::Document& doc, const string& parentName, + const string& tagName); + void breakBlocks(DOM::Document& document); + void wrapTags(DOM::Document& document, const string& tagName, + const string& wrapName); + + void removeTags(const DOM::Document& doc); + void removeDuplicates(const DOM::Document& doc); + + virtual void clear(); + + +// Data +protected: + DOM::DOMImplementation m_impl; + DOM::Document m_document; + RtfParserOptions m_options; + + +// Sub classes +protected: + #define DESTINATION(cls) class cls : public Destination { public: + #define END_DESTINATION }; + #define ANALYSER(cls) class cls : public ParseAnalyser { public: + #define END_ANALYSER }; + #define DATA_PORTION protected: + #define INITIALIZE virtual void initialize(); + #define CHARDATA virtual void charData(wstring data); + #define CONTROLWORD virtual void controlWord(const string& cw, int flags, int param); + #define GROUPSTART virtual void groupStart(); + #define GROUPEND virtual void groupEnd(); + #define DONE virtual void done(); + + DESTINATION(Content) + INITIALIZE + CHARDATA + DATA_PORTION + bool created; + DOM::Element parent; + END_DESTINATION + + + DESTINATION(Null) + END_DESTINATION + + DESTINATION(Raw) + CHARDATA + END_DESTINATION 
+ + DESTINATION(Attribute) + Attribute(const string& nm) : name(nm) {} + INITIALIZE + CHARDATA + DATA_PORTION + string name; + DOM::Element element; + END_DESTINATION + + class ParseAnalyser : + public Analyser + { + public: + virtual void controlWord(const string& cw, int flags, int param) + { processDefault(cw, flags, param); } + + protected: + // Some helper functions + bool processDefault(const string& cw, int flags, int param); + bool processTextFormatting(const string& cw, int flags, int param, RtfFormatting& format); + bool processTextContent(const string& cw, int flags, int param); + bool processTextFormatting(const string& cw, int flags, int param); + + DOM::Element getCurrentBlock(); + void applyParaFormatting(RtfFormatting* format, DOM::Element& el); + }; + + + ANALYSER(Skip) + INITIALIZE + GROUPSTART + END_ANALYSER + +#if 0 + ANALYSER(Table) + INITIALIZE + CONTROLWORD + DATA_PORTION + int stack; + LevelPtr level; + END_ANALYSER +#endif + + ANALYSER(Upr) + Upr(AnalyserPtr prv); + GROUPSTART + GROUPEND + DATA_PORTION + AnalyserPtr prev; + END_ANALYSER + + ANALYSER(Stylesheet) + INITIALIZE + GROUPSTART + END_ANALYSER + + ANALYSER(Style) + INITIALIZE + CONTROLWORD + GROUPSTART + GROUPEND + DATA_PORTION + bool haveStyle; + END_ANALYSER + + ANALYSER(ListTable) + INITIALIZE + GROUPSTART + END_ANALYSER + + ANALYSER(List) + INITIALIZE + CONTROLWORD + GROUPSTART + DATA_PORTION + int levelsSeen; + END_ANALYSER + + ANALYSER(ListOverrideTable) + INITIALIZE + CONTROLWORD + GROUPSTART + GROUPEND + DATA_PORTION + DOM::NodeList lists; + int lsId; + DOM::Element curList; + END_ANALYSER + + ANALYSER(Info) + INITIALIZE + CONTROLWORD + END_ANALYSER + + ANALYSER(Root) + INITIALIZE + CONTROLWORD + END_ANALYSER + +}; + +#endif // !defined(AFX_RTFANALYSER_H__0243FE38_142A_4BDD_BA2C_1D122965444B__INCLUDED_) diff --git a/win32/rtfx.dsp b/win32/rtfx.dsp new file mode 100644 index 0000000..0ca726b --- /dev/null +++ b/win32/rtfx.dsp @@ -0,0 +1,177 @@ +# Microsoft Developer Studio 
Project File - Name="rtfm" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=rtfm - Win32 Debug +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "rtfm.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. For example: +!MESSAGE +!MESSAGE NMAKE /f "rtfm.mak" CFG="rtfm - Win32 Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "rtfm - Win32 Release" (based on "Win32 (x86) Console Application") +!MESSAGE "rtfm - Win32 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "rtfm - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /c +# ADD CPP /nologo /W3 /GX /O2 /I "sablot/include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /c +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib 
/nologo /subsystem:console /machine:I386 +# ADD LINK32 sablot.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /libpath:"sablot/lib" /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ELSEIF "$(CFG)" == "rtfm - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "sablot/include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /GZ /c +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 sablot.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"sablot/lib" + +!ENDIF + +# Begin Target + +# 
Name "rtfm - Win32 Release" +# Name "rtfm - Win32 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\BaseHandler.cpp +# End Source File +# Begin Source File + +SOURCE=.\LevelHandler.cpp +# End Source File +# Begin Source File + +SOURCE=.\RtfAnalyser.cpp +# End Source File +# Begin Source File + +SOURCE=.\rtfm.cpp +# End Source File +# Begin Source File + +SOURCE=.\RtfParseHelpers.cpp +# End Source File +# Begin Source File + +SOURCE=.\RtfReader.cpp +# End Source File +# Begin Source File + +SOURCE=.\sablotr.cpp +# SUBTRACT CPP /YX /Yc /Yu +# End Source File +# Begin Source File + +SOURCE=.\StdAfx.cpp +# ADD CPP /Yc"stdafx.h" +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=.\BaseHandler.h +# End Source File +# Begin Source File + +SOURCE=.\LevelHandler.h +# End Source File +# Begin Source File + +SOURCE=.\Reference.h +# End Source File +# Begin Source File + +SOURCE=.\RtfAnalyser.h +# End Source File +# Begin Source File + +SOURCE=.\RtfFormatting.h +# End Source File +# Begin Source File + +SOURCE=.\RtfParseHelpers.h +# End Source File +# Begin Source File + +SOURCE=.\RtfReader.h +# End Source File +# Begin Source File + +SOURCE=.\sablo.h +# End Source File +# Begin Source File + +SOURCE=.\StdAfx.h +# End Source File +# Begin Source File + +SOURCE=.\usuals.h +# End Source File +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# Begin Source File + +SOURCE=.\ReadMe.txt +# End Source File +# End Target +# End Project diff --git a/win32/rtfx.dsw b/win32/rtfx.dsw new file mode 100644 index 0000000..c65a996 --- /dev/null +++ b/win32/rtfx.dsw @@ -0,0 +1,29 @@ +Microsoft Developer Studio Workspace File, Format Version 6.00 +# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE! 
+ +############################################################################### + +Project: "rtfm"=.\rtfm.dsp - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ +}}} + +############################################################################### + +Global: + +Package=<5> +{{{ +}}} + +Package=<3> +{{{ +}}} + +############################################################################### + diff --git a/win32/sablot/INSTALL_WIN b/win32/sablot/INSTALL_WIN new file mode 100644 index 0000000..a84d7d2 --- /dev/null +++ b/win32/sablot/INSTALL_WIN @@ -0,0 +1,124 @@ +This file describes Sablotron installation procedure on Windows. To +find out more on Sablotron, please read the README file in the same +directory as this file. To get more info on installing on Linux or Unix, +please read the file INSTALL. + + +1. Intro +2. Sablotron binaries + 2.1. Binaries and dependencies +3. Building Sablotron from sources + 3.1. Notes on dependencies +4. More information + + +1. Intro +======================================== + +Sablotron is based on Expat XML parser. + +There is a significant change of how Expat is used since version +0.50. Sablotron _never_ looks for Expat under its own source tree, but +it supposes, that you have installed Expat library (1.95.1 or later) +in your system. + +To get Expat, visit + +http://sourceforge.net/projects/expat/ + +and download/install source/binary package. + + +2. Sablotron binaries +======================================== + +Download and extract the binary package. The only requirement is that +expat.dll must be installed in your system. You can download +the expat DLL as a binary from http://sourceforge.net/projects/expat/. +If you download a Windows binary of expat you have to rename it to expat.dll +(without a version number). Ensure expat.dll is on your PATH. + +Copy sablot.dll and sabcmd.exe somewhere to your PATH. + +2.1.
Binaries and dependencies +------------------------------ + +To support other than built-in charsets you must use the iconv library. +To support JavaScript extensions you must use the JS library by Mozilla. +For your convenience, there is a Sablotron binary package available +supporting both iconv and JavaScript. This package +(Sablot-Win-x.xx-FullPack.zip) actually contains three independent products: + +- Sablotron binary (linking iconv) +- iconv binary (look at ftp://ftp.ilog.fr/pub/Users/haible/gnu/ for sources) +- JavaScript binary (look at http://www.mozilla.org/js/ for sources) + +Copy iconv.dll, js32.dll, sablot.dll and sabcmd.exe somewhere to your PATH. + + +3. Building Sablotron from sources +======================================== + +If you have downloaded the binary distribution, you can skip reading this +section. + +To build Sablotron from sources you have to build expat first. Download +expat sources from http://sourceforge.net/projects/expat/ and build it +(project file for MS DevStudio 6.0 is included). + +Then you need two files from the expat distribution: expat.h and expat.lib. +Copy both of them to a directory of your choice (c:\temp\lib, for example). + +Extract Sablot sources, go to Sablot-x.xx directory and type: + + nmake -f Makefile.nt EXPAT=<ex-path> EXPAT_LIB=<ex-lib-path> + +To build with iconv and JavaScript (these two dependencies are independent +of each other) type: + + nmake -f Makefile.nt EXPAT=<ex-path> EXPAT_LIB=<ex-lib-path> ICONV=<ic-path> JS=<js-path> + +where + <ex-path> is the path to the directory where you installed expat, and + <ex-lib-path> is the full name of expat.lib ***relative to <ex-path>***. + <ic-path> is the path where iconv is installed [OPTIONAL]. + <js-path> is the path where JavaScript is installed.
+ +For example, + nmake -f Makefile.nt EXPAT=c:\temp\expat EXPAT_LIB=lib\Debug\expat.lib +or + nmake -f Makefile.nt EXPAT=c:\expat-1.95.1 EXPAT_LIB=expat_1_95_1.lib ICONV=\libiconv-1.3 JS=c:\js-1.5 + +Then install the Sablotron DLL and executable as described in section 2. +Make sure expat.dll (iconv.dll, js32.dll) are on your PATH. + + +3.1. Notes on dependencies +-------------------------- + +Iconv is a library for encoding conversions. You don't need to have it +installed, but in that case, you will only be able to use a few encodings. +Sablot has been tested on NT with Bruno Haible's implementation of iconv, +available from ftp://ftp.ilog.fr/pub/Users/haible/gnu/libiconv-1.3.tar.gz + +If you change the way Sablot or iconv are being built, make sure they both +use the DLL version of the C runtime library (msvcrt.dll), i.e. compile +with /MD. This is because they need to share the global variable errno. + +JavaScript is the popular Netscape-developed object scripting language. +Sablotron has to link the JS engine to enable extension functions. +See http://www.mozilla.org/js/ for more details or to get sources. + + +4. More information +======================================== + +If you are in trouble and haven't read the README file, you may find some +useful information there. You can check our website as well: + +http://www.gingerall.com + + +Enjoy Sablotron!! + +GA diff --git a/win32/sablot/README b/win32/sablot/README new file mode 100644 index 0000000..628a07a --- /dev/null +++ b/win32/sablot/README @@ -0,0 +1,87 @@ +This is the README file for the XSL processor called Sablotron + +1. What is Sablotron? +2. Licensing +3. Warranty +4. Porting +5. Installation +6. Sablotron and Perl +7. More info + + +1. What is Sablotron? +======================================== + +Sablotron is an XML processor fully implemented in C++. It uses Expat by +James Clark as an XML parser. Sablotron implements XSLT 1.0, XPath 1.0 +and DOM Level2.
+ +The original creator of Sablotron is Ginger Alliance (www.gingerall.com). + + +2. Licensing +======================================== + +Sablotron is an Open Source project released under the MPL (Mozilla Public +License). Alternatively you may use Sablotron under the GNU's GPL license. + +Please, look at http://www.mozilla.org/MPL or +http://www.gnu.org/copyleft/gpl.html for further info. + +If you want to use the debugger, you may optionally ask Sablotron to use +the GNU readline library. Since this library is available under GPL +only, you have to confirm, that you are accepting the GPL for the +whole Sablotron library. You can do so by setting the SABLOT_GPL=1 +environment variable during the configure process. + +3. Warranty +======================================== + +We offer NO WARRANTY for using Sablotron in any conditions. + + +4. Porting +======================================== + +So far Sablotron is tested on Linux (RedHat 5.2, 6.0, 6.1, 6.2, and 7.0), +Windows (NT 4.0, 95, 98, 2000), Solaris (2.5.1, 2.6, 7), HP-UX (11.00), +FreeBSD (3.4, 4.1), OpenBSD (2.8 beta) and OpenServer (5.0.x). We hope, +that it should work on most other Unix systems as well. + +If you build Sablotron on some other system, please, let us +know. All porting activities are welcome. + + +5. Installation +======================================== + +To build and run Sablotron read the INSTALL file in the same +directory as this file. + +To install or compile Sablotron on Windows read the INSTALL_WIN +file instead. + + +6. Sablotron and Perl +======================================== + +If you'd like to call Sablotron from Perl, download the XML::Sablotron +module from our website (http://www.gingerall.org) or from CPAN. + +Follow the instructions from the README and INSTALL files of the Perl package. + + +7. 
More info +======================================== + +For more info (including Sablotron manual and API reference) look at +the Ginger Alliance web site: + +http://www.gingerall.org + +Maillist subscriptions are also available on this site. + + +Enjoy Sablotron!! + +GA diff --git a/win32/sablot/README_JS b/win32/sablot/README_JS new file mode 100644 index 0000000..c58760b --- /dev/null +++ b/win32/sablot/README_JS @@ -0,0 +1,107 @@ +Sablotron XSLT Extensions Readme File +===================================== + +1. Building +---------------------------------------- + +Extension elements and functions as defined by XSLT 1.0 are +implemented in Sablotron since the version 0.80. Sablotron recognizes +the extension element <http://www.exslt.org/functions/script> as +suggested by exslt.org. There are some exceptions described later in +this document. Please note, that this feature is still considered +EXPERIMENTAL. + +If you want to benefit from this feature, you have to install +the JavaScript engine from mozilla.org (SpiderMonkey). You have to do it +even when you have the Mozilla browser installed, because the binary +browser installation doesn't include essential header files. + +An alternative way to get all necessary JS files is to download and +install the Charlie application framework (see gingerall.org). + +All you need to do on Sablotron side is to run the configure script +with --enable-javascript option. To use JS engine from Charlie +installation, type: ./configure --enable-javascript --enable-perlconnect + +If you have installed JS libraries into non-standard directories, you +need to set (and export) CPLUS_INCLUDE_PATH/LIBRARY_PATH to point to +directories where the header files/lib files (e.g. libjs.so) can be found. + +The default name for the linked library is 'js' (-ljs switch) - if you +need to override this value, you may set SABLOT_JSLIB environment +variable - the configure script uses -l$(SABLOT_JSLIB) in this case. + + +2. 
What is working +---------------------------------------- + +Sablotron supports JavaScript (ECMA) scripting as described in XSLT WD +1.1, with a few exceptions: + +- DOM functions handling namespaces (with NS in their name) are not + supported (throw NOT_SUPPORTED exception) + +- DOM model is read only (as supported, may be changed later) + +- XSLTContext.stringValue is not supported + +- Document.getElementsByTagName{NS} are not supported + +- Element.getElementsByTagName{NS} are not supported + +- DTD definition nodes are not supported + + +The following summarizes what IS supported: + +- exslt:script element support + +- XSLTContext object + +- DOM2 access to a processed document + +- type mapping between XPath and JavaScript including the XSLT + external object support + +- function-available() function + +- element-available() function + + +3. Sample stylesheet +---------------------------------------- + +<?xml version='1.0'?> +<xsl:stylesheet version='1.0' + xmlns:xsl='http://www.w3.org/1999/XSL/Transform' + xmlns:exslt='http://www.exslt.org/functions' + xmlns:my='http://gingerall.org/sablot/myfunc' + extension-element-prefixes='exslt' + exclude-result-prefixes='my'> + + <xsl:output method='xml' indent='yes'/> + + <exslt:script language='javascript' implements-prefix='my'> + <![CDATA[ + + function getNodeNames(nodeList) + { + ret = ''; + for (i = 0; i < nodeList.length; i++) + { + ret += nodeList[i].nodeName + " "; + } + return ret; + } + + ]]> + </exslt:script> + + + <xsl:template match='/'> + <output> + <xsl:value-of select='my:getNodeNames(*)'/> + </output> + </xsl:template> + +</xsl:stylesheet> diff --git a/win32/sablot/RELEASE b/win32/sablot/RELEASE new file mode 100644 index 0000000..196dc6f --- /dev/null +++ b/win32/sablot/RELEASE @@ -0,0 +1,309 @@ +#====================================================================# +# The release file for Sablotron # +#====================================================================# + +Version 0.96 +September 5, 
2002 +------------- + - XSLT debugger implemented (sabcmd --debugger) + - API fixes for PHP extension + - log() function added to write to Sablot's log from JS scripts + - conflicts of top-level elements (variable, key, script) + initializations resolved + - fixed xslt_process buffer overflow + - fixed a bug of duplicate NS qualified attributes + - minor SXP bug fixes [using patches by Tim Crook] + - fixed a bug of the 'mod' operator [report by Jork Behrends] + - fixed a bug of sorting with multiple keys [report by Jork Behrends] + - fixed a bug of comparison [report by Jork Behrends] + - DOM: fixed a bug of the default XML namespace + [report by Albert Micheev] + - DOM: fixed a bug of PIs [report by Albert Micheev] + - improved configuration [by Melvyn Sopacua] + - fragment identifiers allowed in document() for custom + schemes [report by Bob Kline] + +Version 0.95 +June 24, 2002 +------------- + - DOM support upgraded to DOM Level2 + - xsl:strip-space and xsl:preserve-space implemented + - XSLT on external documents (accessed via callbacks) + - esxlt:document instruction implemented + - unparsed-entity-uri() function implemented + - added a manual page for sabcmd + - added a batch mode (multiple sources/stylesheets) + for sabcmd [by Stefan Behnel] + - added a system property to display version + - added a flag for document() to return an empty node-set when + it refers to non-existing document + - fixed a bug in relative paths to ext. 
entities + - fixed a sorting bug on Solaris + - fixed matching precedence for processing-instruction('lit') + - fixed a bug in xsl:decimal-format/format-number() + - fixed a bug in string() - comments excluded + - fixed a bug of RTFs in xsl:attribute (comment, pi) + - fixed a bug of generate-id for multiple documents + - various minor fixes + +Version 0.90 +March 8, 2002 +------------- + - xsl:import implemented + - fixed a bug in boolean expressions with nodesets + [reported by John Holland] + - current() implementation redesigned + - variables not allowed in 'match' attributes of templates + - fixed a bug in attribute value escaping [Christian Lefebvre] + - fixed a bug in ext. entities base URI [Christian Lefebvre] + - fixed a bug in sabcmd --base option [Christian Lefebvre] + - span not indented with html method + - processing-instruction() node test allows literal arguments + - only top-level variables and params visible in xsl:attribute-set + - fixed a bug in xsl:number + - fixed a bug in master situation cleanup + [patch proposed by Bill Hofmann and Bob Kline] + - fixed a bug in error reports from included files + - correct name of element, attribute and pi is checked + +Version 0.82 +January 30, 2002 +---------------- + - fixed the text output method + (nothing but text nodes are outputted) + - SCRIPT and STYLE escaped for xhtml + - fixed a bug in escaping attributtes (src, href) + - fixed a bug in aliasing doctype declaration + - fixed bugs related to Sun CC compiler [Tim Crook, Greg Cope] + - fixed a memory management bug [Tim Crook] + +Version 0.81 +January 16, 2002 +---------------- + - PUBLIC URLs in doctypes and ext. 
entities not parsed + by default (SAB_PARSE_PUBLIC_ENTITIES situation option) + - non-xsl elements allowed inside xsl:stylesheet + - xsl attributes not outputted anymore + - xsl:namespace-alias translates attributes as well + - content of SCRIPT and STYLE not escaped for html/xhtml + - Boolean attributes are abbreviated for html output method + +Version 0.80 +January 8, 2002 +--------------- + - implemented extensions (JavaScript functions + DOM, + extension-element-prefixes, xsl:fallback, function-available(), + element-available()) + - fixed a bux in the 'following' axis + - fixed current() assertion fault + - fixed "META tag placed outside HEAD" bug + - fixed a bug in abbreviated [position] expression in + template 'match' attribute + - implemented lang () function + - added 'namespace' attribute of xsl:element + - implemented 'exclude-result-prefixes' + - implemented xsl:attribute-set and 'use-attribute-sets' + - added 'namespace' attribute of xsl:attribute + - fixed translate() bug + - key() now works with document() + - fixed short stylesheet notation output bugs + - added use-attribute-sets attribute to xsl:copy + - minor bugs fixed + +Version 0.71 +November 5, 2001 +---------------- + - sum() bug fixed + - fixed a Solaris problem with key() [report by Archie Russell] + - SXP minor bugs fixed + - implemented axes 'following' and 'preceding' + - added an SXP callback for id() and a 'user data' pointer for + some callbacks + - fixed a namespace bug in SXP + - improved error messages + - fixed a bug in parsing external files [reported by Tim Crook] + - fixed a bug of current() in inner context() + [reported by Ian Davis] + - fixed a bug of calling XSLT instructions inside xsl:text + - fixed a SablorRunProcessorGen assertion bug + - fixed a scheme handler bug [reported by Tim Crook] + +Version 0.70 +September 17, 2001 +------------------ + - added Sablotron XPath processor + - implemented xsl:key and key() + - implemented format-number() and xsl:decimal-format + 
- implemented xsl:number + - fixed namespace treatment in the DOM part + - the string functions now work correctly with non-ASCII text + [fix partly by Christian Lefebvre] + - fixed a bug in the DOM interface + - fixes for BeOS [Gabe Bauman] + +Version 0.65 +August 15, 2001 +--------------- + - improved HTML indentation + - fixed bug causing output of XML declaration when method='text' + [reported by Jason Dumler] + - parameter entities are always parsed + - xml declaration ends with ?> using HTML method + - > is escaped in HTML + - fixed the RTF serialization [report by Christian Lefebvre] + - fixed the non-ASCII character escaping in URIs + - changed SDOM_xql so it uses namespace declarations on the + document element + - a few more minor bug fixes + +Version 0.60 +June 13, 2001 +------------- + - implemented a subset of DOM level 1 (see sdom.h) + - added sdom.cpp and sdom.h to the project + - new interface methods for Sablotron (preferred use) + SablotRunProcessorGen, SablotAddArgTree, SablotAddArgBuffer + SablotAddParam + - added a simple support of <xsl:output indent> + - fixed a namespace node name bug [report by A. Nuzhdov] + - added support for parametr entities [suggested by Peter Blum] + - fixed a bug related to evaluation of arithmetic + expressions [reported by Sam Brauer] + - fixed a minor bug related to the xhtml output method + +Version 0.52 +March 29, 2001 +-------------- + + - added support for encoding conversions using iconv + - all memory leaks occuring on processing errors are fixed + [thanks for project support to Mitel Networks Corporation] + - fixed the "duplicit attribute error" bug [Marc Lehmann] + - fixed a bug in context evaluation [reported by Marco Guazzone] + - fixed several bugs related to the hash table [Tom Moog] + +Version 0.51 +------------ + - a maintenance release + - fixed the quoting of quotes in HTML output [patch by Mark Bartel]. 
+ - fixes by Tim Crook for AIX + - Sablot runs even without wcsxfrm() or wchar.h (although + it doesn't sort) + - the problem with the type of an iconv() call argument is fixed + - added support for the xhtml output method [Marc Lehmann] + - fixed bug causing elements being output as <:x> [Andreas Buschmann] + - xsl:sort works if wcsxfrm() is not present (sorts naively though) + +Version 0.50 +------------ + - added match predicate optimization + - added proper character class handling [patch by Marc Lehmann] + - fixed a bug in last() [reported by Dirk Siebnich] + - fixed a problem with empty NS decls [reported by S. Tryggvason] + - added SablotSetEncoding(). + - fixed an error with xml:lang [reported by Nicolas Sauret] + - fixed the 'divide by zero' MSVC warning [suggestion by Dirk Siebnich] + - added support for xsl:sort + - fixed an expression parsing error [report by Clayton Cottingham] + - fixed a bug in current() [report by Daniel Hammond] + - minor fix for RedHat 7 [David Hedbor] + - fixed another bug in translate() + - fixed a bug related to starts-with() [reported by Myoungki Kim] + - fixed top-level elements check + +Version 0.44 +------------ + - Fixed an elusive bug in HashTable causing occassional crashes + - Added the namespace-alias() and message() functions + [patch by Major] + - More fixes related to: XML namespace declarations, nested + stylesheet inclusion, Byte Order Mark in UTF-8 doc, + "SYSTEM" in DOCTYPE declaration, the preceding-sibling and + following-sibling axes, iconv. + - Patch for OpenBSD [Anil Madhavapeddy]. + +Version 0.43 +------------ + - Fundamental changes to improve performance. + - Added Latin2 input support [thanks to Matt Sergeant] + - Added Japanese encodings support [patch by Rui Hirokawa] + - Fixed treatment of NaNs etc. [based on patch by Robin Houston] + - Minor bug fixes. + - Added platform.cpp for platform-dependent code. + - Added code to generate the <META> tag when using the + html output method. 
+ +Version 0.42 +------------ + - Sablotron is now thread-safe [thanks to Leo Yu]. + - Output recoding can be done if iconv is present [Sven Neumann]. + - Match predicates work. + - Several bug fixes [Robin Houston]. + - Added: + MiscHandler with documentInfo() + SablotSetBaseForScheme() + the current() function [Robin] + -The actual error code is returned from SablotProcess etc. + +Version 0.41 +------------ + - Sablotron now uses autoconf and works with an unmodified + copy of expat. Its layout is a bit different. + - Bug fix (reported by Kay Sievers). + +Version 0.4 +----------- + - The license was changed to MPL+GPL. + - Output conformance improved considerably (proper escaping + etc.). + - A SAX interface to the result document is available so one + can bypass the construction to the result tree. + - Some changes to the Sablotron interface. + - Implemented <xsl:output>, <xsl:comment>, + <xsl:processing-instruction>. + - External general entities work now. + - Bug fixes (thanks for reports and/or patches to Hans Raaf, + Florian Hars, <robin@kitsite.com>, Mark W. Eichin and + Robin Berjon). + - Performance improvements. + +Version 0.36 +------------ + - optimized string handling, resulting in a significant + performance improvement + - implemented <xsl:copy> + +Version 0.35 +------------ + - fixed a bug concerning the descendant axis, reported by Les + Woolsey + - included the Solaris port (patch contributed by Steven Rowe) + - added SablotSetLog() for setting the log file. Turned + logging off by default. + - added SablotRegMessageHandler() for setting a message + handler. Using this, you can handle all error/warning/log + messages the way you like. + - if the xsl prefix is bound to an obsolete URI, a warning is + issued + - added SablotProcessStringsWithBase() which makes it possible + to override the stylesheet's physical URI with a 'hard base + URI' + - added some command-line switches to sabcmd, e.g. 
--log-file + and --measure + +Version 0.34 +------------ + - bug fixes + - major memory leak fixes + +Version 0.33 +------------ +CVS: tag SABLOT_0_33 (tom, main trunk) + - this release log started + - first major release following Sablotron 0.3 + - added URI processing with relative URI resolution + - added support for the arg: scheme (named buffers) + - added support for passing global parameters + - introduced the SablotProcess() function as a general interface + - sabcmd can pass named buffers as well as the global params + - added a lot of core library functions diff --git a/win32/sablot/bin/sabcmd.exe b/win32/sablot/bin/sabcmd.exe Binary files differnew file mode 100644 index 0000000..8fb2698 --- /dev/null +++ b/win32/sablot/bin/sabcmd.exe diff --git a/win32/sablot/bin/sablot.dll b/win32/sablot/bin/sablot.dll Binary files differnew file mode 100644 index 0000000..c247b57 --- /dev/null +++ b/win32/sablot/bin/sablot.dll diff --git a/win32/sablot/include/sabdbg.h b/win32/sablot/include/sabdbg.h new file mode 100644 index 0000000..1428af6 --- /dev/null +++ b/win32/sablot/include/sabdbg.h @@ -0,0 +1,58 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Sablotron XSLT Processor. + * + * The Initial Developer of the Original Code is Ginger Alliance Ltd. + * Portions created by Ginger Alliance are Copyright (C) 2000-2002 + * Ginger Alliance Ltd. All Rights Reserved. 
+ * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. + */ + +#ifndef SabdbgHIncl +#define SabdbgHIncl + +#define SablotAsExport +#include <sablot.h> + +/************************************************************/ +/* DEBUGGER stuff */ +/************************************************************/ + +Declare +( + void debuggerInit(); +) + +Declare +( + void debuggerDone(); +) + +Declare +( + void debuggerEnterIdle(); +) + +#endif diff --git a/win32/sablot/include/sablot.h b/win32/sablot/include/sablot.h new file mode 100644 index 0000000..71b05a2 --- /dev/null +++ b/win32/sablot/include/sablot.h @@ -0,0 +1,560 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Sablotron XSLT Processor. + * + * The Initial Developer of the Original Code is Ginger Alliance Ltd. + * Portions created by Ginger Alliance are Copyright (C) 2000-2002 + * Ginger Alliance Ltd. 
All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. + */ + +/* +sablot.h +TK Dec 14, 99 +header file for Sablot.cpp +*/ + +#ifndef SablotHIncl +#define SablotHIncl + + +/* version info */ +#define SAB_VERSION "0.96" +#define SAB_DATE "Sep 5, 2002" + +/* common types */ +typedef void *SablotHandle; +typedef void *SDOM_Document; +typedef void *SablotSituation; + +#if defined(WIN32) && defined(_MSC_VER) +#if defined(SablotAsExport) +#define DllImpExp __declspec( dllexport ) +#else +#define DllImpExp __declspec( dllimport ) +#endif /* SablotAsExport */ +#else /* WIN32 && _MSC_VER */ +#define DllImpExp extern +#endif + +#ifdef __cplusplus +#define DeclBegin extern "C" { DllImpExp +#define DeclEnd } +#else +#define DeclBegin DllImpExp +#define DeclEnd +#endif + +#define Declare(STATEMENT) DeclBegin STATEMENT DeclEnd + +#include "shandler.h" +#include "sxpath.h" + +typedef enum +{ + SAB_NO_ERROR_REPORTING = 0x1, + SAB_PARSE_PUBLIC_ENTITIES = 0x2, + SAB_DISABLE_ADDING_META = 0x4, + SAB_DISABLE_STRIPPING = 0x8, + SAB_IGNORE_DOC_NOT_FOUND =0x10 +} SablotFlag; + +/* create a new document */ + +Declare +( + int SablotCreateDocument( + SablotSituation S, + SDOM_Document *D); +) + +/* parse in a document from the given URI */ + +Declare +( + int SablotParse( + SablotSituation S, + const char *uri, + SDOM_Document *D); +) + 
+/* parse a document given in an in-memory buffer */ + +Declare +( + int SablotParseBuffer( + SablotSituation S, + const char *buffer, + SDOM_Document *D); +) + +Declare +( + int SablotParseStylesheet( + SablotSituation S, + const char *uri, + SDOM_Document *D); +) + + +Declare +( + int SablotParseStylesheetBuffer( + SablotSituation S, + const char *buffer, + SDOM_Document *D); +) + + +/* lock document before using it */ + +Declare +( + int SablotLockDocument( + SablotSituation S, + SDOM_Document D); +) + +Declare +( + int SablotDestroyDocument( + SablotSituation S, + SDOM_Document D); +) + +Declare +( + int SablotAddParam( + SablotSituation S, + void *processor_, + const char *paramName, + const char *paramValue); +) + +Declare +( + int SablotAddArgBuffer( + SablotSituation S, + void *processor_, + const char *argName, + const char *bufferValue); +) + +Declare( + int SablotAddArgTree( + SablotSituation S, + void *processor_, + const char *argName, + SDOM_Document tree); +) + +Declare +( + int SablotRunProcessorGen( + SablotSituation S, + void *processor_, + const char *sheetURI, + const char *inputURI, + const char *resultURI); +) + +Declare +( + int SablotRunProcessorExt( + SablotSituation S, + void *processor_, + const char *sheetURI, + const char *resultURI, + NodeHandle doc); +) + +/* + * Situation functions + */ + +/* Creates a new situation. */ + +Declare +( + int SablotCreateSituation(SablotSituation *sPtr); +) + +/* Sets situation flags. */ + +Declare +( + int SablotSetOptions(SablotSituation S, int flags); +) + +Declare +( + int SablotClearSituation(SablotSituation S); +) + +Declare +( + const char* SablotGetErrorURI(SablotSituation S); +) + +Declare +( + int SablotGetErrorLine(SablotSituation S); +) + +Declare +( + const char* SablotGetErrorMsg(SablotSituation S); +) + +/* Disposes of the situation. 
*/ + +Declare +( + int SablotDestroySituation(SablotSituation S); +) + + +/***************************************************************** +SablotCreateProcessor +creates a new instance of the processor +*****************************************************************/ + +Declare +( + int SablotCreateProcessor(SablotHandle *processorPtr); +) + +/***************************************************************** +SablotCreateProcessorForSituation +use this instead of SablotCreateProcessor with any of the +situation-aware functions like SablotRunProcessorGen, +SablotAddArgTree etc. +*****************************************************************/ + +Declare +( + int SablotCreateProcessorForSituation(SablotSituation S, void **processorPtr); +) + + +/***************************************************************** +SablotDestroyProcessor +destroys the processor +*****************************************************************/ + +Declare +( + int SablotDestroyProcessor(SablotHandle processor_); +) + +/***************************************************************** +SablotRunProcessor +runs the existing instance on the given documents +*****************************************************************/ + +Declare +( + int SablotRunProcessor(SablotHandle processor_, + const char *sheetURI, + const char *inputURI, + const char *resultURI, + const char **params, + const char **arguments); +) + +/***************************************************************** +SablotGetResultArg +gets the result arg buffer from the last Sablot run +the buffer is identified by an URI (to enable output to + multiple documents) +*****************************************************************/ + +Declare +( + int SablotGetResultArg(SablotHandle processor_, + const char *argURI, + char **argValue); +) + +/***************************************************************** +SablotFreeResultArgs +kill all result arg buffers from the last Sablot run 
+*****************************************************************/ + +Declare +( + int SablotFreeResultArgs(SablotHandle processor_); +) + +/***************************************************************** +SablotRegHandler +General handler registrator. + type the type of the handler (scheme etc.) + handler pointer to the struct of callbacks of the given type + userData the user data this handler wishes to receive +*****************************************************************/ + +Declare +( + int SablotRegHandler( + SablotHandle processor_, + HandlerType type, /* HLR_MESSAGE, HLR_SCHEME, HLR_SAX */ + void *handler, + void *userData); +) + +/***************************************************************** +SablotUnregHandler + + General handler unregistrator. +*****************************************************************/ + +Declare +( + int SablotUnregHandler( + SablotHandle processor_, + HandlerType type, /* HLR_MESSAGE, HLR_SCHEME, HLR_SAX */ + void *handler, + void *userData); +) + +/***************************************************************** +SablotSetBase + + overrides the default base URI for relative reference + resolution. +*****************************************************************/ + +Declare +( + int SablotSetBase( + SablotHandle processor_, + const char *theBase); +) + +/***************************************************************** +SablotSetBaseForScheme + + a softer form of SablotSetBase: the hard base URI will only + be in effect for relative references whose bases have + the given scheme. + + Example: assume we call + SablotSetBaseForScheme( P, "arg", "http://server" ) + and then runs a stylesheet at "arg:/xxx" which contains "document('foo.xml')". + The relative reference is resolved as "http://server/foo.xml" + but if the stylesheet were at "file:/xxx" it would become "file:/foo.xml". 
+*****************************************************************/ + +Declare +( + int SablotSetBaseForScheme(void* processor_, + const char *scheme, + const char *base); +) + +/**************************************************************** +SablotSetLog + + sets the logging options. Logging is off by default. +*****************************************************************/ + +Declare +( + int SablotSetLog( + SablotHandle processor_, + const char *logFilename, + int logLevel); +) + + +/***************************************************************** +SablotProcess + + the main function published by Sablotron. Feeds given XML + input to a stylesheet. Both of these as well as the location for + output are identified by a URI. One can also pass top-level + stylesheet parameters and named buffers ('args'). +ARGS + sheetURI URI of the XSLT stylesheet + inputURI URI of the XML document + resultURI URI of the output document + params a NULL-terminated array of char*'s defining the top-level + parameters to be passed, interpreted as a + sequence of (name, value) pairs. + arguments a NULL-terminated array of char*'s defining the named + buffers to be passed, interpreted as a + sequence of (name, value) pairs. Both params and arguments + may be NULL. +RETURNS + . nonzero iff error + resultArg in case output goes to a named buffer, *resultArg is set to + point to it (otherwise to NULL). Free it using SablotFree(). +*****************************************************************/ + +Declare +( + int SablotProcess( + const char *sheetURI, const char *inputURI, const char *resultURI, + const char **params, const char **arguments, char **resultArg); +) + +/***************************************************************** +SablotProcessFiles + + calls SablotProcess to process files identified by their + file names. No named buffers or params are passed. Included + for backward compatibility. 
+ARGUMENTS + styleSheetName, inputName, resultName + file names of the stylesheet, XML input and output, + respectively. +RETURNS + . error flag +*****************************************************************/ + +Declare +( + int SablotProcessFiles( + const char *styleSheetName, + const char *inputName, + const char *resultName); +) + +/***************************************************************** +SablotProcessStrings + + calls SablotProcess to process documents in memory, passing + pointers to the documents. No named buffers or params are passed. + Included for backward compatibility. +ARGUMENTS + styleSheetStr, inputStr + text of the stylesheet and the XML input +RETURNS + . error flag + *resultStr pointer to the newly allocated block containing + the result +*****************************************************************/ + +Declare +( + int SablotProcessStrings( + const char *styleSheetStr, + const char *inputStr, + char **resultStr); +) + +/***************************************************************** +SablotProcessStringsWithBase + + Like SablotProcessStrings but lets you pass an URI that replaces + the stylesheet's URI in relative address resolution. + +ARGUMENTS + styleSheetStr, inputStr + text of the stylesheet and the XML input + theHardBase the "hard base URI" replacing the stylesheet's URI + in all relevant situations +RETURNS + . error flag + *resultStr pointer to the newly allocated block containing + the result +*****************************************************************/ + +Declare +( + int SablotProcessStringsWithBase( + const char *styleSheetStr, + const char *inputStr, + char **resultStr, + const char *theHardBase); +) + +/***************************************************************** +SablotFree + + Frees the Sablotron-allocated buffer. The user cannot free it + directly by free(). 
+*****************************************************************/ + +Declare +( + int SablotFree(char *resultStr); +) + +/***************************************************************** +SablotClearError + + Clears the pending error for the given instance of Sablot. +*****************************************************************/ + +Declare +( + int SablotClearError(SablotHandle processor_); +) + +/***************************************************************** +SablotGetMsgText + + Returns the text of a message with the given code. +*****************************************************************/ + +Declare +( + const char* SablotGetMsgText(int code); +) + +/***************************************************************** +SablotSetInstanceData +*****************************************************************/ + +Declare +( + void SablotSetInstanceData(SablotHandle processor_, void *idata); +) + +/***************************************************************** +SablotGetInstanceData +*****************************************************************/ + +Declare +( + void* SablotGetInstanceData(SablotHandle processor_); +) + +/* + SablotSetEncoding + sets the output encoding to be used regardless of the encoding + specified by the stylesheet + To unset, call with encoding_ NULL. +*/ + +Declare +( + void SablotSetEncoding(SablotHandle processor_, char *encoding_); +) + +#endif /* defined SablotHIncl */ + diff --git a/win32/sablot/include/sablot.lib b/win32/sablot/include/sablot.lib Binary files differnew file mode 100644 index 0000000..e002170 --- /dev/null +++ b/win32/sablot/include/sablot.lib diff --git a/win32/sablot/include/sdom.h b/win32/sablot/include/sdom.h new file mode 100644 index 0000000..cee53ff --- /dev/null +++ b/win32/sablot/include/sdom.h @@ -0,0 +1,921 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Sablotron XSLT Processor. + * + * The Initial Developer of the Original Code is Ginger Alliance Ltd. + * Portions created by Ginger Alliance are Copyright (C) 2000-2002 + * Ginger Alliance Ltd. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. 
+ */ + +/* sdom.h */ + +#ifndef SDomHIncl +#define SDomHIncl + +//used for DISABLE_DOM only anything else is DANGEROUS +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#ifndef DISABLE_DOM + +#include "sablot.h" + +/** + ** + ** types + ** + **/ + +typedef void* SDOM_Node; +typedef void* SDOM_NodeList; +/* typedef void* SDOM_Document; */ + +typedef enum +{ + SDOM_ELEMENT_NODE = 1, + SDOM_ATTRIBUTE_NODE = 2, + SDOM_TEXT_NODE = 3, + SDOM_CDATA_SECTION_NODE = 4, + SDOM_ENTITY_REFERENCE_NODE = 5, + SDOM_ENTITY_NODE = 6, + SDOM_PROCESSING_INSTRUCTION_NODE = 7, + SDOM_COMMENT_NODE = 8, + SDOM_DOCUMENT_NODE = 9, + SDOM_DOCUMENT_TYPE_NODE = 10, + SDOM_DOCUMENT_FRAGMENT_NODE = 11, + SDOM_NOTATION_NODE = 12, + SDOM_OTHER_NODE /* not in spec */ +} +SDOM_NodeType; + +/* + * we define DOM_char as char, although the spec says strings should be + * UTF_16. Needs check. + */ +typedef char SDOM_char; + + +/* + * DomException + * will be an enum of all the values given in the spec. + */ + +/* + INDEX_SIZE_ERR, 1 + STRING_SIZE_ERR, 2 + HIERARCHY_REQUEST_ERR, 3 + WRONG_DOCUMENT_ERR, 4 + INVALID_CHARACTER_ERR, 5 + NO_DATA_ALLOWED_ERR, 6 + NO_MODIFICATION_ALLOWED_ERR, 7 + NOT_FOUND_ERR, 8 + NOT_SUPPORTED_ERR, 9 + INUSE_ATTRIBUTE_ERR, 10 + INVALID_STATE_ERR, 11 + SYNTAX_ERR, 12 + INVALID_MODIFICATION_ERR, 13 + NAMESPACE_ERR, 14 + INVALID_ACCESS_ERR, 15 +*/ + +typedef enum +{ + SDOM_OK, + SDOM_INDEX_SIZE_ERR = 1, + SDOM_DOMSTRING_SIZE_ERR = 2, + SDOM_HIERARCHY_REQUEST_ERR = 3, + SDOM_WRONG_DOCUMENT_ERR = 4, + SDOM_INVALID_CHARACTER_ERR = 5, + SDOM_NO_DATA_ALLOWED_ERR = 6, + SDOM_NO_MODIFICATION_ALLOWED_ERR = 7, + SDOM_NOT_FOUND_ERR = 8, + SDOM_NOT_SUPPORTED_ERR = 9, + SDOM_INUSE_ATTRIBUTE_ERR = 10, + SDOM_INVALID_STATE_ERR = 11, + SDOM_SYNTAX_ERR = 12, + SDOM_INVALID_MODIFICATION_ERR = 13, + SDOM_NAMESPACE_ERR = 14, + SDOM_INVALID_ACCESS_ERR = 15, + /* not in spec below this point: */ + SDOM_INVALID_NODE_TYPE, /* eg passing a non-element for an element */ + SDOM_QUERY_PARSE_ERR, + 
SDOM_QUERY_EXECUTION_ERR, + SDOM_NOT_OK +} SDOM_Exception; + +/** + ** + ** Node + ** n is always the node the function is to operate on (kind of 'this') + ** + **/ + +/* + createElement + Creates a new element Node with NULL parent and specified owner, + and returns it in *pn. + Raises: + */ +Declare( +SDOM_Exception SDOM_createElement( + SablotSituation s, + SDOM_Document d, + SDOM_Node *pn, + const SDOM_char *tagName); +) + +//_JP_ v +/* + createElementNS + Creates a new element Node with NULL parent and specified owner, + and returns it in *pn. + Raises: + SDOM_NAMESPACE_ERR or SDOM_INVALID_CHARACTER_ERR when qName or uri malformed + */ +Declare( +SDOM_Exception SDOM_createElementNS( + SablotSituation s, + SDOM_Document d, + SDOM_Node *pn, + const SDOM_char *uri, + const SDOM_char *qName); +) +//_JP_ ^ + +/* + createAttribute + Creates a new attribute Node and returns it in *pn. + Raises: + */ + +Declare +( +SDOM_Exception SDOM_createAttribute( + SablotSituation s, + SDOM_Document d, + SDOM_Node *pn, + const SDOM_char *name); +) + +//_JP_ v +/* + createAttributeNS + Creates a new attribute Node and returns it in *pn. + Raises: + SDOM_NAMESPACE_ERR or SDOM_INVALID_CHARACTER_ERR when qName or uri malformed + */ +Declare( +SDOM_Exception SDOM_createAttributeNS( + SablotSituation s, + SDOM_Document d, + SDOM_Node *pn, + const SDOM_char *uri, + const SDOM_char *qName); +) +//_JP_ ^ + + +/* + createTextNode + Raises: + */ + +Declare +( +SDOM_Exception SDOM_createTextNode( + SablotSituation s, + SDOM_Document d, + SDOM_Node *pn, + const SDOM_char *data); +) + +/* + createCDATASection + Creates a new CDATA section Node and returns it in *pn.
+ Raises: + */ + +Declare +( +SDOM_Exception SDOM_createCDATASection( + SablotSituation s, + SDOM_Document d, + SDOM_Node *pn, + const SDOM_char *data); +) + +/* + createComment + Raises: + */ + +Declare +( +SDOM_Exception SDOM_createComment( + SablotSituation s, + SDOM_Document d, + SDOM_Node *pn, + const SDOM_char *data); +) + +/* + createProcessingInstruction + Raises: + */ + +Declare +( +SDOM_Exception SDOM_createProcessingInstruction( + SablotSituation s, + SDOM_Document d, + SDOM_Node *pn, + const SDOM_char *target, + const SDOM_char *data); +) + +/* + disposeNode + Frees all memory used by the node. + Raises: + !SPEC: Not in the spec. + */ + +Declare +( +SDOM_Exception SDOM_disposeNode( + SablotSituation s, + SDOM_Node n); +) + + +/* + getNodeType + Returns the node type in *pType. + Raises: + */ + +Declare +( +SDOM_Exception SDOM_getNodeType( + SablotSituation s, + SDOM_Node n, + SDOM_NodeType *pType); +) + + +/* + getNodeName + Returns the pointer to the name of the node in *pName. +*/ + +Declare +( +SDOM_Exception SDOM_getNodeName( + SablotSituation s, + SDOM_Node n, + SDOM_char **pName); +) + +Declare +( +SDOM_Exception SDOM_getNodeNSUri( + SablotSituation s, + SDOM_Node n, + SDOM_char **pName); +) + +Declare +( +SDOM_Exception SDOM_getNodePrefix( + SablotSituation s, + SDOM_Node n, + SDOM_char **pName); +) + +Declare +( +SDOM_Exception SDOM_getNodeLocalName( + SablotSituation s, + SDOM_Node n, + SDOM_char **pName); +) + +/* + setNodeName + Sets the node name. +*/ +Declare( +SDOM_Exception SDOM_setNodeName( + SablotSituation s, + SDOM_Node n, + const SDOM_char *name); +) + +/* + getNodeValue + Returns in *pValue the string value of the node. +*/ +Declare( +SDOM_Exception SDOM_getNodeValue( + SablotSituation s, + SDOM_Node n, + SDOM_char **pValue); +) + +/* + setNodeValue + Sets the node value. 
+*/ +Declare( +SDOM_Exception SDOM_setNodeValue( + SablotSituation s, + SDOM_Node n, + const SDOM_char *value); +) + +/* + getParentNode + Returns the parent in *pParent. +*/ +Declare( +SDOM_Exception SDOM_getParentNode( + SablotSituation s, + SDOM_Node n, + SDOM_Node *pParent); +) + +/* + getFirstChild + Returns the first child in *pFirstChild. +*/ +Declare( +SDOM_Exception SDOM_getFirstChild( + SablotSituation s, + SDOM_Node n, + SDOM_Node *pFirstChild); +) + +/* + getLastChild + Returns the last child in *pLastChild. +*/ +Declare( +SDOM_Exception SDOM_getLastChild( + SablotSituation s, + SDOM_Node n, + SDOM_Node *pLastChild); +) + +/* + getPreviousSibling + Returns the previous sibling in *pPreviousSibling. +*/ +Declare( +SDOM_Exception SDOM_getPreviousSibling( + SablotSituation s, + SDOM_Node n, + SDOM_Node *pPreviousSibling); +) + +/* + getNextSibling + Returns the next sibling in *pNextSibling. +*/ +Declare( +SDOM_Exception SDOM_getNextSibling( + SablotSituation s, + SDOM_Node n, + SDOM_Node *pNextSibling); +) + +/* + getChildNode + Returns the child node in specified index or NULL. +*/ +Declare( +SDOM_Exception SDOM_getChildNodeIndex( + SablotSituation s, + SDOM_Node n, + int index, + SDOM_Node *pChildNode); +) + +/* + getChildNodeCount + Returns the count of child nodes. +*/ +Declare( +SDOM_Exception SDOM_getChildNodeCount( + SablotSituation s, + SDOM_Node n, + int *count); +) + +/* + getOwnerDocument + Returns, in *pOwnerDocument, the Document owning the node. + !SPEC: If the node is standalone, returns NULL. +*/ +Declare( +SDOM_Exception SDOM_getOwnerDocument( + SablotSituation s, + SDOM_Node n, + SDOM_Document *pOwnerDocument); +) + +/* + insertBefore + Inserts newChild as n's child, just before refChild. +*/ +Declare( +SDOM_Exception SDOM_insertBefore( + SablotSituation s, + SDOM_Node n, + SDOM_Node newChild, + SDOM_Node refChild); +) + +/* + removeChild + Removes oldChild (a child of n) without deallocating it. 
+*/ + +Declare( +SDOM_Exception SDOM_removeChild( + SablotSituation s, + SDOM_Node n, + SDOM_Node oldChild); +) + +/* + replaceChild + Replaces oldChild (a child of n) by newChild. +*/ +Declare( +SDOM_Exception SDOM_replaceChild( + SablotSituation s, + SDOM_Node n, + SDOM_Node newChild, + SDOM_Node oldChild); +) + +/* + appendChild + Appends newChild as the last of n's children. +*/ +Declare( +SDOM_Exception SDOM_appendChild( + SablotSituation s, + SDOM_Node n, + SDOM_Node newChild); +) + +/* + cloneNode + Duplicates the node, returning the result in *clone. + If deep is nonzero, the cloning process will be recursive. +*/ + +Declare( +SDOM_Exception SDOM_cloneNode( + SablotSituation s, + SDOM_Node n, + int deep, + SDOM_Node *clone); +) + +/* + cloneForeignNode + Duplicates a node from a different doc, returning the result in *clone. + If deep is nonzero, the cloning process will be recursive. + As opposed to cloneNode, represents a Document method +*/ + +Declare( +SDOM_Exception SDOM_cloneForeignNode( + SablotSituation s, + SDOM_Document d, + SDOM_Node n, + int deep, + SDOM_Node *clone); +) + +/* + getAttribute + Returns, in *pValue, the contents of the attribute (of n) named 'name'. +*/ +Declare( +SDOM_Exception SDOM_getAttribute( + SablotSituation s, + SDOM_Node n, + const SDOM_char *name, + SDOM_char **pValue); +) + +/* + getAttributeNS + Returns, in *pValue, the contents of the attribute (of n) with 'uri' and 'local'. +*/ +Declare( +SDOM_Exception SDOM_getAttributeNS( + SablotSituation s, + SDOM_Node n, + SDOM_char *uri, + SDOM_char *local, + SDOM_char **pValue); +) + +/* + getAttributeNode + Returns, in *attr, the attribute named 'name'. +*/ +Declare( +SDOM_Exception SDOM_getAttributeNode( + SablotSituation s, + SDOM_Node n, + const SDOM_char *name, + SDOM_Node *attr); +) + +/* + getAttributeNodeNS + Returns, in *attr, the n's attribute with uri and local. 
+*/ +Declare( +SDOM_Exception SDOM_getAttributeNodeNS( + SablotSituation s, + SDOM_Node n, + SDOM_char *uri, + SDOM_char *local, + SDOM_Node *attr); +) + +//_JP_ v +/* + getAttributeNodeIndex + Returns, in *attr, the index'th attribute, namespaces precede other atts. +*/ +Declare( +SDOM_Exception SDOM_getAttributeNodeIndex( + SablotSituation s, + SDOM_Node n, + const int index, + SDOM_Node *attr); +) + +/* + getAttributeNodeCount + Returns, in *count, the count of atts, including namespaces in scope. +*/ +Declare( +SDOM_Exception SDOM_getAttributeNodeCount( + SablotSituation s, + SDOM_Node n, + int *count); +) +//_JP_ ^ + +/* + setAttribute + Assigns the given value to n's attribute named 'name'. +*/ +Declare( +SDOM_Exception SDOM_setAttribute( + SablotSituation s, + SDOM_Node n, + const SDOM_char *name, + const SDOM_char *value); +) + +/* + setAttributeNS + Assigns the given value to n's attribute with 'uri' and qualified name 'qName'. +*/ +Declare( +SDOM_Exception SDOM_setAttributeNS( + SablotSituation s, + SDOM_Node n, + const SDOM_char *uri, + const SDOM_char *qName, + const SDOM_char *value); +) + +/* + setAttributeNode + Assigns the given attnode as n's attribute. + Returns replaced, if was replaced some node with the same nodeName. +*/ +Declare( +SDOM_Exception SDOM_setAttributeNode( + SablotSituation s, + SDOM_Node n, + SDOM_Node attnode, + SDOM_Node *replaced); +) + +/* + setAttributeNodeNS + Assigns the given attnode to n's attribute with attnode.uri and attnode.localname. + Returns replaced, if was replaced some node. +*/ +Declare( +SDOM_Exception SDOM_setAttributeNodeNS( + SablotSituation s, + SDOM_Node n, + SDOM_Node attnode, + SDOM_Node *replaced); +) + +/* + removeAttribute + Removes the given attribute of n. +*/ +Declare( +SDOM_Exception SDOM_removeAttribute( + SablotSituation s, + SDOM_Node n, + const SDOM_char *name); +) + +/* + removeAttribute + Removes the given attribute of n. 
+*/ +Declare( +SDOM_Exception SDOM_removeAttributeNode( + SablotSituation s, + SDOM_Node n, + SDOM_Node attnode, + SDOM_Node *removed); +) +/* + attributeElement + returns owner element of attribute specified +*/ +Declare( +SDOM_Exception SDOM_getAttributeElement( + SablotSituation s, + SDOM_Node attr, + SDOM_Node *owner); +) + +/* + getAttributeList + Returns, in *pAttrList, the list of all attributes of the element n. + !SPEC: Not in spec. +*/ +Declare( +SDOM_Exception SDOM_getAttributeList( + SablotSituation s, + SDOM_Node n, + SDOM_NodeList *pAttrList); +) + +/** + ** END Node + **/ + + +/** + ** Functions related to Document + **/ + + +/* + docToString + Serializes the document, returning the resulting string in + *pSerialized, which is a Sablotron-allocated buffer. + !SPEC: Not in spec. +*/ +Declare( +SDOM_Exception SDOM_docToString( + SablotSituation s, + SDOM_Document d, + SDOM_char **pSerialized); +) + +Declare( +SDOM_Exception SDOM_nodeToString( + SablotSituation s, + SDOM_Document d, + SDOM_Node n, + SDOM_char **pSerialized); +) + +/** + ** END Document functions + **/ + + +/** + ** NodeList + ** An ordered collection of nodes, returned by xql. + **/ + + +/* + getNodeListLength + Returns, in *pLength, the number of nodes in the list l. + !SPEC: Not in spec. +*/ +Declare( +SDOM_Exception SDOM_getNodeListLength( + SablotSituation s, + SDOM_NodeList l, + int *pLength); +) + +/* + getNodeListItem + Returns, in *pItem, the index'th member of the node list l. + !SPEC: Not in spec. +*/ +Declare( +SDOM_Exception SDOM_getNodeListItem( + SablotSituation s, + SDOM_NodeList l, + int index, + SDOM_Node *pItem); +) + +/* + disposeNodeList + Destroys the node list l. The nodes themselves are NOT disposed. + !SPEC: Not in spec. 
+*/ +Declare( +SDOM_Exception SDOM_disposeNodeList( + SablotSituation s, + SDOM_NodeList l); +) + +/** + ** END NodeList + **/ + + +/** + ** Miscellaneous + **/ + +/* + xql + Returns, in *pResult, the list of all nodes satisfying the XPath + query given as a string in 'query'. For the evaluation of the query, the + current node will be set to currentNode. + Note that the query is necessarily rather restricted. + After the contents of *pResult have been retrieved, the list should + be freed using disposeNodeList. + !SPEC: Not in spec. +*/ +Declare( +SDOM_Exception SDOM_xql( + SablotSituation s, + const SDOM_char *query, + SDOM_Node currentNode, + SDOM_NodeList *pResult); +) + +/** + ** END Miscellaneous + ** + ** + ** + ** Exception retrieval + **/ + +/* + getExceptionCode + returns the code of the pending exception +*/ + +Declare +( + int SDOM_getExceptionCode(SablotSituation s); +) + +/* + getExceptionMessage + returns the message for the pending exception +*/ + +Declare +( + char* SDOM_getExceptionMessage(SablotSituation s); +) + +/* + getExceptionDetails + returns extra information for the pending exception + - on the code and message of the primary error + - on the document and file line where the primary error occurred +*/ + +Declare +( + void SDOM_getExceptionDetails( + SablotSituation s, + int *code, + char **message, + char **documentURI, + int *fileLine); +) + +/** + ** END Exception retrieval + ** + ** + ** Internal functions + **/ + +/* + setNodeInstanceData + saves a pointer in a node instance +*/ + +Declare( +void SDOM_setNodeInstanceData(SDOM_Node n, void *data); +) + +/* + getNodeInstanceData + retrieves the saved pointer +*/ +Declare( +void* SDOM_getNodeInstanceData(SDOM_Node n); +) +/* + setDisposeCallback + sets callback to be called on every node disposal +*/ + +typedef void SDOM_NodeCallback(SDOM_Node n); + +Declare +( + void SDOM_setDisposeCallback(SDOM_NodeCallback *f); +) + +Declare +( + SDOM_NodeCallback* SDOM_getDisposeCallback(); +) +
+/** + ** FOR IMPLEMENTATION IN PERL + ** None of these functions appear in the spec. + **/ + + +/* + ArrayRef getChildNodes(Node n) + Returns the array of n's children. + Implement using getFirstChild and getNextSibling. + + + HashRef getAttributes(Node n) + Returns the hash of n's attributes. + Implement using getAttributeList, getNodeListLength, + getNodeListItem, getNodeName and getNodeValue. + + + setAttributes(Node n, HashRef atts) + Sets n's attributes to atts, keeping the old ones alive but making + them standalone. + Implement using getAttributeList, getNodeListLength, removeAttribute + and setAttribute (not too efficient perhaps). +*/ + + /* _TH_ v */ +Declare +( + void SDOM_tmpListDump(SDOM_Document doc, int p); +) + +#endif /* DISABLE_DOM */ + +#endif /* SDomHIncl */ diff --git a/win32/sablot/include/shandler.h b/win32/sablot/include/shandler.h new file mode 100644 index 0000000..ea7fc83 --- /dev/null +++ b/win32/sablot/include/shandler.h @@ -0,0 +1,369 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Sablotron XSLT Processor. + * + * The Initial Developer of the Original Code is Ginger Alliance Ltd. + * Portions created by Ginger Alliance are Copyright (C) 2000-2002 + * Ginger Alliance Ltd. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above.
If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. + */ + +#ifndef ShandlerHIncl +#define ShandlerHIncl + +/* we have to deal with the size_t type, sys/types.h; + is needed on some platforms */ +#if !defined(_MSC_VER) +#include <sabcfg.h> +#endif + +#include <stddef.h> + +/* GP: clean */ + +/***************************************************************** + + handler types + +*****************************************************************/ + +typedef enum +{ + HLR_MESSAGE = 0, + HLR_SCHEME, + HLR_SAX, + HLR_MISC, + HLR_ENC +} HandlerType; + +extern const char* hlrTypeNames[]; /* found in base.cpp */ + +typedef enum +{ + SH_ERR_OK = 0, + SH_ERR_NOT_OK = 1, + SH_ERR_UNSUPPORTED_SCHEME +} SchemeHandlerErrors; + +/***************************************************************** +SchemeHandler + + is a structure for a scheme handler. It contains pointers to + the following functions of the handler: + open(), get(), put(), close(). + All of these functions return an error flag (0=OK, 1=not). + open() may also return SH_ERR_UNSUPPORTED_SCHEME. +*****************************************************************/ + +/* getAll: open the URI and return the whole string + scheme = URI scheme (e.g.
"http") + rest = the rest of the URI (without colon) + the document is returned in a handler-allocated buffer + byteCount holds the byte count on return + return *buffer = NULL if not processed +*/ +typedef int SchemeHandlerGetAll(void *userData, SablotHandle processor_, + const char *scheme, const char *rest, + char **buffer, int *byteCount); + +/* freeMemory: free the buffer allocated by getAll +*/ + +typedef int SchemeHandlerFreeMemory(void *userData, SablotHandle processor_, + char *buffer); + +/* open: open the URI and return a handle + scheme = URI scheme (e.g. "http") + rest = the rest of the URI (without colon) + the resulting handle is returned in '*handle' +*/ +typedef int SchemeHandlerOpen(void *userData, SablotHandle processor_, + const char *scheme, const char *rest, int *handle); + +/* get: retrieve data from the URI + handle = the handle assigned on open + buffer = pointer to the data + *byteCount = number of bytes to read + (the number actually read is returned here) +*/ +typedef int SchemeHandlerGet(void *userData, SablotHandle processor_, + int handle, char *buffer, int *byteCount); + +/* put: save data to the URI (if possible) + handle = the handle assigned on open + buffer = pointer to the data + *byteCount = number of bytes to write + (the number actually written is returned here) +*/ +typedef int SchemeHandlerPut(void *userData, SablotHandle processor_, + int handle, const char *buffer, int *byteCount); + +/* close: close the URI with the given handle + handle = the handle assigned on open +*/ +typedef int SchemeHandlerClose(void *userData, SablotHandle processor_, + int handle); + +typedef struct +{ + SchemeHandlerGetAll *getAll; + SchemeHandlerFreeMemory *freeMemory; + SchemeHandlerOpen *open; + SchemeHandlerGet *get; + SchemeHandlerPut *put; + SchemeHandlerClose *close; +} SchemeHandler; + +/***************************************************************** +MessageHandler + + a structure for external message handlers. 
Such a handler, if set, + receives all error reports, displays them, keeps the log, the + error trace, etc. +*****************************************************************/ + +/* + define the "facility number" for Sablotron. This does not mean much + nowadays. +*/ + +#define MH_FACILITY_SABLOTRON 2 + +/* type for the error codes used by the message handler */ + +typedef unsigned long MH_ERROR; + +/* logging levels for the message handler */ + +typedef enum +{ + MH_LEVEL_DEBUG, + MH_LEVEL_INFO, + MH_LEVEL_WARN, + MH_LEVEL_ERROR, + MH_LEVEL_CRITICAL +} MH_LEVEL; + +/* + makeCode() + makes the "external" error code to report with log() or error() + call with facility = module id; severity = 1 iff critical. + 'code' is the error code internal to Sablotron. +*/ + +typedef MH_ERROR +MessageHandlerMakeCode( + void *userData, SablotHandle processor_, + int severity, unsigned short facility, unsigned short code); + +/* + log() + pass code created by makeCode, level as necessary + fields is a NULL-terminated list of strings in form "field:contents" + distinguished fields include: msg, file, line, token +*/ + +typedef MH_ERROR +MessageHandlerLog( + void *userData, SablotHandle processor_, + MH_ERROR code, MH_LEVEL level, char **fields); + +/* + error() + for reporting errors, meaning as with log() +*/ + +typedef MH_ERROR +MessageHandlerError(void *userData, SablotHandle processor_, + MH_ERROR code, MH_LEVEL level, char **fields); + +/* the message handler structure. Use SablotRegMessageHandler() to register. 
*/ + +typedef struct +{ + MessageHandlerMakeCode *makeCode; + MessageHandlerLog *log; + MessageHandlerError *error; +} MessageHandler; + + + + + +/* + + SAXHandler + a SAX-like, streaming interface for access to XML docs + +*/ + + +#define SAX_RETURN void + +typedef SAX_RETURN +SAXHandlerStartDocument(void* userData, SablotHandle processor_); + +typedef SAX_RETURN +SAXHandlerStartElement(void* userData, SablotHandle processor_, + const char* name, const char** atts); + +typedef SAX_RETURN +SAXHandlerEndElement(void* userData, SablotHandle processor_, + const char* name); + +typedef SAX_RETURN +SAXHandlerStartNamespace(void* userData, SablotHandle processor_, + const char* prefix, const char* uri); + +typedef SAX_RETURN +SAXHandlerEndNamespace(void* userData, SablotHandle processor_, + const char* prefix); + +typedef SAX_RETURN +SAXHandlerComment(void* userData, SablotHandle processor_, + const char* contents); + +typedef SAX_RETURN +SAXHandlerPI(void* userData, SablotHandle processor_, + const char* target, const char* contents); + +typedef SAX_RETURN +SAXHandlerCharacters(void* userData, SablotHandle processor_, + const char* contents, int length); + +typedef SAX_RETURN +SAXHandlerEndDocument(void* userData, SablotHandle processor_); + + +/* + The SAX handler structure. Use SablotRegSAXHandler() to register. +*/ + + +typedef struct +{ + SAXHandlerStartDocument *startDocument; + SAXHandlerStartElement *startElement; + SAXHandlerEndElement *endElement; + SAXHandlerStartNamespace *startNamespace; + SAXHandlerEndNamespace *endNamespace; + SAXHandlerComment *comment; + SAXHandlerPI *processingInstruction; + SAXHandlerCharacters *characters; + SAXHandlerEndDocument *endDocument; +} SAXHandler; + + +/***************************************************************** +MiscHandler + + Collects miscellaneous callbacks. 
+*****************************************************************/ + +/* + documentInfo() + If set, this callback gets called after the output of a result + document is finished, giving information about its content type + and encoding. +*/ + +typedef void +MiscHandlerDocumentInfo(void* userData, SablotHandle processor_, + const char *contentType, const char *encoding); + +/* + The Misc handler structure. + Use SablotRegHandler(HLR_MISC, ...) to register. +*/ + +typedef struct +{ + MiscHandlerDocumentInfo *documentInfo; +} MiscHandler; + +/***************************************************************** +EncHandler + + Handler for recoding requests in absence of iconv. +*****************************************************************/ + +#define EH_FROM_UTF8 1 +#define EH_TO_UTF8 0 + +/* + the conversion descriptor like iconv_t +*/ + +typedef void* EHDescriptor; + +typedef enum +{ + EH_OK, + EH_EINVAL, + EH_E2BIG, + EH_EILSEQ +} EHResult; + +/* + open() + direction is either EH_FROM_UTF8 or EH_TO_UTF8 + encoding is the other encoding + RETURN the descriptor, or -1 if the encoding is not supported +*/ + +typedef EHDescriptor EncHandlerOpen(void* userData, SablotHandle processor_, + int direction, const char *encoding); + +/* + conv() + arguments 3 through 7 are just like for iconv, see the manpage + RETURN -1 on error (set errno), a different value (e.g. 0) if OK +*/ + +typedef EHResult EncHandlerConv(void* userData, SablotHandle processor_, + EHDescriptor cd, const char** inbuf, size_t *inbytesleft, + char ** outbuf, size_t *outbytesleft); + +/* + close() + cd is the descriptor to close. Return 0 if OK, -1 on error. +*/ + +typedef int EncHandlerClose(void* userData, SablotHandle processor_, + EHDescriptor cd); + +/* + The EncHandler structure. + Use SablotRegHandler(HLR_ENC, ...) to register. 
+*/ + +typedef struct +{ + EncHandlerOpen *open; + EncHandlerConv *conv; + EncHandlerClose *close; +} EncHandler; + +#endif diff --git a/win32/sablot/include/sxpath.h b/win32/sablot/include/sxpath.h new file mode 100644 index 0000000..d6bd448 --- /dev/null +++ b/win32/sablot/include/sxpath.h @@ -0,0 +1,264 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Sablotron XSLT Processor. + * + * The Initial Developer of the Original Code is Ginger Alliance Ltd. + * Portions created by Ginger Alliance are Copyright (C) 2000-2002 + * Ginger Alliance Ltd. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. 
+ */ + +#ifndef SXPathHIncl +#define SXPathHIncl + +/* basic types needed in sablot.h: untyped void pointer handles, declared ahead of the include below */ +typedef void *SXP_Node; +typedef void *SXP_Document; +typedef void *SXP_NodeList; +typedef SXP_Node NodeHandle; /* NodeHandle is an alias for SXP_Node */ + +#include "sablot.h" + +typedef enum /* node kinds; this is the return type of the DOMH_getNodeType callback. NOTE(review): the numeric values look like the W3C DOM nodeType codes - confirm */ +{ + ELEMENT_NODE = 1, + ATTRIBUTE_NODE = 2, + TEXT_NODE = 3, + PROCESSING_INSTRUCTION_NODE = 7, + COMMENT_NODE = 8, + DOCUMENT_NODE = 9, + NAMESPACE_NODE = 13 +} SXP_NodeType; + +typedef enum /* expression result kinds; SXP_getResultType takes a pointer to this type. No explicit values are given, so the enumerators number from 0 */ +{ + SXP_NONE, + SXP_NUMBER, + SXP_STRING, + SXP_BOOLEAN, + SXP_NODESET +} SXP_ExpressionType; + +typedef char SXP_char; /* character type used for the names, values, URIs and query text in this API */ + +typedef void *QueryContext; /* untyped handle; SXP_createQueryContext takes a pointer to one, the other query functions take it by value */ + +/* option constants: each enumerator is a distinct single bit (0x1, 0x2, 0x4). NOTE(review): presumably OR-ed into the options word of SXP_setOptions - confirm */ +typedef enum +{ + SXPF_DISPOSE_NAMES = 0x1, + SXPF_DISPOSE_VALUES = 0x2, + SXPF_SUPPORTS_UNPARSED_ENTITIES =0x4 +} SXPFlags; + +/* + * DOM handler functions + * This handler is registered with the Situation rather than the Processor + */ + +/***************************************************************** +DOMHandler + + Handler providing information about a DOM tree to the XPath + processor +*****************************************************************/ + +typedef SXP_NodeType DOMH_getNodeType(SXP_Node n); /* each DOMH_ typedef names a function type, not a pointer; the DOMHandler struct stores a pointer to each */ +typedef const SXP_char* DOMH_getNodeName(SXP_Node n); +typedef const SXP_char* DOMH_getNodeNameURI(SXP_Node n); +typedef const SXP_char* DOMH_getNodeNameLocal(SXP_Node n); +typedef const SXP_char* DOMH_getNodeValue(SXP_Node n); +typedef SXP_Node DOMH_getNextSibling(SXP_Node n); +typedef SXP_Node DOMH_getPreviousSibling(SXP_Node n); +typedef SXP_Node DOMH_getNextAttrNS(SXP_Node n); +typedef SXP_Node DOMH_getPreviousAttrNS(SXP_Node n); +typedef int DOMH_getChildCount(SXP_Node n); +typedef int DOMH_getAttributeCount(SXP_Node n); +typedef int DOMH_getNamespaceCount(SXP_Node n); +typedef SXP_Node DOMH_getChildNo(SXP_Node n, int ndx); +typedef SXP_Node DOMH_getAttributeNo(SXP_Node n, int ndx); +typedef SXP_Node DOMH_getNamespaceNo(SXP_Node n, int ndx); +typedef SXP_Node DOMH_getParent(SXP_Node n); +typedef SXP_Document DOMH_getOwnerDocument(SXP_Node n); +typedef 
int DOMH_compareNodes(SXP_Node n1, SXP_Node n2); +typedef SXP_Document DOMH_retrieveDocument(const SXP_char* uri, + void* udata); /* the only callback that receives a udata pointer. NOTE(review): presumably the udata passed to SXP_registerDOMHandler - confirm */ +typedef SXP_Node DOMH_getNodeWithID(SXP_Document doc, const SXP_char* id); +typedef void DOMH_freeBuffer(SXP_char *buff); /* NOTE(review): presumably releases name or value buffers when the SXPF_DISPOSE_ flags are in effect - confirm */ + +typedef struct /* callback table: one pointer member per DOMH_ function type, in the same order as the typedefs */ +{ + DOMH_getNodeType *getNodeType; + DOMH_getNodeName *getNodeName; + DOMH_getNodeNameURI *getNodeNameURI; + DOMH_getNodeNameLocal *getNodeNameLocal; + DOMH_getNodeValue *getNodeValue; + DOMH_getNextSibling *getNextSibling; + DOMH_getPreviousSibling *getPreviousSibling; + DOMH_getNextAttrNS *getNextAttrNS; + DOMH_getPreviousAttrNS *getPreviousAttrNS; + DOMH_getChildCount *getChildCount; + DOMH_getAttributeCount *getAttributeCount; + DOMH_getNamespaceCount *getNamespaceCount; + DOMH_getChildNo *getChildNo; + DOMH_getAttributeNo *getAttributeNo; + DOMH_getNamespaceNo *getNamespaceNo; + DOMH_getParent *getParent; + DOMH_getOwnerDocument *getOwnerDocument; + DOMH_compareNodes *compareNodes; + DOMH_retrieveDocument *retrieveDocument; + DOMH_getNodeWithID *getNodeWithID; + /* optional entries - driven by sxpOptions. NOTE(review): presumably the option flags set through SXP_setOptions - confirm */ + DOMH_freeBuffer *freeBuffer; +} DOMHandler; /* the Declare macro used for every prototype below is not defined in this header. NOTE(review): presumably supplied by sablot.h - confirm */ + +Declare +( + void SXP_registerDOMHandler(SablotSituation S, + DOMHandler *domh, void* udata); +) + +Declare +( + void SXP_unregisterDOMHandler(SablotSituation S); +) + +/* + * + * QueryContext functions + * + */ + +/* options setters and getter: SXP_setOptions and SXP_setMaskBit take a value and return void, SXP_getOptions returns unsigned long; all three take the SablotSituation */ +Declare +( + void SXP_setOptions(SablotSituation S, unsigned long options); +) + +Declare +( + void SXP_setMaskBit(SablotSituation S, int mask); +) + +Declare +( + unsigned long SXP_getOptions(SablotSituation S); +) + /* every function declared from SXP_createQueryContext through SXP_getResultNodeset returns int. NOTE(review): presumably a status code - confirm */ +Declare +( + int SXP_createQueryContext(SablotSituation S, QueryContext *Q); +) + +Declare +( + int SXP_addVariableBinding(QueryContext Q, + const SXP_char* name, QueryContext source); +) + +Declare +( + int SXP_addVariableNumber(QueryContext Q, + const SXP_char* name, double value); +) + +Declare +( + int SXP_addVariableString(QueryContext Q, + const SXP_char* name, const SXP_char* 
value); +) + +Declare +( + int SXP_addVariableBoolean(QueryContext Q, + const SXP_char* name, int value); +) + +Declare +( + int SXP_addNamespaceDeclaration(QueryContext Q, + const SXP_char* prefix, const SXP_char* uri); +) + +Declare +( + int SXP_query(QueryContext Q, const SXP_char* query, + SXP_Node n, int contextPosition, int contextSize); /* NOTE(review): presumably evaluates the query text with n as the context node, contextPosition and contextSize feeding XPath position() and last() - confirm */ +) + +Declare +( + int SXP_destroyQueryContext(QueryContext Q); +) + +/* + * + * Functions to retrieve the query result and its type; each SXP_getResult function takes the QueryContext plus one pointer argument + * + */ + +Declare +( + int SXP_getResultType(QueryContext Q, SXP_ExpressionType *type); +) + +Declare +( + int SXP_getResultNumber(QueryContext Q, double *result); +) + +Declare +( + int SXP_getResultBool(QueryContext Q, int *result); +) + +Declare +( + int SXP_getResultString(QueryContext Q, const char **result); /* spelled with plain char rather than SXP_char; the two are the same type per the SXP_char typedef */ +) + +Declare +( + int SXP_getResultNodeset(QueryContext Q, SXP_NodeList *result); +) + +/* + * + * NodeList manipulation + * + */ + +Declare +( + int SXP_getNodeListLength(SXP_NodeList l); +) + +Declare +( + SXP_Node SXP_getNodeListItem(QueryContext Q, SXP_NodeList l, int index); /* unlike SXP_getNodeListLength, this also takes the QueryContext */ +) + + +#endif /* SXPathHIncl */ diff --git a/win32/sablot/lib/sablot.lib b/win32/sablot/lib/sablot.lib Binary files differnew file mode 100644 index 0000000..e002170 --- /dev/null +++ b/win32/sablot/lib/sablot.lib diff --git a/win32/sablot/lib/sablotd.lib b/win32/sablot/lib/sablotd.lib Binary files differnew file mode 100644 index 0000000..4b59629 --- /dev/null +++ b/win32/sablot/lib/sablotd.lib |