From 53914f770f1e1dc1ab4342c64846fd995825b7e6 Mon Sep 17 00:00:00 2001 From: Stef Date: Wed, 17 Sep 2003 18:34:42 +0000 Subject: Initial Import --- src/basehandler.cpp | 59 ++ src/basehandler.h | 29 + src/levelhandler.cpp | 98 +++ src/levelhandler.h | 41 + src/reference.h | 112 +++ src/rtfformatting.h | 122 +++ src/rtfparser.cpp | 398 +++++++++ src/rtfparser.h | 80 ++ src/rtfx.cpp | 68 ++ src/sablo.h | 2139 +++++++++++++++++++++++++++++++++++++++++++++ src/sablo.txt | 0 src/sablotr.cpp | 121 +++ src/stdafx.cpp | 8 + src/stdafx.h | 21 + src/usuals.h | 8 + src/xmlcomposehelpers.cpp | 103 +++ src/xmlcomposehelpers.h | 84 ++ src/xmlcomposer.cpp | 1811 ++++++++++++++++++++++++++++++++++++++ src/xmlcomposer.h | 228 +++++ 19 files changed, 5530 insertions(+) create mode 100644 src/basehandler.cpp create mode 100644 src/basehandler.h create mode 100644 src/levelhandler.cpp create mode 100644 src/levelhandler.h create mode 100644 src/reference.h create mode 100644 src/rtfformatting.h create mode 100644 src/rtfparser.cpp create mode 100644 src/rtfparser.h create mode 100644 src/rtfx.cpp create mode 100644 src/sablo.h create mode 100644 src/sablo.txt create mode 100644 src/sablotr.cpp create mode 100644 src/stdafx.cpp create mode 100644 src/stdafx.h create mode 100644 src/usuals.h create mode 100644 src/xmlcomposehelpers.cpp create mode 100644 src/xmlcomposehelpers.h create mode 100644 src/xmlcomposer.cpp create mode 100644 src/xmlcomposer.h (limited to 'src') diff --git a/src/basehandler.cpp b/src/basehandler.cpp new file mode 100644 index 0000000..02c79c1 --- /dev/null +++ b/src/basehandler.cpp @@ -0,0 +1,59 @@ +// BaseHandler.cpp: implementation of the BaseHandler class. 
+// +////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" +#include "BaseHandler.h" + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +BaseHandler::BaseHandler() // starts with no reader attached +{ + m_reader = NULL; +} + +BaseHandler::~BaseHandler() +{ + clear(); +} + +void BaseHandler::clear() // forgets the current reader +{ + m_reader = NULL; +} + +void BaseHandler::startDocument(RtfReader* reader) // resets state, then remembers the reader for this parse +{ + clear(); + m_reader = reader; +} + +void BaseHandler::endDocument() +{ + // We leave document and levels here so that they + // can be accessed later + m_reader = NULL; +} + +void BaseHandler::controlWord(const string& cw, int flags, int param) // intentionally empty: default no-op for subclasses to override +{ + +} + +void BaseHandler::groupStart() // intentionally empty: default no-op for subclasses to override +{ + +} + +void BaseHandler::groupEnd() // intentionally empty: default no-op for subclasses to override +{ + +} + +void BaseHandler::charData(wstring data) // intentionally empty: default no-op for subclasses to override +{ + +} + diff --git a/src/basehandler.h b/src/basehandler.h new file mode 100644 index 0000000..33cfe61 --- /dev/null +++ b/src/basehandler.h @@ -0,0 +1,29 @@ +// BaseHandler +// Implements an RtfHandler for other classes (LevelHandler +// and RtfParser) to override. + +#ifndef _BASEHANDLER_H_ +#define _BASEHANDLER_H_ + +#include "RtfReader.h" + +class BaseHandler : + public RtfHandler +{ +public: + BaseHandler(); + virtual ~BaseHandler(); + + virtual void startDocument(RtfReader* reader); // stores the reader + virtual void endDocument(); // drops the reader + virtual void controlWord(const string& cw, int flags, int param); + virtual void groupStart(); + virtual void groupEnd(); + virtual void charData(wstring data); + +protected: + virtual void clear(); // resets m_reader to NULL + RtfReader* m_reader; // set by startDocument(), NULL again after endDocument() +}; + +#endif // _BASEHANDLER_H_ diff --git a/src/levelhandler.cpp b/src/levelhandler.cpp new file mode 100644 index 0000000..1c55e9c --- /dev/null +++ b/src/levelhandler.cpp @@ -0,0 +1,98 @@ +// LevelHandler.cpp: implementation of the LevelHandler class. 
+// +////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" +#include "LevelHandler.h" + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +LevelHandler::LevelHandler() +{ + +} + +LevelHandler::~LevelHandler() +{ + clear(); +} + +void LevelHandler::clear() // releases both level references, then resets the base class +{ + m_curLevel.release(); + m_topLevel.release(); + + BaseHandler::clear(); +} + +void LevelHandler::startDocument(RtfReader* reader) // creates the top level and makes it the current one +{ + BaseHandler::startDocument(reader); + + m_topLevel = new Level; + m_curLevel = m_topLevel; +} + +void LevelHandler::endDocument() // levels are kept; only the base class state is reset +{ + BaseHandler::endDocument(); +} + +void LevelHandler::groupStart() // a group start pushes a new level +{ + BaseHandler::groupStart(); + ASSERT(m_curLevel); + pushLevel(); +} + +void LevelHandler::groupEnd() // a group end pops back to the previous level +{ + ASSERT(m_curLevel); + popLevel(); + BaseHandler::groupEnd(); +} + +DOM::Element LevelHandler::getElement() // element of the current level +{ + ASSERT(m_curLevel); + return m_curLevel->getElement(); +} + +void LevelHandler::pushLevel() +{ + // Push a level on the stack + m_curLevel = m_curLevel->pushLevel(); +} + +void LevelHandler::popLevel() +{ + // Pull a level off the stack + LevelPtr level = m_curLevel->getPrevious(); + + // TODO: report errors here (no previous level: the current level is kept) + if(level) + m_curLevel = level; +} + +void LevelHandler::rewindLevel(LevelPtr ptr) // makes the level before ptr current, or ptr itself when it has none +{ + ASSERT(ptr != NULL); + + LevelPtr prev = ptr->getPrevious(); + + if(prev != NULL) + m_curLevel = prev; + else + m_curLevel = ptr; +} + +LevelPtr LevelHandler::getLevel() // current level +{ + return m_curLevel; +} + + + + diff --git a/src/levelhandler.h b/src/levelhandler.h new file mode 100644 index 0000000..533fdfe --- /dev/null +++ b/src/levelhandler.h @@ -0,0 +1,41 @@ +// LevelHandler +// Keeps track of levels (braces) in the RTF file. 
+// + +#ifndef _LEVELHANDLER_H_ +#define _LEVELHANDLER_H_ + +#include "RtfReader.h" +#include "BaseHandler.h" +#include "RtfParseHelpers.h" +#include "Reference.h" + +class LevelHandler + : public BaseHandler +{ +public: + LevelHandler(); + virtual ~LevelHandler(); + + virtual void startDocument(RtfReader* reader); + virtual void endDocument(); + virtual void groupStart(); + virtual void groupEnd(); + + virtual DOM::Element getElement(); + + void pushLevel(); + void popLevel(); + void rewindLevel(LevelPtr ptr); + LevelPtr getLevel(); + + +protected: + virtual void clear(); + + LevelPtr m_topLevel; + LevelPtr m_curLevel; +}; + + +#endif // _LEVELHANDLER_H_ diff --git a/src/reference.h b/src/reference.h new file mode 100644 index 0000000..8b82a61 --- /dev/null +++ b/src/reference.h @@ -0,0 +1,112 @@ +#ifndef __REFERENCE_H__ +#define __REFERENCE_H__ + +template<class C> +class Reference // intrusive smart pointer: C must provide addRef() and release() +{ +public: + Reference() + { m_ptr = NULL; } + + Reference(C* ptr) // shares ptr and adds a reference + { + m_ptr = ptr; + addref(); + } + + Reference(C* ptr, bool doAddRef) // the flag must not be called addref: that name would hide the member function + { + m_ptr = ptr; + if(doAddRef) + addref(); + } + + ~Reference() + { + release(); + } + + Reference(const Reference& orig) + { + m_ptr = orig.m_ptr; + addref(); + } + + Reference& operator=(const C* ptr) // new pointer is referenced before the old one is released, so self assignment is safe + { + C* old = m_ptr; + m_ptr = (C*)ptr; + addref(); + if(old) + old->release(); + return *this; + } + + Reference& operator=(const Reference& orig) + { return operator=(orig.m_ptr); } + + void attach(C* ptr) // takes over ptr without adding a reference + { + release(); + m_ptr = ptr; + } + + C* detach() // hands the pointer back without releasing it + { + C* ptr = m_ptr; + m_ptr = NULL; + return ptr; + } + + operator C*() const + { return m_ptr; } + + C* operator->() const + { return m_ptr; } + +#if 0 + operator bool() const + { + return m_ptr != NULL; + } +#endif + + void release() // drops our reference (if any) and becomes NULL + { + if(m_ptr) + m_ptr->release(); + m_ptr = NULL; + } + + void addref() + { + if(m_ptr) + m_ptr->addRef(); + } + +private: + C* m_ptr; +}; + +class Instance // base class for objects held through Reference +{ +public: + Instance() + { m_x = 0; } + virtual ~Instance() + { } + void addRef() + { m_x++; } + void 
release() // deletes the object once the count drops to zero or below + { + if((--m_x) <= 0) + delete this; + } + +private: + // The reference count + int m_x; +}; + + +#endif //__REFERENCE_H__ \ No newline at end of file diff --git a/src/rtfformatting.h b/src/rtfformatting.h new file mode 100644 index 0000000..2dfc126 --- /dev/null +++ b/src/rtfformatting.h @@ -0,0 +1,122 @@ +// RtfTextProperties.h: interface for the RtfTextProperties class. +// +////////////////////////////////////////////////////////////////////// + +#if !defined(AFX_RTFTEXTPROPERTIES_H__719D85C9_69D9_4499_BE5E_7A9A7F6F9C38__INCLUDED_) +#define AFX_RTFTEXTPROPERTIES_H__719D85C9_69D9_4499_BE5E_7A9A7F6F9C38__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +class RtfFormatting // value type holding the text and paragraph properties tracked by the parser +{ +public: + RtfFormatting() + { + resetText(); + resetPara(); + } + + RtfFormatting(const RtfFormatting& format) + { + copy(format); + } + + bool textEquals(const RtfFormatting& format) const // true when every text property matches + { + return m_bold == format.m_bold && + m_italic == format.m_italic && + m_strike == format.m_strike && + m_hidden == format.m_hidden && + m_underline == format.m_underline && + m_color == format.m_color; + } + + bool paraEquals(const RtfFormatting& format) const // true when every paragraph property matches + { + return m_style == format.m_style && + m_list == format.m_list && + m_inTbl == format.m_inTbl; + } + + void copy(const RtfFormatting& format) // copies text and paragraph properties alike + { + m_bold = format.m_bold; + m_italic = format.m_italic; + m_strike = format.m_strike; + m_hidden = format.m_hidden; + m_underline = format.m_underline; + m_color = format.m_color; + + m_style = format.m_style; + m_list = format.m_list; + m_inTbl = format.m_inTbl; + } + + void resetText() // all text flags off, color unset (-1) + { + m_bold = m_italic = m_strike = + m_underline = m_hidden = false; + m_color = -1; + } + + void resetPara() // style and list unset (-1), not in a table + { + m_style = m_list = -1; + m_inTbl = false; + } + + bool textIsBold() const + { return m_bold; } + bool textIsItalic() const + { return m_italic; } + bool textIsStrike() const + { return m_strike; } + bool textIsUnderline() const + { return m_underline; } + bool 
textIsHidden() const + { return m_hidden; } + int textColor() const + { return m_color; } + int paraStyle() const + { return m_style; } + int paraList() const + { return m_list; } + bool paraInTable() const + { return m_inTbl; } + + void textSetBold(bool bold) + { m_bold = bold; } + void textSetItalic(bool italic) + { m_italic = italic; } + void textSetStrike(bool strike) + { m_strike = strike; } + void textSetUnderline(bool underline) + { m_underline = underline; } + void textSetHidden(bool hidden) + { m_hidden = hidden; } + void textSetColor(int color) + { m_color = color; } + void paraSetStyle(int style) + { m_style = style; } + void paraSetList(int list) + { m_list = list; } + void paraSetTable(bool inTable) + { m_inTbl = inTable; } + +protected: + bool m_bold; + bool m_italic; + bool m_strike; + bool m_underline; + bool m_hidden; + int m_color; + + int m_style; + int m_list; + bool m_inTbl; + // TODO: Character styles +}; + +#endif // !defined(AFX_RTFTEXTPROPERTIES_H__719D85C9_69D9_4499_BE5E_7A9A7F6F9C38__INCLUDED_) diff --git a/src/rtfparser.cpp b/src/rtfparser.cpp new file mode 100644 index 0000000..2928aa7 --- /dev/null +++ b/src/rtfparser.cpp @@ -0,0 +1,398 @@ +// RtfReader.cpp: implementation of the RtfReader class. 
+// +////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" + +#include +#include +#include "RtfReader.h" + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +const int RtfHandler::kAsterisk = 0x00000001; +const int RtfHandler::kHasParam = 0x00000002; +const int RtfHandler::kIsEncoded = 0x00000004; + +RtfReader::RtfReader() +{ + m_handler = NULL; + m_depth = 0; + m_parseHex = true; + m_parseUnicode = false; + m_uniEat = 0; + m_uniEatStack.push(0); // base entry: must stay on the stack for the reader's lifetime +} + +RtfReader::~RtfReader() +{ + +} + +bool RtfReader::parse(string fileName) // opens fileName, parses it and closes it again; false if it cannot be opened +{ + FILE* file = fopen(fileName.c_str(), "r"); + if(!file) + return false; + + bool ret = parse(file); + + fclose(file); + + return ret; +} + +void RtfReader::emptyData(RtfContext& cx) // hands buffered text to the handler and clears the buffer +{ + if(!cx.data.empty()) + { + if(m_handler) + m_handler->charData(cx.data); + cx.data.resize(0); + } +} + +void RtfReader::sendData(RtfContext& cx, wchar_t ch) // buffers ch unless it is one of the characters still to be skipped +{ + if(m_uniEat > 0) + m_uniEat--; + else + cx.data.append(1, ch); +} + +void RtfReader::sendData(RtfContext& cx, wstring data) // buffers data minus any leading characters still to be skipped +{ + if(m_uniEat > 0) + { + int len = __min(data.size(), m_uniEat); + cx.data.append(data.substr(len)); + m_uniEat -= len; + } + else + { + cx.data.append(data); + } +} + +void RtfReader::sendControlWord(RtfContext& cx, string cw, int flags, int param) // flushes pending text first so the handler sees events in file order +{ + emptyData(cx); + if(m_handler) + m_handler->controlWord(cw, flags, param); +} + +bool RtfReader::parseHexChar(RtfContext& cx, int num) // reads num hex digits; false only when the file ends first +{ + string data; + for(int i = 0; i < num; i++) + { + int c = fgetc(cx.file); // int, so EOF cannot be confused with byte 0xFF + + if(c == EOF) + return false; + char ch = (char)c; + if((ch >= 'A' && ch <= 'F') || + (ch >= 'a' && ch <= 'f') || + (ch >= '0' && ch <= '9')) + { + data.append(1, ch); + } + else + { + m_parseErrors.append((string)"invalid hex char: " + ch + "\n"); + } + } + + if(m_parseHex) + { + char* end = NULL; + int val = strtol(data.c_str(), &end, 16); + if(data.size() == (size_t)num && end == data.c_str() + 
data.size()) // only emit a character when every digit was valid + sendData(cx, val); + else + m_parseErrors.append("invalid hex char: " + data + "\n"); + } + else + { + sendControlWord(cx, data, RtfHandler::kIsEncoded, -1); + } + + return true; +} + +bool RtfReader::parseControlWord(RtfContext& cx) // called after a backslash; false only when the file ends inside the word +{ + bool isAsterisk = false; + string controlword; + string param; + + while(1) + { + int ch = fgetc(cx.file); + if(ch == EOF) // fgetc reports end of file with EOF, not WEOF + return false; + + bool empty = controlword.empty(); + + // Part of the name of a control word + // NOTE: Although the RTF specification prohibits uppercase + // control words, MS Word uses them :-/ + if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') + controlword.append(1, ch); + + // Part of the parameter of a control word + else if(ch >= '0' && ch <= '9') + param.append(1, ch); + + // Now handle escapes and other special types of + // control words. These are all only valid at beginning + // of the "control word" + + // hex spelled out character + else if(empty && ch == '\'') + { + parseHexChar(cx, 2); + break; + } + + // Asterisk type destination + else if(empty && ch == '*') + { + isAsterisk = true; + + ch = fgetc(cx.file); + while(strchr("\r\n", ch)) + ch = fgetc(cx.file); + + if(ch != '\\') + ungetc(ch, cx.file); + } + + // Escaped backslash + else if(empty && ch == '\\') + { + sendData(cx, L'\\'); + break; + } + + // Non breaking space + else if(empty && ch == '~') + { + sendData(cx, 0x00A0); + break; + } + + // Optional hyphen + else if(empty && ch == '-') + { + sendData(cx, 0x00AD); + break; + } + + // a hyphen right after control word is part of number + else if(!empty && param.empty() && ch == '-') + { + param.append(1, ch); + } + + // TODO: This looks real hokey and acts that + // way too +#if 0 + // An enter as the first character of a control word + // makes a paragraph + else if(strchr("\n\r", ch)) + { + controlword = "par"; + break; + } +#endif + // Space end a rtf code (but get eaten) + else if(strchr(" ", ch)) + break; + + // Anything else (including a 
backslash ends a control word) + else + { + ungetc(ch, cx.file); + break; + } + } + + // Empty out the control word buffers + if(!controlword.empty()) + { + int flags = isAsterisk ? RtfHandler::kAsterisk : 0; + int numPar = -1; + + if(!param.empty()) + { + char* end = NULL; + numPar = strtol(param.c_str(), &end, 10); + if(end == param.c_str() + param.size()) + flags += RtfHandler::kHasParam; + } + + // Here we check for common characters + if(controlword == "emdash") + sendData(cx, 0x2014); + else if(controlword == "endash") + sendData(cx, 0x2013); + else if(controlword == "emspace") + sendData(cx, 0x2003); + else if(controlword == "enspace") + sendData(cx, 0x2002); + else if(controlword == "bullet") + sendData(cx, 0x2022); + else if(controlword == "lquote") + sendData(cx, 0x2018); + else if(controlword == "rquote") + sendData(cx, 0x2019); + else if(controlword == "ldblquote") + sendData(cx, 0x201C); + else if(controlword == "rdblquote") + sendData(cx, 0x201D); + + // Unicode values get sent through + else if(m_parseUnicode && flags & RtfHandler::kHasParam && + controlword == "u" ) + { + sendData(cx, numPar); + m_uniEat = m_uniEatStack.top(); + } + + // Unicode destination + else if(m_parseUnicode && controlword == "ud") + { + + } + + // Skip value for unicode characters + else if(m_parseUnicode && controlword == "uc") + { + m_uniEatStack.pop(); + m_uniEatStack.push(numPar); + } + + // Otherwise we send the control word + else + { + if(m_handler) + sendControlWord(cx, controlword, flags, numPar); + } + } + + return true; +} + +bool RtfReader::parse(FILE* file) // drives the handler over the whole stream; true when no parse errors were recorded +{ + m_depth = 0; + m_parseErrors = ""; + + int ch = 0; + + RtfContext cx; + cx.isData = false; + cx.file = file; + cx.data = L""; + + if(m_handler) + m_handler->startDocument(this); + + while(1) + { + ch = fgetc(file); + if(ch == EOF) + goto done; + + // Type is undetermined so we figure it out here + if(!cx.isData) + { + switch(ch) + { + case '\\': + if(!parseControlWord(cx)) + goto done; + break; + + case 
'{': + { + emptyData(cx); + + m_uniEatStack.push(m_uniEatStack.top()); + + if(m_handler) + m_handler->groupStart(); + + m_depth++; + } + break; + + case '}': + { + emptyData(cx); + + if(m_handler) + m_handler->groupEnd(); + + if(m_uniEatStack.size() > 1) // never pop the base entry: a stray '}' must not leave top() undefined + m_uniEatStack.pop(); + + m_depth--; + } + break; + + default: + cx.isData = true; + break; + } + } + + if(cx.isData) + { + // We translate tabs into the appropriate control + // word + if(ch == '\t') + sendControlWord(cx, "tab", 0, -1); + +// Don't need this code, the XML outputter +// Takes care of it for us +#if 0 + if(ch == '&') + sendData(cx, L"&amp;"); + + else if(ch == '\'') + sendData(cx, L"&apos;"); + + else if(ch == '"') + sendData(cx, L"&quot;"); + + else if(ch == '<') + sendData(cx, L"&lt;"); + + else if(ch == '>') + sendData(cx, L"&gt;"); +#endif + + // enters a + else if(!strchr("\r\n", ch)) + sendData(cx, ch); + + cx.isData = false; + } + } + +done: + + if(m_depth != 0) + m_parseErrors.append("unmatched braces\n"); + + // TODO: Check depth and give errors if screwy + if(m_handler) + m_handler->endDocument(); + + return m_parseErrors.empty(); +} + diff --git a/src/rtfparser.h b/src/rtfparser.h new file mode 100644 index 0000000..f28150e --- /dev/null +++ b/src/rtfparser.h @@ -0,0 +1,80 @@ +// RtfReader.h: interface for the RtfReader class. 
+// +////////////////////////////////////////////////////////////////////// + +#if !defined(AFX_RTFREADER_H__2C77784F_5333_4E16_B0E0_B56E211C2D82__INCLUDED_) +#define AFX_RTFREADER_H__2C77784F_5333_4E16_B0E0_B56E211C2D82__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +#include +#include +class RtfReader; + +class RtfHandler // callback interface driven by RtfReader::parse +{ +public: + virtual void startDocument(RtfReader* reader) = 0; // called once before any input is read + virtual void endDocument() = 0; // called once when parsing stops + virtual void controlWord(const string& cw, int flags, int param) = 0; // param is -1 when none was given + virtual void groupStart() = 0; // called for '{' + virtual void groupEnd() = 0; // called for '}' + virtual void charData(wstring data) = 0; // buffered text, flushed before each group or control word event + + static const int kAsterisk; // flag: the control word followed a backslash-asterisk marker + static const int kHasParam; // flag: a numeric parameter was parsed + static const int kIsEncoded; // flag: cw holds the raw digits of a hex escape + }; + +class RtfReader +{ +public: + RtfReader(); + virtual ~RtfReader(); + + + bool parse(string fileName); + bool parse(FILE* file); + + void setHandler(RtfHandler* handler) + { m_handler = handler; } + string getParseErrors() const + { return m_parseErrors; } + int getDepth() const + { return m_depth; } + void setHexParse(bool parse) + { m_parseHex = parse; } + void setUnicode(bool unicode); + +protected: + RtfHandler* m_handler; + int m_depth; + bool m_parseHex; + string m_parseErrors; + + // Unicode handling + bool m_parseUnicode; + typedef std::stack<int> StackInt; + StackInt m_uniEatStack; + int m_uniEat; + +private: + + struct RtfContext + { + FILE* file; + bool isData; + wstring data; + }; + + bool parseControlWord(RtfContext& cx); + bool parseHexChar(RtfContext& cx, int num); + void sendControlWord(RtfContext& cx, string cw, int flags, int param); + void sendData(RtfContext& cx, wchar_t ch); + void sendData(RtfContext& cx, wstring data); + void emptyData(RtfContext& cx); +}; + +#endif // !defined(AFX_RTFREADER_H__2C77784F_5333_4E16_B0E0_B56E211C2D82__INCLUDED_) diff --git a/src/rtfx.cpp b/src/rtfx.cpp new file mode 100644 index 0000000..6a68a28 --- /dev/null +++ b/src/rtfx.cpp @@ -0,0 +1,68 @@ +// rtfm.cpp : Defines the entry point for the console 
application. +// + +#include "stdafx.h" +#include +#include + +#include "RtfReader.h" +#include "RtfAnalyser.h" + +int usage() // prints the command line synopsis and returns exit code 2 +{ + fprintf(stderr, "usage: rtfm inrtf outxml\n"); + return 2; +} + +int main(int argc, char* argv[]) // converts the RTF file argv[1] to XML in argv[2]; returns 0 on success, 1 on failure, 2 on bad usage +{ + if(argc < 3) + return usage(); + + try + { + FILE* file = fopen(argv[1], "rb"); + + if(!file) + { + fprintf(stderr, "rtfm: couldn't open file: %s: %s\n", argv[1], strerror(errno)); + return 1; + } + + RtfParserOptions options; + + RtfParser handler(options); + RtfReader rtf; + rtf.setHandler(&handler); + bool ret = rtf.parse(file); + fclose(file); + + if(!ret) + { + fprintf(stderr, "rtfm: rtf parse failed: %s\n", rtf.getParseErrors().c_str()); + return 1; + } + + + DOM::Document doc = handler.getDocument(); + string xml = doc.serialize(); + + FILE* out = fopen(argv[2], "wb"); + if(!out) + { + fprintf(stderr, "rtfm: couldn't open file: %s: %s\n", argv[2], strerror(errno)); + return 1; + } + + bool ok = fwrite(xml.c_str(), 1, xml.length(), out) == xml.length(); // a short write must not be reported as success + ok = (fclose(out) == 0) && ok; // fclose flushes, so it can fail too + if(!ok) fprintf(stderr, "rtfm: couldn't write file: %s\n", argv[2]); + return ok ? 0 : 1; + } + catch(DOM::DOMException& e) + { + fprintf(stderr, "rtfm: xml dom error: %s\n", e.getMessage()); + } + + return 1; +} diff --git a/src/sablo.h b/src/sablo.h new file mode 100644 index 0000000..99cfc40 --- /dev/null +++ b/src/sablo.h @@ -0,0 +1,2139 @@ +// +// AUTHOR +// N. Nielsen +// +// LICENSE +// This software is in the public domain. +// +// The software is provided "as is", without warranty of any kind, +// express or implied, including but not limited to the warranties +// of merchantability, fitness for a particular purpose, and +// noninfringement. In no event shall the author(s) be liable for any +// claim, damages, or other liability, whether in an action of +// contract, tort, or otherwise, arising from, out of, or in connection +// with the software or the use or other dealings in the software. 
+// +// SUPPORT +// Send bug reports to: +// +// SITE +// http://memberwebs.com/nielsen/ +// + +// SABLOTRON C++ WRAPPER +// +// This wrapper tries to emulate the W3C DOM as much as possible. +// Objects returned can be copied liberally. When copied they still +// refer to the original Sablotron classes. +// Almost all classes are "light" wrappers, and shouldn't be more +// than 4 to 8 bytes apiece, with no additional memory allocation. +// Certain objects (NodeList, NamedNodeMap implementations) are +// heavier, and are reference counted. +// +// Sablotron uses UTF8 exclusively internally. This wrapper +// supports on-the-fly conversions from UTF16. Define the following +// constants to enable conversions: +// +// USE_UTF16 +// Convert all data values from UTF16 to UTF8. +// +// DOM_UTF16 +// Convert all DOM names from UTF16 to UTF8. +// +// If either of the above two constants is enabled you must include +// sablotr.cpp in your build. +// +// Everything is in the DOM namespace. +// +// Objects of type Document and DOMImplementation must be manually +// freed with their 'release' member function. 
+// +// + +#ifndef __SABLO_H__ +#define __SABLO_H__ + +#define USE_UTF16 +// #define DOM_UTF16 + +#include +#include +#include +#include + +namespace DOM +{ + namespace S + { + extern "C" + { + #include + #include + } + }; + + bool transcode16to8(const std::basic_string& data, + std::basic_string& ret); + bool transcode8to16(const std::basic_string& data, + std::basic_string& ret); + + namespace INT + { + typedef std::basic_string _8str; + typedef std::basic_string _16str; + } + +#ifdef ASSERT + #define DOM_ASSERT ASSERT +#else + #include "assert.h" + #define ASSERT assert +#endif + +#ifdef USE_UTF16 + #ifdef DOM_UTF16 + typedef INT::_16str data_str; + typedef INT::_16str dom_str; + #define FROM_V(v) _16to8(v) + #define FROM_D(v) _16to8(v) + #define TO_V(v) _8to16(v) + #define TO_D(v) _8to16(v) + #else + typedef INT::_16str data_str; + typedef INT::_8str dom_str; + #define FROM_V(v) _16to8(v) + #define FROM_D(v) v + #define TO_V(v) _8to16(v) + #define TO_D(v) v + #endif +#else + typedef INT::_8str data_str; + typedef INT::_8str dom_str; + #define FROM_V(v) v + #define FROM_D(v) v + #define TO_V(v) v + #define TO_D(v) v +#endif + + namespace INT + { + template<class C> + class Ref // intrusive smart pointer: C must provide addref() and release() + { + public: + Ref() + { m_ptr = NULL; } + + Ref(C* ptr) // shares ptr and adds a reference + { + m_ptr = ptr; + addref(); + } + + Ref(C* ptr, bool doAddRef) // the flag must not be called addref: that name would hide the member function + { + m_ptr = ptr; + if(doAddRef) + addref(); + } + + ~Ref() + { + release(); + } + + Ref(const Ref& orig) + { + m_ptr = orig.m_ptr; + addref(); + } + + Ref& operator=(const C* ptr) // new pointer is referenced before the old one is released, so self assignment is safe + { + C* old = m_ptr; + m_ptr = (C*)ptr; + addref(); + if(old) + old->release(); + return *this; + } + + Ref& operator=(const Ref& orig) + { return operator=(orig.m_ptr); } + +#ifdef COMPARE_REF + bool operator==(const C* ptr) + { + if(m_ptr == NULL && ptr == NULL) + return true; + else if(m_ptr == NULL || ptr == NULL) + return false; + + return *ptr == *m_ptr; + } + + bool operator==(const Ref& orig) + { return operator==(orig.m_ptr); } +#else + bool operator==(const C* ptr) + { + ASSERT(ptr == NULL); 
+ return m_ptr == NULL; + } +#endif + operator C*() const + { return m_ptr; } + + operator C&() const // yields the referenced object itself; the pointer must not be NULL + { return *m_ptr; } + + C* operator->() const + { return m_ptr; } + + protected: + void release() // drops our reference (if any) and becomes NULL + { + if(m_ptr) + m_ptr->release(); + m_ptr = NULL; + } + + void addref() + { + if(m_ptr) + m_ptr->addref(); + } + + private: + C* m_ptr; + }; + + class Inst // base class for objects held through Ref + { + public: + Inst() + { m_x = 0; } + virtual ~Inst() + { } + void addref() + { m_x++; } + void release() // deletes the object once the count drops to zero or below + { + if((--m_x) <= 0) + delete this; + } + + private: + int m_x; + }; + + class Base; + }; + + #define ASSERT_VALID() \ + ASSERT(isValid()); + #define ASSERT_VALID_NODE(node) \ + ASSERT(node.isValid()); + #define ASSERT_TYPE(t) \ + ASSERT(getNodeType() == t) + #define ASSERT_NODE_TYPE(n, t) \ + ASSERT(n.getNodeType() == t) + + class DOMException + { + public: + typedef enum + { + INDEX_SIZE_ERR = S::SDOM_INDEX_SIZE_ERR, + DOMSTRING_SIZE_ERR = S::SDOM_DOMSTRING_SIZE_ERR, + HIERARCHY_REQUEST_ERR = S::SDOM_HIERARCHY_REQUEST_ERR, + WRONG_DOCUMENT_ERR = S::SDOM_WRONG_DOCUMENT_ERR, + INVALID_CHARACTER_ERR = S::SDOM_INVALID_CHARACTER_ERR, + NO_DATA_ALLOWED_ERR = S::SDOM_NO_DATA_ALLOWED_ERR, + NO_MODIFICATION_ALLOWED_ERR = S::SDOM_NO_MODIFICATION_ALLOWED_ERR, + NOT_FOUND_ERR = S::SDOM_NOT_FOUND_ERR, + NOT_SUPPORTED_ERR = S::SDOM_NOT_SUPPORTED_ERR, + INUSE_ATTRIBUTE_ERR = S::SDOM_INUSE_ATTRIBUTE_ERR, + INVALID_STATE_ERR = S::SDOM_INVALID_STATE_ERR, + SYNTAX_ERR = S::SDOM_SYNTAX_ERR, + INVALID_MODIFICATION_ERR = S::SDOM_INVALID_MODIFICATION_ERR, + NAMESPACE_ERR = S::SDOM_NAMESPACE_ERR, + INVALID_ACCESS_ERR = S::SDOM_INVALID_ACCESS_ERR, + /* not in spec below this point: */ + INVALID_NODE_TYPE = S::SDOM_INVALID_NODE_TYPE, + QUERY_PARSE_ERR = S::SDOM_QUERY_PARSE_ERR, + QUERY_EXECUTION_ERR = S::SDOM_QUERY_EXECUTION_ERR, + NOT_OK = S::SDOM_NOT_OK + } CODES; + + int getCode() + { return code; } + char* getMessage() + { return S::SDOM_getExceptionMessage(m_sit); } + void getDetails(int* code, char** message, + char** 
documentUri, int* fileLine) + { S::SDOM_getExceptionDetails(m_sit, code, message, documentUri, fileLine); } + + short code; + + protected: + DOMException(S::SDOM_Exception e, S::SablotSituation s) + { + code = e; + m_sit = s; + } + + S::SablotSituation m_sit; + + friend class INT::Base; + }; + + namespace INT + { + /** + * The base class that keeps references to sablo + */ + class Base + { + public: + bool operator==(const Base& other) const + { return m_sit == other.m_sit; } + bool operator==(const void* null) const + { ASSERT(null == NULL); return m_sit == NULL; }; + bool operator!=(const Base& other) const + { return !operator==(other); } + bool operator!=(const void* null) const + { return !operator==(null); } + + protected: + Base(S::SablotSituation sit) + { m_sit = sit; } + Base(const Base& base) + { m_sit = base.m_sit; } + Base& operator=(const Base& other) + { m_sit = other.m_sit; return *this; } + Base& operator=(const void* null) + { ASSERT(null == NULL); m_sit = NULL; return *this; } + inline bool isValid() const + { return m_sit != NULL; } + + inline S::SDOM_Exception _try_(S::SDOM_Exception e) const // throws DOMException for anything but SDOM_OK + throw(DOMException) + { + if(e != S::SDOM_OK) + throw DOMException(e, m_sit); + return e; + } + +#ifdef USE_UTF16 + inline _16str _8to16(const _8str& d) const + throw(DOMException) + { + _16str s; + if(!transcode8to16(d, s)) + throw DOMException(S::SDOM_INVALID_CHARACTER_ERR, m_sit); + return s; + } + + inline _8str _16to8(const _16str& d) const + throw(DOMException) + { + _8str s; + if(!transcode16to8(d, s)) + throw DOMException(S::SDOM_INVALID_CHARACTER_ERR, m_sit); + return s; + } +#endif + + S::SablotSituation m_sit; + }; + + class NamedNodeMap; + class NodeList; + class ChildNodeList; + class AttrNodeList; + class DOMNodeList; + class AttrNamedNodeMap; + } + + class Element; + class Document; + class DOMImplementation; + + typedef INT::Ref<INT::NamedNodeMap> NamedNodeMap; + typedef INT::Ref<INT::NodeList> NodeList; + + /** + * Thin wrapper class for a DOM Node + */ + class Node : + 
public INT::Base + { + public: + /* Node type codes; each one aliases the corresponding S::SDOM_* constant. */ enum TYPES + { + ELEMENT_NODE = S::SDOM_ELEMENT_NODE, + ATTRIBUTE_NODE = S::SDOM_ATTRIBUTE_NODE, + TEXT_NODE = S::SDOM_TEXT_NODE, + CDATA_SECTION_NODE = S::SDOM_CDATA_SECTION_NODE, + ENTITY_REFERENCE_NODE = S::SDOM_ENTITY_REFERENCE_NODE, + ENTITY_NODE = S::SDOM_ENTITY_NODE, + PROCESSING_INSTRUCTION_NODE = S::SDOM_PROCESSING_INSTRUCTION_NODE, + COMMENT_NODE = S::SDOM_COMMENT_NODE, + DOCUMENT_NODE = S::SDOM_DOCUMENT_NODE, + DOCUMENT_TYPE_NODE = S::SDOM_DOCUMENT_TYPE_NODE, + DOCUMENT_FRAGMENT_NODE = S::SDOM_DOCUMENT_FRAGMENT_NODE, + NOTATION_NODE = S::SDOM_NOTATION_NODE + }; + + /* A default-constructed Node holds neither a situation nor a node handle. */ Node() : INT::Base(NULL) + { + m_node = NULL; + } + + Node(const Node& node) + : INT::Base(node) + { + m_node = node.m_node; + } + + Node& operator=(const Node& other) + { + Base::operator=(other); + m_node = other.m_node; + return *this; + } + + /* Assigning NULL clears both the situation and the node handle. */ Node& operator=(const void* null) + { + ASSERT(null == NULL); + Base::operator=(null); + m_node = NULL; + return *this; + } + + /* Two wrappers are equal when they share the situation and the node handle. */ bool operator==(const Node& other) const + { + return Base::operator==(other) && + m_node == other.m_node; + } + + /* Comparison with NULL: true when either the situation or the node handle is unset. */ bool operator==(const void* null) const + { + ASSERT(null == NULL); + return Base::operator==(null) || + m_node == NULL; + } + + bool operator!=(const Node& other) const + { return !operator==(other); } + + bool operator!=(const void* null) const + { return !operator==(null); } + + /* operator-> yields the wrapper itself, so member access with . and -> reaches the same functions. */ const Node* operator->() const + { return (const Node*)this; } + Node* operator->() + { return this; } + + dom_str getNodeName() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* name; + _try_(S::SDOM_getNodeName(m_sit, m_node, &name)); + return TO_D(INT::_8str(name)); + } + + data_str getNodeValue() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* value; + _try_(S::SDOM_getNodeValue(m_sit, m_node, &value)); + return TO_V(INT::_8str(value)); + } + + void setNodeValue(const data_str& value) + throw(DOMException) + { + ASSERT_VALID(); + 
m_node, FROM_V(value).c_str())); + } + + short getNodeType() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_NodeType type; + _try_(S::SDOM_getNodeType(m_sit, m_node, &type)); + return (short)type; + } + + Node getParentNode() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node parent; + _try_(S::SDOM_getParentNode(m_sit, m_node, &parent)); + return Node(m_sit, parent); + } + + NodeList getChildNodes() const + throw(DOMException); + + Node getFirstChild() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node child; + _try_(S::SDOM_getFirstChild(m_sit, m_node, &child)); + return Node(m_sit, child); + } + + Node getLastChild() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node child; + _try_(S::SDOM_getLastChild(m_sit, m_node, &child)); + return Node(m_sit, child); + } + + Node getPreviousSibling() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node sib; + _try_(S::SDOM_getPreviousSibling(m_sit, m_node, &sib)); + return Node(m_sit, sib); + } + + Node getNextSibling() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node sib; + _try_(S::SDOM_getNextSibling(m_sit, m_node, &sib)); + return Node(m_sit, sib); + } + + NamedNodeMap getAttributes() const + throw(DOMException); + + Document getOwnerDocument() const + throw(DOMException); + + /* Inserts newChild before refChild and returns the inserted node. */ Node insertBefore(const Node& newChild, const Node& refChild) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(newChild); + ASSERT_VALID_NODE(refChild); + _try_(S::SDOM_insertBefore(m_sit, m_node, newChild.m_node, refChild.m_node)); + return newChild; + } + + /* Replaces the child refChild with newChild and returns the node that was replaced. */ Node replaceChild(const Node& newChild, const Node& refChild) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(newChild); + ASSERT_VALID_NODE(refChild); + _try_(S::SDOM_replaceChild(m_sit, m_node, newChild.m_node, refChild.m_node)); + return refChild; /* DOM Node.replaceChild returns the old child, not the new one */ + } + + Node removeChild(const Node& oldChild) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(oldChild); + 
_try_(S::SDOM_removeChild(m_sit, m_node, oldChild.m_node)); + return oldChild; + } + + Node appendChild(const Node& newChild) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(newChild); + _try_(S::SDOM_appendChild(m_sit, m_node, newChild.m_node)); + return newChild; + } + + bool hasChildNodes() const + throw(DOMException) + { + ASSERT_VALID(); + int count = 0; + _try_(S::SDOM_getChildNodeCount(m_sit, m_node, &count)); + return count != 0; + } + + /* Returns a wrapper around the copy produced by SDOM_cloneNode; deep is passed on as 1 or 0. */ Node cloneNode(bool deep) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node node; + _try_(S::SDOM_cloneNode(m_sit, m_node, deep ? 1 : 0, &node)); + return Node(m_sit, node); + } + + /* Not implemented: reports NOT_SUPPORTED_ERR through _try_. */ void normalize() + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + } + + /* No feature is reported as supported; both arguments are ignored. */ bool isSupported(const dom_str& feature, + const dom_str& version) const + { + ASSERT_VALID(); + return false; + } + + dom_str getNamespaceURI() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* uri; + _try_(S::SDOM_getNodeNSUri(m_sit, m_node, &uri)); + return TO_D(INT::_8str(uri)); + } + + dom_str getPrefix() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* prefix; + _try_(S::SDOM_getNodePrefix(m_sit, m_node, &prefix)); + return TO_D(INT::_8str(prefix)); + } + + /* Not implemented: reports NOT_SUPPORTED_ERR through _try_. */ void setPrefix(const dom_str& prefix) + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + } + + dom_str getLocalName() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_char* name; + _try_(S::SDOM_getNodeLocalName(m_sit, m_node, &name)); + return TO_D(INT::_8str(name)); + } + + /* Non-element nodes report false without asking for an attribute count. */ bool hasAttributes() const + throw (DOMException) + { + ASSERT_VALID(); + + if(getNodeType() != ELEMENT_NODE) + return false; + + int count = 0; + _try_(S::SDOM_getAttributeNodeCount(m_sit, m_node, &count)); + return count != 0; + } + + /* Stores data as the node instance data and returns the pointer that was stored before. */ void* setUserData(void* data) + throw(DOMException) + { + ASSERT_VALID(); + void* old = S::SDOM_getNodeInstanceData(m_node); + S::SDOM_setNodeInstanceData(m_node, data); + 
return old; + } + + void* getUserData() const + throw(DOMException) + { + ASSERT_VALID(); + return S::SDOM_getNodeInstanceData(m_node); + } + + /* Serializes this node with SDOM_nodeToString; the returned buffer is copied into a std::string and then released with SablotFree. */ std::string serialize() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Document doc; + _try_(S::SDOM_getOwnerDocument(m_sit, m_node, &doc)); + S::SDOM_char* serialized; + _try_(S::SDOM_nodeToString(m_sit, doc, m_node, &serialized)); + std::string ret(serialized); + S::SablotFree(serialized); + return ret; + } + + /* Valid only when both the situation and the node handle are set. */ bool isValid() const + { + return Base::isValid() && + m_node != NULL; + } + + protected: + Node(S::SablotSituation sit, S::SDOM_Node node) : + INT::Base(sit) { m_node = node; } + + protected: + S::SDOM_Node m_node; + + friend class Document; + friend class INT::ChildNodeList; + friend class INT::AttrNodeList; + friend class INT::DOMNodeList; + friend class INT::AttrNamedNodeMap; + }; + + /* Wrapper for attribute nodes; name and value are the node name and node value. */ class Attr : + public Node + { + public: + Attr() { } + Attr(const Attr& node) : + Node(node) { } + + Attr& operator=(const Attr& other) + { Node::operator=(other); return *this; } + Attr& operator=(const void* null) + { Node::operator=(null); return *this; } + const Attr* operator->() const + { return (const Attr*)this; } + Attr* operator->() + { return this; } + + dom_str getName() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ATTRIBUTE_NODE); + return getNodeName(); + } + + Element getOwnerElement() const + throw(DOMException); + + /* Always reports true. */ bool getSpecified() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ATTRIBUTE_NODE); + return true; + } + + data_str getValue() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ATTRIBUTE_NODE); + return getNodeValue(); + } + + void setValue(const data_str& value) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ATTRIBUTE_NODE); + setNodeValue(value); + } + + protected: + Attr(S::SablotSituation sit, S::SDOM_Node node) : + Node(sit, node) { } + + friend class Element; + friend class Document; + }; + + /** + * Thin wrapper class for 
an element + */ + class Element : + public Node + { + public: + Element() { } + Element(const Element& node) : + Node(node) {} + + Element& operator=(const Element& other) + { Node::operator=(other); return *this; } + Element& operator=(const void* null) + { Node::operator=(null); return *this; } + const Element* operator->() const + { return (const Element*)this; } + Element* operator->() + { return this; } + + /* The tag name is the node name of the element. */ dom_str getTagName() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + return getNodeName(); + } + + /* Reads the attribute value through SDOM_getAttribute and converts it with TO_V. */ data_str getAttribute(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_char* value; + _try_(S::SDOM_getAttribute(m_sit, m_node, FROM_D(name).c_str(), &value)); + return TO_V(INT::_8str(value)); + } + + void setAttribute(const dom_str& name, const data_str& value) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + _try_(S::SDOM_setAttribute(m_sit, m_node, FROM_D(name).c_str(), + FROM_V(value).c_str())); + } + + void removeAttribute(const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + _try_(S::SDOM_removeAttribute(m_sit, m_node, FROM_D(name).c_str())); + } + + /* Wraps the handle produced by SDOM_getAttributeNode; hasAttribute() compares the result with NULL. */ Attr getAttributeNode(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node attr; + _try_(S::SDOM_getAttributeNode(m_sit, m_node, FROM_D(name).c_str(), &attr)); + return Attr(m_sit, attr); + } + + /* Returns a wrapper around the node that SDOM_setAttributeNode reports back in repl. */ Attr setAttributeNode(const Attr& attr) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(attr); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node repl; + _try_(S::SDOM_setAttributeNode(m_sit, m_node, attr.m_node, &repl)); + return Attr(m_sit, repl); + } + + /* Returns a wrapper around the node that SDOM_removeAttributeNode reports back. */ Attr removeAttributeNode(const Attr& attr) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(attr); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node rem; + _try_(S::SDOM_removeAttributeNode(m_sit, m_node, attr.m_node, 
&rem)); + return Attr(m_sit, rem); + } + + NodeList getElementsByTagName(const dom_str& name) const + throw(DOMException); + + /* Namespace-aware variant of getAttribute; the (char*) casts drop the constness of c_str(), presumably to match the SDOM signature - TODO confirm. */ data_str getAttributeNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_char* value; + _try_(S::SDOM_getAttributeNS(m_sit, m_node, (char*)FROM_D(uri).c_str(), + (char*)FROM_D(name).c_str(), &value)); + return TO_V(INT::_8str(value)); + } + + void setAttributeNS(const dom_str& uri, const dom_str& name, + const data_str& value) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + _try_(S::SDOM_setAttributeNS(m_sit, m_node, FROM_D(uri).c_str(), + FROM_D(name).c_str(), FROM_V(value).c_str())); + } + + /* Looks the attribute node up and removes it; nothing is removed when the lookup compares equal to NULL. */ void removeAttributeNS(const dom_str& uri, const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + DOM::Attr attr = getAttributeNodeNS(uri, name); + if(attr != NULL) + removeAttributeNode(attr); + } + + Attr getAttributeNodeNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node attr; + _try_(S::SDOM_getAttributeNodeNS(m_sit, m_node, (char*)FROM_D(uri).c_str(), + (char*)FROM_D(name).c_str(), &attr)); + return Attr(m_sit, attr); + } + + Attr setAttributeNodeNS(const Attr& attr) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(attr); + ASSERT_TYPE(ELEMENT_NODE); + S::SDOM_Node repl; + _try_(S::SDOM_setAttributeNodeNS(m_sit, m_node, attr.m_node, &repl)); + return Attr(m_sit, repl); + } + + NodeList getElementsByTagNameNS(const dom_str& uri, + const dom_str& name) const + throw(DOMException); + + /* True when the attribute node lookup does not compare equal to NULL. */ bool hasAttribute(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + return getAttributeNode(name) != NULL; + } + + bool hasAttributeNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ELEMENT_NODE); + return 
getAttributeNodeNS(uri, name) != NULL; + } + + protected: + Element(S::SablotSituation sit, S::SDOM_Node node) : + Node(sit, node) { } + + friend class Attr; + friend class Document; + }; + + + /* Wrapper for text, CDATA section and comment nodes; each edit reads the node value, changes the copy and writes it back. */ class CharacterData : + public Node + { + public: + CharacterData() { } + CharacterData(const Node& node) : + Node(node) { } + + CharacterData& operator=(const CharacterData& other) + { Node::operator=(other); return *this; } + CharacterData& operator=(const void* null) + { Node::operator=(null); return *this; } + const CharacterData* operator->() const + { return (const CharacterData*)this; } + CharacterData* operator->() + { return this; } + + void appendData(const data_str& data) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + val.append(data); + setNodeValue(val); + } + + /* NOTE(review): offset and count go to data_str::erase unchecked; presumably data_str is a std::basic_string, in which case a bad offset raises a non-DOM exception - confirm. */ void deleteData(int offset, int count) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + val.erase(offset, count); + setNodeValue(val); + } + + data_str getData() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + return getNodeValue(); + } + + /* Length is the size() of the node value in data_str units. */ int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + return getNodeValue().size(); + } + + /* NOTE(review): offset is passed to data_str::insert without a range check - confirm the intended error reporting. */ void insertData(int offset, const data_str& data) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + val.insert(offset, data); + setNodeValue(val); + } + + void replaceData(int offset, int count, const data_str& 
arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + val.erase(offset, count); + val.insert(offset, arg); + setNodeValue(val); + } + + void setData(const data_str& data) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + setNodeValue(data); + } + + data_str substringData(int offset, int count) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE || + getNodeType() == COMMENT_NODE); + data_str val = getNodeValue(); + return val.substr(offset, count); + } + + protected: + CharacterData(S::SablotSituation sit, S::SDOM_Node node) : + Node(sit, node) { } + }; + + class Text : + public CharacterData + { + public: + Text() { } + Text(const Node& node) : + CharacterData(node) { } + + Text& operator=(const Text& other) + { CharacterData::operator=(other); return *this; } + Text& operator=(const void* null) + { CharacterData::operator=(null); return *this; } + const Text* operator->() const + { return (const Text*)this; } + Text* operator->() + { return this; } + + Text splitText(int offset) + throw(DOMException); + + protected: + Text(S::SablotSituation sit, S::SDOM_Node node) : + CharacterData(sit, node) { } + + friend class Document; + }; + + /* Wrapper for CDATA section nodes; adds no members of its own beyond the assignment and arrow operators. */ class CDATASection : + public Text + { + public: + CDATASection() { } + CDATASection(const CDATASection& node) : + Text(node) { } + + CDATASection& operator=(const CDATASection& other) + { Text::operator=(other); return *this; } + CDATASection& operator=(const void* null) /* const void* like Text::operator= above; only NULL is a legal argument */ + { Text::operator=(null); return *this; } + const CDATASection* operator->() const + { return (const CDATASection*)this; } + CDATASection* operator->() + { return this; } + + protected: + CDATASection(S::SablotSituation sit, S::SDOM_Node node) : + 
Text(sit, node) { } + + friend class Document; + }; + + /* Wrapper for comment nodes. */ class Comment : + public CharacterData + { + public: + Comment() { } + Comment(const Comment& node) : + CharacterData(node) { } + + Comment& operator=(const Comment& other) + { CharacterData::operator=(other); return *this; } + Comment& operator=(const void* null) /* const void* to match CharacterData::operator=; only NULL is a legal argument */ + { CharacterData::operator=(null); return *this; } + const Comment* operator->() const + { return (const Comment*)this; } + Comment* operator->() + { return this; } + + protected: + Comment(S::SablotSituation sit, S::SDOM_Node node) : + CharacterData(sit, node) { } + + friend class Document; + }; + + /* Wrapper for processing instruction nodes; the data is the node value, the target is not available. */ class ProcessingInstruction : + public Node + { + public: + ProcessingInstruction() { } + ProcessingInstruction(const ProcessingInstruction& node) : + Node(node) { } + + ProcessingInstruction& operator=(const ProcessingInstruction& other) + { Node::operator=(other); return *this; } + ProcessingInstruction& operator=(const void* null) /* const void* to match Node::operator=; only NULL is a legal argument */ + { Node::operator=(null); return *this; } + const ProcessingInstruction* operator->() const + { return (const ProcessingInstruction*)this; } + ProcessingInstruction* operator->() + { return this; } + + data_str getData() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(PROCESSING_INSTRUCTION_NODE); + return getNodeValue(); + } + + /* Not implemented: reports NOT_SUPPORTED_ERR through _try_. */ dom_str getTarget() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(PROCESSING_INSTRUCTION_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + void setData(const data_str& data) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(PROCESSING_INSTRUCTION_NODE); + setNodeValue(data); + } + + protected: + ProcessingInstruction(S::SablotSituation sit, S::SDOM_Node node) : + Node(sit, node) { } + + friend class Document; + }; + + class DocumentFragment : + public Node + { + public: + DocumentFragment() { } + DocumentFragment(const DocumentFragment& node) : + Node(node) { } + + DocumentFragment& operator=(const DocumentFragment& other) + { 
Node::operator=(other); return *this; } + DocumentFragment& operator=(const void* null) /* const void* to match Node::operator=; only NULL is a legal argument */ + { Node::operator=(null); return *this; } + const DocumentFragment* operator->() const + { return (const DocumentFragment*)this; } + DocumentFragment* operator->() + { return this; } + }; + + /* Wrapper for entity nodes; every accessor reports NOT_SUPPORTED_ERR through _try_. */ class Entity : + public Node + { + public: + Entity() { } + Entity(const Entity& node) : + Node(node) { } + + Entity& operator=(const Entity& other) + { Node::operator=(other); return *this; } + Entity& operator=(const void* null) /* const void* to match Node::operator= */ + { Node::operator=(null); return *this; } + const Entity* operator->() const + { return (const Entity*)this; } + Entity* operator->() + { return this; } + + dom_str getNotationName() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ENTITY_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + dom_str getPublicId() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ENTITY_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + dom_str getSystemId() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(ENTITY_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + }; + + class EntityReference : + public Node + { + public: + EntityReference() { } + EntityReference(const EntityReference& node) : + Node(node) { } + + EntityReference& operator=(const EntityReference& other) + { Node::operator=(other); return *this; } + EntityReference& operator=(const void* null) /* const void* to match Node::operator= */ + { Node::operator=(null); return *this; } + const EntityReference* operator->() const + { return (const EntityReference*)this; } + EntityReference* operator->() + { return this; } + }; + + class Notation : + public Node + { + public: + Notation() { } + Notation(const Notation& node) : + Node(node) { } + + Notation& operator=(const Notation& other) + { Node::operator=(other); return *this; } + Notation& operator=(const void* null) /* const void* to match Node::operator= */ + { Node::operator=(null); return *this; } + const Notation* operator->() const + { return (const Notation*)this; } + 
Notation* operator->() + { return this; } + + dom_str getPublicId() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(NOTATION_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + dom_str getSystemId() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(NOTATION_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + }; + + /* Wrapper for document type nodes; the accessors defined inline report NOT_SUPPORTED_ERR through _try_. */ class DocumentType : + public Node + { + public: + DocumentType() { } + DocumentType(const DocumentType& node) : + Node(node) { } + + DocumentType& operator=(const DocumentType& other) + { Node::operator=(other); return *this; } + DocumentType& operator=(const void* null) /* const void* to match Node::operator=; only NULL is a legal argument */ + { Node::operator=(null); return *this; } + const DocumentType* operator->() const + { return (const DocumentType*)this; } + DocumentType* operator->() + { return this; } + + NamedNodeMap getEntities() const + throw(DOMException); + + dom_str getInternalSubset() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + dom_str getName() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return dom_str(); + } + + NamedNodeMap getNotations() const + throw(DOMException); + }; + + + /* Wrapper for a Sablotron document; the SDOM_Document handle is kept in m_node. */ class Document : + public Node + { + public: + Document() { } + Document(const Document& doc) : + Node(doc) { } + Document(S::SablotSituation sit, S::SDOM_Document doc) : + Node(sit, NULL) { m_node = doc; } + + Document& operator=(const Document& other) + { Node::operator=(other); return *this; } + Document& operator=(const void* null) /* const void* to match Node::operator=; only NULL is a legal argument */ + { Node::operator=(null); return *this; } + const Document* operator->() const + { return (const Document*)this; } + Document* operator->() + { return this; } + + /* Always returns a default-constructed (invalid) DocumentType. */ DocumentType getDocType() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + return DocumentType(); + } + + DOMImplementation getImplementation() const + throw(DOMException); + + Element getDocumentElement() const + throw(DOMException) + { + 
ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node child; /* NOTE(review): the first child is returned as the document element without checking that it is an element node */ + _try_(S::SDOM_getFirstChild(m_sit, m_node, &child)); + return Element(m_sit, child); + } + + /* The create functions below ask Sablotron for a new node owned by this document and wrap the returned handle. */ Element createElement(const dom_str& tag) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node el; + _try_(S::SDOM_createElement(m_sit, (S::SDOM_Document)m_node, + &el, FROM_D(tag).c_str())); + return Element(m_sit, el); + } + + /* Not implemented: reports NOT_SUPPORTED_ERR through _try_. */ DocumentFragment createDocumentFragment() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return DocumentFragment(); + } + + Text createTextNode(const data_str& data) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node text; + _try_(S::SDOM_createTextNode(m_sit, (S::SDOM_Document)m_node, + &text, FROM_V(data).c_str())); + return Text(m_sit, text); + } + + Comment createComment(const data_str& data) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node comment; + _try_(S::SDOM_createComment(m_sit, (S::SDOM_Document)m_node, + &comment, FROM_V(data).c_str())); + return Comment(m_sit, comment); + } + + CDATASection createCDATASection(const data_str& data) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node cdata; + _try_(S::SDOM_createCDATASection(m_sit, (S::SDOM_Document)m_node, + &cdata, FROM_V(data).c_str())); + return CDATASection(m_sit, cdata); + } + + /* targ is converted with FROM_D and data with FROM_V before being handed to Sablotron. */ ProcessingInstruction createProcessingInstruction(const dom_str& targ, + const data_str& data) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node pi; + _try_(S::SDOM_createProcessingInstruction(m_sit, + (S::SDOM_Document)m_node, + &pi, FROM_D(targ).c_str(), + FROM_V(data).c_str())); + return ProcessingInstruction(m_sit, pi); + } + + Attr createAttribute(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + 
S::SDOM_Node attr; + _try_(S::SDOM_createAttribute(m_sit, (S::SDOM_Document)m_node, + &attr, FROM_D(name).c_str())); + return Attr(m_sit, attr); + } + + /* Not implemented: reports NOT_SUPPORTED_ERR through _try_. */ EntityReference createEntityReference() + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return EntityReference(); + } + + NodeList getElementsByTagName(const dom_str& name) const + throw(DOMException); + + /* Copies a node into this document with SDOM_cloneForeignNode; deep is passed on as 1 or 0. */ Node importNode(const Node& import, bool deep) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + ASSERT_VALID_NODE(import); + S::SDOM_Node imported; + _try_(S::SDOM_cloneForeignNode(m_sit, (S::SDOM_Document)m_node, + import.m_node, deep ? 1 : 0, &imported)); + return Node(m_sit, imported); + } + + Element createElementNS(const dom_str& uri, const dom_str& tag) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node el; + _try_(S::SDOM_createElementNS(m_sit, (S::SDOM_Document)m_node, + &el, FROM_D(uri).c_str(), FROM_D(tag).c_str())); + return Element(m_sit, el); + } + + Attr createAttributeNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_Node attr; + _try_(S::SDOM_createAttributeNS(m_sit, (S::SDOM_Document)m_node, + &attr, FROM_D(uri).c_str(), FROM_D(name).c_str())); + return Attr(m_sit, attr); + } + + NodeList getElementsByTagNameNS(const dom_str& uri, + const dom_str& name) const + throw(DOMException); + + /* Runs an id() query through SDOM_xql and returns the match only when exactly one node is found, otherwise an invalid Element. NOTE(review): id is spliced into the query text unescaped, and the result list is not disposed when one of the later _try_ calls throws. */ Element getElementById(const dom_str& id) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + dom_str query = "//*[id('" + id + "')]"; + S::SDOM_NodeList result; + _try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result)); + + int length; + _try_(S::SDOM_getNodeListLength(m_sit, result, &length)); + + Element ret; + if(length != 1) + { + ret = Element(); + } + else + { + S::SDOM_Node el; + _try_(S::SDOM_getNodeListItem(m_sit, result, 0, &el)); + ret = 
Element(m_sit, el); + } + + S::SDOM_disposeNodeList(m_sit, result); + return ret; + } + + /* Serializes the whole document with SDOM_docToString; the buffer is copied and then released with SablotFree. */ std::string serialize() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_char* serialized; + _try_(S::SDOM_docToString(m_sit, (S::SDOM_Document)m_node, &serialized)); + std::string ret(serialized); + S::SablotFree(serialized); + return ret; + } + + /* Destroys the underlying Sablotron document and resets this wrapper to NULL. */ void release() + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + S::SDOM_tmpListDump((S::SDOM_Document)m_node, 0); + if(S::SablotDestroyDocument(m_sit, (S::SDOM_Document)m_node)) + _try_(S::SDOM_NOT_OK); + *this = NULL; + } + + protected: + + friend class Node; + friend class DOMImplementation; + }; + + + /* Entry point of the wrapper: holds the SablotSituation and creates documents in it. */ class DOMImplementation : + public INT::Base + { + public: + /* Creates a new situation; a failure is reported as NOT_OK. */ DOMImplementation() + throw(DOMException) : INT::Base(NULL) + { + if(S::SablotCreateSituation(&m_sit)) + _try_(S::SDOM_NOT_OK); + } + DOMImplementation(S::SablotSituation sit) + throw(DOMException) : INT::Base(sit) { } + DOMImplementation(const DOMImplementation& impl) : + INT::Base(impl) { } + + DOMImplementation& operator=(const DOMImplementation& other) + { Base::operator=(other); return *this; } + DOMImplementation& operator=(const void* null) /* const void* to match Base::operator=; only NULL is a legal argument */ + { Base::operator=(null); return *this; } + const DOMImplementation* operator->() const + { return (const DOMImplementation*)this; } + DOMImplementation* operator->() + { return this; } + + /* Creates an empty document and, when qname is not empty, appends a root element (namespaced when uri is not empty); type is not used. */ Document createDocument(const dom_str& uri, const dom_str& qname, + const DocumentType& type) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Document doc; + if(S::SablotCreateDocument(m_sit, &doc)) + _try_(S::SDOM_NOT_OK); + + Document document(m_sit, doc); + + if(!qname.empty()) + { + if(!uri.empty()) + document.appendChild(document.createElementNS(uri, qname)); + else + document.appendChild(document.createElement(qname)); + } + + return document; + } + + DocumentType createDocumentType(const dom_str& qname, + const dom_str& publicId, + const dom_str& systemId) const 
+ throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return DocumentType(); + } + + /* No feature is reported as supported. */ bool hasFeature(const dom_str& feature, const dom_str& version) const + throw(DOMException) + { + ASSERT_VALID(); + return false; + } + + /* Destroys the situation held by this object; a failure is reported as NOT_OK. */ void release() + throw(DOMException) + { + ASSERT_VALID(); + if(S::SablotDestroySituation(m_sit)) + _try_(S::SDOM_NOT_OK); + } + }; + + namespace INT + { + /* Abstract-style base of the node lists: the default members report NOT_SUPPORTED_ERR and isValid() is false until a subclass overrides it. */ class NodeList : + public INT::Base, + public INT::Inst + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NodeList& other) const + { return Base::operator==(other); } +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return 0; + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + protected: + NodeList(S::SablotSituation sit) : + INT::Base(sit) { } + + virtual bool isValid() const + { + return false; + } + + private: + NodeList(const NodeList& list) : INT::Base(list) { } + }; + + /* Live view of the children of one node; length and items are fetched from Sablotron on each call. */ class ChildNodeList : + public NodeList + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NodeList& other) const + { return m_el == ((const ChildNodeList&)other).m_el; } /* reference cast: a by-value cast would need a copy, and the NodeList copy constructor is private */ +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + int length; + _try_(S::SDOM_getChildNodeCount(m_sit, m_el, &length)); + return length; + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node attr; + _try_(S::SDOM_getChildNodeIndex(m_sit, m_el, index, &attr)); + return Node(m_sit, attr); + } + + protected: + ChildNodeList(S::SablotSituation sit, S::SDOM_Node node) : + NodeList(sit) { m_el = node; } + + virtual bool isValid() const + { + return m_el != NULL; + } + + protected: + S::SDOM_Node m_el; + + friend class Node; + }; + + class AttrNodeList : + public NodeList + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NodeList& other) const + 
{ return m_el == ((const AttrNodeList&)other).m_el; } /* reference cast: a by-value cast would need a copy, and the NodeList copy constructor is private */ +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + int length; + _try_(S::SDOM_getAttributeNodeCount(m_sit, m_el, &length)); + return length; + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node attr; + _try_(S::SDOM_getAttributeNodeIndex(m_sit, m_el, index, &attr)); + return Node(m_sit, attr); + } + + protected: + AttrNodeList(S::SablotSituation sit, S::SDOM_Node el) : + NodeList(sit) { m_el = el; } + + virtual bool isValid() const + { + return m_el != NULL; + } + + protected: + S::SDOM_Node m_el; + }; + + + /* Node list backed by an SDOM_NodeList handle, which is disposed in the destructor. */ class DOMNodeList : + public NodeList + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NodeList& other) const + { return m_list == ((DOMNodeList&)other).m_list; } +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + int length; + _try_(S::SDOM_getNodeListLength(m_sit, m_list, &length)); + return length; + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node it; + _try_(S::SDOM_getNodeListItem(m_sit, m_list, index, &it)); + return Node(m_sit, it); + } + + protected: + DOMNodeList(S::SablotSituation sit, S::SDOM_NodeList list) : + NodeList(sit) + { + m_list = list; + } + + ~DOMNodeList() + { + if(m_list != NULL) + S::SDOM_disposeNodeList(m_sit, m_list); + m_list = NULL; + } + + virtual bool isValid() const + { + return m_list != NULL; + } + + protected: + S::SDOM_NodeList m_list; + + friend class Element; + friend class Document; + }; + + + /* Base of the named node maps: the default members report NOT_SUPPORTED_ERR until a subclass overrides them. */ class NamedNodeMap : + public INT::Base, + public INT::Inst + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NamedNodeMap& other) const + { return Base::operator==(other); } /* the comparison result has to be returned; falling off the end of a bool function is undefined */ +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return 0; + } + + virtual Node getNamedItem(const dom_str& name) const + throw(DOMException) + { + 
ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node getNamedItemNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node removeNamedItem(const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node removeNamedItemNS(const dom_str& uri, const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node setNamedItem(const Node& arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(arg); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + virtual Node setNamedItemNS(const Node& arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(arg); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return Node(); + } + + protected: + NamedNodeMap(S::SablotSituation sit) : + INT::Base(sit) { } + + virtual bool isValid() const + { + return false; + } + + private: + NamedNodeMap(const NamedNodeMap& map) : INT::Base(map) { } + }; + + /* Named node map over the attributes of one element (m_el); the lookups are forwarded to that element. */ class AttrNamedNodeMap : + public NamedNodeMap + { + public: +#ifdef COMPARE_REF + virtual bool operator==(const NamedNodeMap& other) const + { return m_el == ((const AttrNamedNodeMap&)other).m_el; } /* reference cast: a by-value cast would need a copy, and the NamedNodeMap copy constructor is private */ +#endif + virtual int getLength() const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + int length; + _try_(S::SDOM_getAttributeNodeCount(m_sit, m_el.m_node, &length)); + return length; + } + + virtual Node getNamedItem(const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + return m_el.getAttributeNode(name); + } + + virtual Node getNamedItemNS(const dom_str& uri, const dom_str& name) const + throw(DOMException) + { + ASSERT_VALID(); + 
ASSERT_VALID_NODE(m_el); + return m_el.getAttributeNodeNS(uri, name); + } + + virtual Node item(int index) const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Node attr; + _try_(S::SDOM_getAttributeNodeIndex(m_sit, m_el.m_node, index, &attr)); + return Node(m_sit, attr); + } + + virtual Node removeNamedItem(const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + Node node = getNamedItem(name); + if(node != NULL) + m_el.removeAttributeNode((Attr&)node); + return node; + } + + virtual Node removeNamedItemNS(const dom_str& uri, const dom_str& name) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + Node node = getNamedItemNS(uri, name); + if(node != NULL) + m_el.removeAttributeNode((Attr&)node); + return node; + } + + virtual Node setNamedItem(const Node& arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + return m_el.setAttributeNode((Attr&)arg); + } + + virtual Node setNamedItemNS(const Node& arg) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT_VALID_NODE(m_el); + return m_el.setAttributeNodeNS((Attr&)arg); + } + + protected: + AttrNamedNodeMap(S::SablotSituation sit, const Node& el) : + NamedNodeMap(sit) + { + ASSERT_VALID_NODE(el); + ASSERT_NODE_TYPE(el, Node::ELEMENT_NODE); + m_el = (Element&)el; + } + + virtual bool isValid() const + { + return m_el.isValid(); + } + + + protected: + Element m_el; + + friend class Node; + }; + }; + + + inline NodeList Node::getChildNodes() const + throw(DOMException) + { + ASSERT_VALID(); + return NodeList(new INT::ChildNodeList(m_sit, m_node)); + } + + inline NamedNodeMap Node::getAttributes() const + throw(DOMException) + { + ASSERT_VALID(); + if(getNodeType() != ELEMENT_NODE) + return NamedNodeMap(NULL); + + return NamedNodeMap(new INT::AttrNamedNodeMap(m_sit, *this)); + } + + inline Document Node::getOwnerDocument() const + throw(DOMException) + { + ASSERT_VALID(); + S::SDOM_Document doc; + _try_(S::SDOM_getOwnerDocument(m_sit, 
m_node, &doc));
+		return Document(m_sit, doc);
+	}
+
+	// Returns the element this attribute belongs to, as reported by
+	// SDOM_getAttributeElement. Only meaningful on ATTRIBUTE_NODE nodes.
+	inline Element Attr::getOwnerElement() const
+		throw(DOMException)
+	{
+		ASSERT_VALID();
+		ASSERT_TYPE(ATTRIBUTE_NODE);
+		S::SDOM_Node el;
+		_try_(S::SDOM_getAttributeElement(m_sit, m_node, &el));
+		return Element(m_sit, el);
+	}
+
+	// Returns the nodes selected by the query "descendant::" + name,
+	// evaluated with SDOM_xql relative to this element.
+	inline NodeList Element::getElementsByTagName(const dom_str& name) const
+		throw(DOMException)
+	{
+		ASSERT_VALID();
+		ASSERT_TYPE(ELEMENT_NODE);
+		dom_str query = "descendant::" + name;
+		S::SDOM_NodeList result;
+		_try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result));
+		return NodeList(new INT::DOMNodeList(m_sit, result));
+	}
+
+	// Returns the descendants of this element whose namespace URI equals
+	// 'uri' and, unless 'name' is "*", whose local name equals 'name'.
+	// The selection is done by building a query for SDOM_xql.
+	// NOTE(review): 'uri' and 'name' are pasted into the query unescaped,
+	// so a value containing a single quote produces a malformed query.
+	inline NodeList Element::getElementsByTagNameNS(const dom_str& uri,
+													const dom_str& name) const
+		throw(DOMException)
+	{
+		ASSERT_VALID();
+		ASSERT_TYPE(ELEMENT_NODE);
+		dom_str query = "descendant::*[namespace-uri()='" + uri + "'";
+		if(name != "*")
+			query += " and local-name()='" + name + "'";
+		// The predicate has to be closed whether or not a local-name
+		// test was added, otherwise the "*" case is not valid XPath
+		query += "]";
+		S::SDOM_NodeList result;
+		_try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result));
+		return NodeList(new INT::DOMNodeList(m_sit, result));
+	}
+
+	// Returns the nodes selected by the query "descendant::" + name,
+	// evaluated with SDOM_xql relative to the document node.
+	inline NodeList Document::getElementsByTagName(const dom_str& name) const
+		throw(DOMException)
+	{
+		ASSERT_VALID();
+		ASSERT_TYPE(DOCUMENT_NODE);
+		dom_str query = "descendant::" + name;
+		S::SDOM_NodeList result;
+		_try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result));
+		return NodeList(new INT::DOMNodeList(m_sit, result));
+	}
+
+	// Document-level counterpart of Element::getElementsByTagNameNS:
+	// selects descendants by namespace URI and, unless 'name' is "*",
+	// by local name.
+	// NOTE(review): 'uri' and 'name' are pasted into the query unescaped.
+	inline NodeList Document::getElementsByTagNameNS(const dom_str& uri,
+													 const dom_str& name) const
+		throw(DOMException)
+	{
+		ASSERT_VALID();
+		ASSERT_TYPE(DOCUMENT_NODE);
+		dom_str query = "descendant::*[namespace-uri()='" + uri + "'";
+		if(name != "*")
+			query += " and local-name()='" + name + "'";
+		// Close the predicate in every case (see the Element variant)
+		query += "]";
+		S::SDOM_NodeList result;
+		_try_(S::SDOM_xql(m_sit, FROM_D(query).c_str(), m_node, &result));
+		return NodeList(new INT::DOMNodeList(m_sit, result));
+	}
+
+	inline DOMImplementation Document::getImplementation() const
+		throw(DOMException)
+	{
+
ASSERT_VALID(); + ASSERT_TYPE(DOCUMENT_NODE); + return DOMImplementation(m_sit); + } + + inline Text Text::splitText(int offset) + throw(DOMException) + { + ASSERT_VALID(); + ASSERT(getNodeType() == TEXT_NODE || + getNodeType() == CDATA_SECTION_NODE); + + data_str val = getNodeValue(); + setNodeValue(val.substr(0, offset)); + + Document doc = getOwnerDocument(); + ASSERT(doc != NULL); + + Text split(m_sit, NULL); + val = val.substr(0, offset); + + switch(getNodeType()) + { + case TEXT_NODE: + split = doc.createTextNode(val); + break; + + case CDATA_SECTION_NODE: + split = doc.createCDATASection(val); + break; + + default: + ASSERT(false); + }; + + Node parent = getParentNode(); + if(parent != NULL) + { + Node next = getNextSibling(); + if(next != NULL) + parent.insertBefore(split, next); + else + parent.appendChild(split); + } + + return split; + } + + inline NamedNodeMap DocumentType::getEntities() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return NamedNodeMap(NULL); + } + + inline NamedNodeMap DocumentType::getNotations() const + throw(DOMException) + { + ASSERT_VALID(); + _try_(S::SDOM_NOT_SUPPORTED_ERR); + return NamedNodeMap(NULL); + } + +}; // namespace DOM + + +#endif //__SABLO_H__ \ No newline at end of file diff --git a/src/sablo.txt b/src/sablo.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/sablotr.cpp b/src/sablotr.cpp new file mode 100644 index 0000000..465648a --- /dev/null +++ b/src/sablotr.cpp @@ -0,0 +1,121 @@ +// +// AUTHOR +// N. Nielsen +// +// LICENSE +// This software is in the public domain. +// +// The software is provided "as is", without warranty of any kind, +// express or implied, including but not limited to the warranties +// of merchantability, fitness for a particular purpose, and +// noninfringement. 
In no event shall the author(s) be liable for any +// claim, damages, or other liability, whether in an action of +// contract, tort, or otherwise, arising from, out of, or in connection +// with the software or the use or other dealings in the software. +// +// SUPPORT +// Send bug reports to: +// +// SITE +// http://memberwebs.com/nielsen/ +// + +// SABLOTRON C++ WRAPPER CONVERSION FUNCTIONS +// + +#include "wchar.h" +#include "sablo.h" + +bool DOM::transcode16to8(const std::basic_string& data, + std::basic_string& ret) +{ + ret.resize(0); + ret.reserve(data.length() + (data.length() / 2)); + + // basic_string.c_str doesn't work properly everywhere + // most notably not in the g++ std library + + const wchar_t* c = data.length() ? data.data() : L""; + const wchar_t* e = c + data.length(); + + for( ; c != e; c++) + { + if(*c <= 0x007F) + { + ret.append(1, (char)*c); + } + else if(*c <= 0x07FF) + { + ret.append(1, (char)(192 | (*c >> 6))); + ret.append(1, (char)(128 | (*c & 63))); + } + else + { + ret.append(1, (char)(224 | (*c >> 12))); + ret.append(1, (char)(128 | ((*c >> 6) & 63))); + ret.append(1, (char)(128 | (*c & 63)) ); + } + } + + return true; +} + +bool DOM::transcode8to16(const std::basic_string& data, + std::basic_string& ret) +{ + ret.resize(0); + ret.reserve(data.length()); + + // basic_string.c_str doesn't work properly everywhere + // most notably not in the g++ std library + + const char* c = data.length() ? 
data.data() : ""; + const char* e = c + data.length(); + + for( ; c != e; c++) + { + // First 4 bits set + if((c[0] & 0xF8) == 0xF0 && + (c[1] & 0xC0) == 0x80 && + (c[2] & 0xC0) == 0x80 && + (c[3] & 0xC0) == 0x80) + { + ret.append(1, ((wchar_t)c[0] & 7) << 18 | + ((wchar_t)c[1] & 63) << 12 | + ((wchar_t)c[2] & 63) << 6 | + ((wchar_t)c[3] & 63)); + c += 3; + } + + // First 3 bits set + else if((c[0] & 0xF0) == 0xE0 && + (c[1] & 0xC0) == 0x80 && + (c[2] & 0xC0) == 0x80) + { + ret.append(1, ((wchar_t)c[0] & 15) << 12 | + ((wchar_t)c[1] & 63) << 6 | + ((wchar_t)c[2] & 63)); + c += 2; + } + + // First 2 bits set + else if((c[0] & 0xE0) == 0xC0 && + (c[1] & 0xC0) == 0x80) + { + ret.append(1, ((wchar_t)c[0] & 31) << 6 | + ((wchar_t)c[1] & 63)); + c += 1; + } + + // First bit set + else if(!(c[0] & 0x80)) + { + ret.append(1, (wchar_t)c[0]); + } + + else + return false; + } + + return true; +} diff --git a/src/stdafx.cpp b/src/stdafx.cpp new file mode 100644 index 0000000..1679211 --- /dev/null +++ b/src/stdafx.cpp @@ -0,0 +1,8 @@ +// stdafx.cpp : source file that includes just the standard includes +// rtfm.pch will be the pre-compiled header +// stdafx.obj will contain the pre-compiled type information + +#include "stdafx.h" + +// TODO: reference any additional headers you need in STDAFX.H +// and not in this file diff --git a/src/stdafx.h b/src/stdafx.h new file mode 100644 index 0000000..4876942 --- /dev/null +++ b/src/stdafx.h @@ -0,0 +1,21 @@ +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// + +#if !defined(AFX_STDAFX_H__AB96EF4C_5FF8_4004_96FA_747CCDFE3E7C__INCLUDED_) +#define AFX_STDAFX_H__AB96EF4C_5FF8_4004_96FA_747CCDFE3E7C__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + + +// TODO: reference additional headers your program requires here +#include +#include + +//{{AFX_INSERT_LOCATION}} +// Microsoft Visual C++ will insert 
additional declarations immediately before the previous line. + +#endif // !defined(AFX_STDAFX_H__AB96EF4C_5FF8_4004_96FA_747CCDFE3E7C__INCLUDED_) diff --git a/src/usuals.h b/src/usuals.h new file mode 100644 index 0000000..8cb7526 --- /dev/null +++ b/src/usuals.h @@ -0,0 +1,8 @@ +#ifndef __USUALS_H__ +#define __USUALS_H__ + +#ifndef NULL +#define NULL ((void*)0) +#endif + +#endif // __USUALS_H__ \ No newline at end of file diff --git a/src/xmlcomposehelpers.cpp b/src/xmlcomposehelpers.cpp new file mode 100644 index 0000000..4f6f106 --- /dev/null +++ b/src/xmlcomposehelpers.cpp @@ -0,0 +1,103 @@ + +#include "stdafx.h" +#include "RtfParseHelpers.h" + +Level::Level() +{ + m_previous = NULL; + m_element = NULL; + m_destination = NULL; + m_analyser = NULL; +} + +Level::Level(const Level& level) : + m_text(level.m_text) +{ + m_element = NULL; + m_destination = NULL; + m_analyser = NULL; + m_previous = &level; +} + +Level::~Level() +{ + +} + +LevelPtr Level::pushLevel() +{ + LevelPtr level = new Level(*this); + return level; +} + +LevelPtr Level::getPrevious() +{ + return m_previous; +} + +DOM::Element Level::getElement() +{ + if(m_element != NULL) + return m_element; + else if(m_previous) + return m_previous->getElement(); + else + ASSERT(0 && "should always have an element"); + + return DOM::Element(); +} + +void Level::setElement(DOM::Element element, bool deep) +{ + if(deep && m_previous && m_element == NULL) + m_previous->setElement(element, deep); + else + m_element = element; +} + +AnalyserPtr Level::getAnalyser() +{ + if(m_analyser) + return m_analyser; + else if(m_previous) + return m_previous->getAnalyser(); + else + return NULL; +} + +void Level::setAnalyser(AnalyserPtr analyser, bool deep) +{ + if(deep && m_previous && !m_analyser) + m_previous->setAnalyser(analyser, deep); + else + m_analyser = analyser; +} + +DestinationPtr Level::getDestination() +{ + if(m_destination) + return m_destination; + else if(m_previous) + return m_previous->getDestination(); + 
else + return NULL; +} + +void Level::setDestination(DestinationPtr destination, bool deep) +{ + if(deep && m_previous && !m_destination) + m_previous->setDestination(destination, deep); + else + m_destination = destination; +} + +RtfFormatting& Level::getFormatting() +{ + return m_text; +} + +void Level::setTextProperties(RtfFormatting& formatting) +{ + m_text.copy(formatting); +} + diff --git a/src/xmlcomposehelpers.h b/src/xmlcomposehelpers.h new file mode 100644 index 0000000..f6c06e7 --- /dev/null +++ b/src/xmlcomposehelpers.h @@ -0,0 +1,84 @@ +#ifndef __RTFPARSEHELPERS_H__ +#define __RTFPARSEHELPERS_H__ + +#include "Reference.h" +#include +#include "sablo.h" +#include "RtfFormatting.h" + +class RtfParser; + +class Destination : + public Instance +{ +public: + // This is called when the Destination is first used + virtual void initialize() {}; + // Called when data arrives at destination + virtual void charData(wstring data) {}; + // Called when the Destination goes out of scope + virtual void done() {}; + +protected: + RtfParser* m_parser; + friend class RtfParser; +}; + + +class Analyser : + public Instance +{ +public: + // This is called when the Analyser is first used + virtual void initialize() {}; + // Called when a control word is seen + virtual void controlWord(const string& cw, int flags, int param) {}; + // Called when a group is seen within this scope + virtual void groupStart() {}; + // Called when a group ends within this scope + virtual void groupEnd() {}; + // Called when when this analyser goes out of scope + virtual void done() {}; + +protected: + RtfParser* m_parser; + friend class RtfParser; +}; + +class Level; + +typedef Reference DestinationPtr; +typedef Reference AnalyserPtr; +typedef Reference LevelPtr; + +class Level : + public Instance +{ +public: + Level(); + virtual ~Level(); + + LevelPtr getPrevious(); + LevelPtr pushLevel(); + + DOM::Element getElement(); + void setElement(DOM::Element element, bool deep = false); + AnalyserPtr 
getAnalyser(); + void setAnalyser(AnalyserPtr analyser, bool deep = false); + DestinationPtr getDestination(); + void setDestination(DestinationPtr destination, bool deep = false); + RtfFormatting& getFormatting(); + void setTextProperties(RtfFormatting& textProperties); + +protected: + Level(const Level& level); + + LevelPtr m_previous; + DOM::Element m_element; + RtfFormatting m_text; + DestinationPtr m_destination; + AnalyserPtr m_analyser; +}; + + +#endif //__RTFPARSEHELPERS_H__ \ No newline at end of file diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp new file mode 100644 index 0000000..1bfeb30 --- /dev/null +++ b/src/xmlcomposer.cpp @@ -0,0 +1,1811 @@ +// RtfParser.cpp: implementation of the RtfParser class. +// +////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" +#include "RtfAnalyser.h" + +const char* kElDest = "i_dest"; +const char* kElBlock = "i_block"; +const char* kAtFix = "i_fix"; +const char* kAtCell = "i_cell"; +const char* kElListtable = "i_listtable"; +const char* kElListdef = "i_listdef"; + +const char* kElPara = "para"; +const char* kElDoc = "document"; +const char* kElTab = "tab"; +const char* kElSect = "sect"; +const char* kElPage = "page"; +const char* kElStyle = "style"; +const char* kElLine = "line"; +const char* kElList = "list"; +const char* kElStylesheet = "stylesheet"; +const char* kElInfo = "info"; +const char* kElTitle = "title"; +const char* kElAuthor = "author"; +const char* kElOperator = "operator"; +const char* kElB = "b"; +const char* kElHide = "hide"; +const char* kElI = "i"; +const char* kElStrike = "strike"; +const char* kElU = "u"; +const char* kElColor = "color"; +const char* kElCell = "cell"; +const char* kElRow = "row"; +const char* kElTable = "table"; + +const char* kAtList = "list"; +const char* kAtName = "name"; +const char* kAtBold = "bold"; +const char* kAtHidden = "hide"; +const char* kAtItalic = "italic"; +const char* kAtStrike = "strike"; +const char* kAtUnderline = 
"underline"; +const char* kAtColor = "color"; +const char* kAtType = "type"; +const char* kAtOrdered = "ordered"; +const char* kAtStart = "start"; +const char* kAtId = "id"; +const char* kAtIndex = "id"; + +const wchar_t* kValDisc = L"disc"; +const wchar_t* kValLowerAlpha = L"lower-alpha"; +const wchar_t* kValUpperAlpha = L"upper-alpha"; +const wchar_t* kValLowerRoman = L"lower-roman"; +const wchar_t* kValUpperRoman = L"upper-roman"; +const wchar_t* kValArabic = L"arabic"; +const wchar_t* kValNull = L""; + +const wchar_t* kValList = L"list"; +const wchar_t* kValPara = L"para"; +const wchar_t* kValTable = L"table"; + +const char* kNoDuplicates[] = + { kElB, kElU, kElI, kElColor, kElHide, kElColor, NULL }; + +const char* kRemoveTags[] = + { kElDest, kElListdef, kElListtable, NULL }; + +const char* kBlockTags[] = + { kElTable, kElPara, NULL }; + +const char* kHideList[] = + { kAtId, kAtList, NULL }; + +const char* kNSPrefix = "xmlns"; + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +RtfParser::RtfParser(const RtfParserOptions& options) +{ + m_document = NULL; + memcpy(&m_options, &options, sizeof(options)); +} + +RtfParser::~RtfParser() +{ + clear(); + + if(m_impl != NULL) + m_impl.release(); +} + +void RtfParser::clear() +{ + if(m_document != NULL) + { + try + { + m_document.release(); + } + catch(...) 
{ } + + m_document = NULL; + } + LevelHandler::clear(); +} + +void RtfParser::startDocument(RtfReader* reader) +{ + LevelHandler::startDocument(reader); + + // Create a new document + m_document = m_impl.createDocument("", kElDoc, DOM::DocumentType()); + + // TODO: Throw error if document is null + ASSERT(m_document != NULL); + + ASSERT(m_curLevel != NULL); + m_curLevel->setElement(m_document.getDocumentElement(), true); + + // Set the attributes on the top level + setAnalyser(AnalyserPtr(new Root)); + setDestination(DestinationPtr(new Content)); + getTextFormatting().resetPara(); + getTextFormatting().resetText(); +} + +void RtfParser::endDocument() +{ + LevelHandler::endDocument(); + + // Cleanup the tree + removeDuplicates(m_document); + breakTables(m_document); + breakTags(m_document, kElTable, kElRow); + breakTags(m_document, kElRow, kElCell); + wrapTags(m_document, kElCell, kElDest); + breakBlocks(m_document); + breakLists(m_document); + fixLists(m_document); + fixStyles(m_document); + fixBlocks(m_document); + removeTags(m_document); + breakBreak(m_document, kElDoc, kElPage); + breakBreak(m_document, kElDoc, kElSect); + return; +} + + + + + +// ----------------------------------------------------------------------- +// Helper functions + +DOM::Element RtfParser::createElement(const string& name) +{ + ASSERT(name.length() > 0); + return m_document.createElement(name); + + // TODO: Throw exception here if necessary +} + +void RtfParser::replaceElement(const DOM::Element& element) +{ + ASSERT(m_curLevel != NULL); + m_curLevel->setElement(element, true); +} + +void RtfParser::pushElement(const DOM::Element& element) +{ + ASSERT(m_curLevel != NULL); + getElement().appendChild(element); + m_curLevel->setElement(element); +} + +DOM::Element RtfParser::popElement() +{ + DOM::Element element = getElement(); + ASSERT(m_curLevel != NULL); + + DOM::Node parent = element.getParentNode(); + ASSERT(parent.getNodeType() == DOM::Node::ELEMENT_NODE); + + // Set it deep so it 
replaces the current element + m_curLevel->setElement((DOM::Element&)parent, true); + return element; +} + +void RtfParser::setAttribute(const string& name, const wstring& value, DOM::Element el) +{ + ASSERT(name.length() > 0); + if(el == NULL) + el = getElement(); + el.setAttribute(name, value); +} + +void RtfParser::setDestination(DestinationPtr dest) +{ + ASSERT(m_curLevel); + + m_curLevel->setDestination(dest); + dest->m_parser = this; + dest->initialize(); +} + +DestinationPtr RtfParser::replaceDestination(DestinationPtr dest) +{ + ASSERT(m_curLevel); + + DestinationPtr old = m_curLevel->getDestination(); + m_curLevel->setDestination(dest, true); + dest->m_parser = this; + dest->initialize(); + + return old; +} + + +void RtfParser::setAnalyser(AnalyserPtr analy) +{ + ASSERT(m_curLevel); + ASSERT(analy != NULL); + + analy->m_parser = this; + m_curLevel->setAnalyser(analy); + analy->initialize(); +} + +AnalyserPtr RtfParser::getAnalyser() +{ + ASSERT(m_curLevel); + return m_curLevel->getAnalyser(); +} + +DestinationPtr RtfParser::getDestination() +{ + ASSERT(m_curLevel); + return m_curLevel->getDestination(); +} + +RtfFormatting& RtfParser::getTextFormatting() +{ + ASSERT(m_curLevel); + return m_curLevel->getFormatting(); +} + + +// --------------------------------------------------------------------------------- +// Pass this stuff on through to the appropriate analysers etc... 
+ +void RtfParser::charData(wstring data) +{ + ASSERT(m_curLevel != NULL); + DestinationPtr destination = m_curLevel->getDestination(); + if(destination) + { + destination->charData(data); + } + else + { + destination = DestinationPtr(new Content); + setDestination(destination); + } + +} + +void RtfParser::controlWord(const string& cw, int flags, int param) +{ + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->controlWord(cw, flags, param); +} + +void RtfParser::groupStart() +{ + LevelHandler::groupStart(); + + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->groupStart(); +} + +void RtfParser::groupEnd() +{ + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->groupEnd(); + + LevelHandler::groupEnd(); +} + +#define ON_INITIALIZE(cls) \ + void RtfParser::cls::initialize() +#define ON_CONTROLWORD(cls) \ + void RtfParser::cls::controlWord(const string& cw, int flags, int param) +#define ON_CHARDATA(cls) \ + void RtfParser::cls::charData(wstring data) +#define ON_GROUPSTART(cls) \ + void RtfParser::cls::groupStart() +#define ON_GROUPEND(cls) \ + void RtfParser::cls::groupEnd() +#define ON_DONE(cls) \ + void RtfParser::cls::done() +#define AN_ELEMENT(name) \ + m_parser->pushElement(m_parser->createElement(name)) +#define AN_POP_ELEMENT() \ + m_parser->popElement() +#define AN_ATTRIBUTE(name, value) \ + m_parser->setAttribute(name, value) +#define AN_DESTINATION_ATTR(name) \ + m_parser->setDestination(new Attribute(name)) +#define AN_DESTINATION(cls) \ + m_parser->setDestination(new cls) +#define AN_ANALYSER(cls) \ + m_parser->setAnalyser(AnalyserPtr(new cls)) +#define AN_SET_ANALYSER(cls) \ + m_parser->setAnalyser(AnalyserPtr(cls)) +#define HAS_PARAM (flags & kHasParam) +#define DEFAULT_CONTROLWORD processDefault(cw, flags, param) +#define DUMMY 1 == 1 +#define NUM_ATTR(n) m_parser->formatInt(n) + 
+bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int param) +{ + if(cw == "upr") + { + AnalyserPtr analy = m_parser->getAnalyser(); + ASSERT(analy != NULL); + AN_SET_ANALYSER(new Upr(analy)); + return true; + } + + return false; +} + +void RtfParser::ParseAnalyser::applyParaFormatting(RtfFormatting* format, + DOM::Element& el) +{ + if(format == NULL) + format = &(m_parser->getTextFormatting()); + + wstring fix = kValPara; + + int list = format->paraList(); + if(list != -1) + { + el.setAttribute(kAtList, NUM_ATTR(list)); + } + else + { + el.removeAttribute(kAtList); + } + + if(format->paraInTable()) + el.setAttribute(kAtCell, L"1"); + else + el.removeAttribute(kAtCell); + + int style = format->paraStyle(); + if(style != -1) + el.setAttribute(kElStyle, NUM_ATTR(style)); + else + el.removeAttribute(kElStyle); + + el.setAttribute(kAtFix, fix); +} + +DOM::Element RtfParser::ParseAnalyser::getCurrentBlock() +{ + DOM::Node node = m_parser->getElement(); + + if(node.hasChildNodes()) + node = node.getLastChild(); + + return m_parser->getPriorElement(node, kElBlock); + +} + +bool RtfParser::ParseAnalyser::processTextContent(const string& cw, int flags, int param) +{ + DOM::Element el; + bool process = false; + + RtfFormatting& format = m_parser->getTextFormatting(); + + if(cw == "par") + { + el = getCurrentBlock(); + if(el != NULL) + applyParaFormatting(&format, el); + + el = m_parser->createElement(kElBlock); + applyParaFormatting(&format, el); + } + + else if(cw == "intbl") + format.paraSetTable(true); + + else if(cw == "cell") + { + el = getCurrentBlock(); + if(el != NULL) + applyParaFormatting(&format, el); + + el = m_parser->createElement(kElCell); + m_parser->pushElement(el); + m_parser->popElement(); + el = m_parser->createElement(kElBlock); + applyParaFormatting(&format, el); + } + + else if(cw == "trowd") + el = m_parser->createElement(kElRow); + + else if(cw == "tab") + el = m_parser->createElement(kElTab); + + else if(cw == "sect") + el 
= m_parser->createElement(kElSect); + + else if(cw == "page") + el = m_parser->createElement(kElPage); + + else if(cw == "s" && HAS_PARAM) + format.paraSetStyle(param); + + else if(cw == "line") + el = m_parser->createElement(kElLine); + + else if(cw == "header") + AN_ANALYSER(Skip); + else if(cw == "footer") + AN_ANALYSER(Skip); + else if(cw == "bkmkstart") + AN_ANALYSER(Skip); + else if(cw == "listtext") + AN_ANALYSER(Skip); + + else if(cw == "ls" && HAS_PARAM) + format.paraSetList(param); + + if(el != NULL) + { + // This ensures that our content destination is open and ready + DestinationPtr dest = m_parser->getDestination(); + ASSERT(dest != NULL); + dest->charData(kValNull); + + m_parser->pushElement(el); + m_parser->popElement(); + } + + return (el != NULL) || process; + + /* TODO: cell, row, intbl, cellx, trowd*/ +} + +bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, + int param, RtfFormatting& format) +{ + bool on = true; + if(flags & HAS_PARAM && param == 0) + on = false; + + if(cw == "pard") + { + format.resetPara(); +// applyParaFormatting(); + } + else if(cw == "plain") + format.resetText(); + else if(cw == "b") + format.textSetBold(on); + else if(cw == "i") + format.textSetItalic(on); + else if(cw == "v") + format.textSetHidden(on); + else if(cw == "ul") + format.textSetUnderline(on); + else if(cw == "cf" && HAS_PARAM) + format.textSetColor(param); + else + return false; + + return true; +} + +bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, int param) +{ + return processTextFormatting(cw, flags, param, m_parser->getTextFormatting()); +} + + +ON_INITIALIZE(Skip) + { AN_DESTINATION(Null); } +ON_GROUPSTART(Skip) + { AN_ANALYSER(Skip); } + + +RtfParser::Upr::Upr(AnalyserPtr prv) +{ + ASSERT(prv); + prev = prv; +} +ON_GROUPSTART(Upr) + { AN_ANALYSER(Skip); } +ON_GROUPEND(Upr) +{ + ASSERT(prev); + m_parser->setAnalyser(prev); + prev = NULL; +} + + +ON_INITIALIZE(Stylesheet) +{ + 
AN_ELEMENT(kElStylesheet); +} +ON_GROUPSTART(Stylesheet) +{ + AN_ANALYSER(Style); + AN_DESTINATION(Null); +} + + + +ON_INITIALIZE(Style) +{ + // Were not sure if this element is really something + // so we can't always create + haveStyle = false; +} +ON_CONTROLWORD(Style) +{ + // Get the style id + if(flags & kAsterisk) + { + AN_ANALYSER(Skip); + return; + } + + if(!haveStyle) + { + AN_ELEMENT(kElStyle); + AN_DESTINATION_ATTR(kAtName); + haveStyle = true; + } + + if(cw == "s" && flags & kHasParam) + { + AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); + } + + // Otherwise get as much formatting out of the tag as possible + else if(processTextFormatting(cw, flags, param)) + DUMMY; + + else + DEFAULT_CONTROLWORD; +} +ON_GROUPSTART(Style) +{ + AN_ANALYSER(Skip); +} +ON_GROUPEND(Style) +{ + RtfFormatting& props = m_parser->getTextFormatting(); + if(props.textIsBold()) + AN_ATTRIBUTE(kAtBold, L"1"); + if(props.textIsHidden()) + AN_ATTRIBUTE(kAtHidden, L"1"); + if(props.textIsItalic()) + AN_ATTRIBUTE(kAtItalic, L"1"); + if(props.textIsStrike()) + AN_ATTRIBUTE(kAtStrike, L"1"); + if(props.textIsUnderline()) + AN_ATTRIBUTE(kAtUnderline, L"1"); + if(props.textColor() != -1 && m_parser->getOptions().doColors) + AN_ATTRIBUTE(kAtColor, NUM_ATTR(props.textColor())); +} + + + +ON_INITIALIZE(ListTable) +{ + AN_ELEMENT(kElListtable); +} +ON_GROUPSTART(ListTable) +{ + AN_ANALYSER(List); + AN_DESTINATION(Null); +} + + + +ON_INITIALIZE(List) +{ + AN_ELEMENT(kElListdef); + AN_ATTRIBUTE(kAtType, kValDisc); + AN_ATTRIBUTE(kAtOrdered, L"0"); + levelsSeen = 0; +} +ON_CONTROLWORD(List) +{ + if(cw == "listname") + AN_DESTINATION_ATTR(kAtName); + else if(cw == "listid" && HAS_PARAM) + AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); + + // We let listlevel in here too + else if(cw == "levelstartat" && HAS_PARAM) + AN_ATTRIBUTE(kAtStart, NUM_ATTR(param)); + + else if(cw == "levelnfc" && HAS_PARAM) + { + switch(param) + { + case 0: // 1, 2, 3 + case 5: // 1st, 2nd, 3rd + case 6: // One, Two, Three + case 7: // 
First, Second, Third + case 22: // 01, 02, 03 + AN_ATTRIBUTE(kAtType, kValArabic); + break; + case 1: // I, II, III + AN_ATTRIBUTE(kAtType, kValUpperRoman); + break; + case 2: // i, ii, iii + AN_ATTRIBUTE(kAtType, kValLowerRoman); + break; + case 3: // A, B, C + AN_ATTRIBUTE(kAtType, kValUpperAlpha); + break; + case 4: // a, b, c + AN_ATTRIBUTE(kAtType, kValLowerAlpha); + break; + default: + AN_ATTRIBUTE(kAtType, kValDisc); + break; + } + + switch(param) + { + case 0: case 5: case 6: case 7: case 22: + case 1: case 2: case 3: case 4: + AN_ATTRIBUTE(kAtOrdered, L"1"); + break; + default: + AN_ATTRIBUTE(kAtOrdered, L"0"); + } + } + + else + DEFAULT_CONTROLWORD; +} +ON_GROUPSTART(List) +{ + if(levelsSeen > 0) + AN_ANALYSER(Skip); + levelsSeen++; +} + + + + +ON_INITIALIZE(ListOverrideTable) +{ + DOM::Document document = m_parser->getDocument(); + lists = document.getElementsByTagName(kElListdef); + curList = NULL; + lsId = -1; +} +ON_GROUPSTART(ListOverrideTable) +{ + AN_DESTINATION(Null); +} +ON_CONTROLWORD(ListOverrideTable) +{ + // New list override clear + if(cw == "listoverride") + curList = NULL; + + // List id for current listoverride + else if(cw == "listid" && HAS_PARAM) + { + wstring id = NUM_ATTR(param); + + if(lists != NULL) + { + for(int i = 0; i < lists->getLength(); i++) + { + DOM::Node node = lists->item(i); + if(node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE) + { + DOM::Element element = (DOM::Element&)node; + if(element.getAttribute(kAtId) == id) + { + curList = element; + break; + } + } + } + } + } + + // The actual list code + else if(cw == "ls" && HAS_PARAM) + lsId = param; + + // Override the starting level for the node + else if(cw == "levelstartat" && HAS_PARAM) + { + if(curList != NULL) + curList.setAttribute(kAtStart, NUM_ATTR(param)); + } + + else + DEFAULT_CONTROLWORD; + + + // Okay before any overrides take effect we need to duplicate + // the list node for overriding, using the 'listid' and 'ls' we gathered + if(curList != 
NULL && lsId != -1) + { + DOM::Element parent = (DOM::Element&)curList.getParentNode(); + if(parent != NULL) + { + curList = (DOM::Element&)curList.cloneNode(true); + if(curList != NULL) + { + parent.appendChild(curList); + curList.setAttribute(kAtList, NUM_ATTR(lsId)); + } + } + + lsId = -1; + } + +} +ON_GROUPEND(ListOverrideTable) +{ + +} + + + + +ON_INITIALIZE(Info) +{ + // Create a new element + AN_ELEMENT(kElInfo); + AN_DESTINATION(Null); +} +ON_CONTROLWORD(Info) +{ + // The title + if(cw == "title") + { + AN_ELEMENT(kElTitle); + AN_DESTINATION(Raw); + } + else if(cw == "author") + { + AN_ELEMENT(kElAuthor); + AN_DESTINATION(Raw); + } + else if(cw == "operator") + { + AN_ELEMENT(kElOperator); + AN_DESTINATION(Raw); + } + else if(flags & kAsterisk) + AN_ANALYSER(Skip); + else + DEFAULT_CONTROLWORD; +} + + + + +ON_INITIALIZE(Root) +{ + +} +ON_CONTROLWORD(Root) +{ + if(cw == "stylesheet") + AN_ANALYSER(Stylesheet); + else if(cw == "listtable") + AN_ANALYSER(ListTable); + else if(cw == "listoverridetable") + AN_ANALYSER(ListOverrideTable); + else if(cw == "info") + AN_ANALYSER(Info); + else if(cw == "fonttbl") + AN_ANALYSER(Skip); + else if(cw == "colortbl") + AN_ANALYSER(Skip); + else if(cw == "pict") + { + AN_ANALYSER(Skip); + AN_DESTINATION(Null); + } + else if(flags & kAsterisk) + AN_ANALYSER(Skip); + else if(processTextContent(cw, flags, param)) + DUMMY; + else if(processTextFormatting(cw, flags, param)) + DUMMY; + else + DEFAULT_CONTROLWORD; +} + + +ON_INITIALIZE(Content) +{ + parent = m_parser->getElement(); + created = false; +} +ON_CHARDATA(Content) +{ + // Create the first time we get content + if(!created) + { + DOM::Element dest = m_parser->createElement(kElDest); + parent.appendChild(dest); + m_parser->replaceElement(dest); + + DOM::Element el = m_parser->createElement(kElBlock); + m_parser->pushElement(el); + m_parser->popElement(); + + created = true; + } + + if(data.length() == 0) + return; + + int elements = 0; + RtfFormatting& format = 
m_parser->getTextFormatting(); + + // Now do text Properties if necessary + if(format.textIsBold()) + { + AN_ELEMENT(kElB); + elements++; + } + if(format.textIsHidden()) + { + AN_ELEMENT(kElHide); + elements++; + } + if(format.textIsItalic()) + { + AN_ELEMENT(kElI); + elements++; + } + if(format.textIsStrike()) + { + AN_ELEMENT(kElStrike); + elements++; + } + if(format.textIsUnderline()) + { + AN_ELEMENT(kElU); + elements++; + } + if(format.textColor() != -1 && m_parser->getOptions().doColors) + { + AN_ELEMENT(kElColor); + AN_ATTRIBUTE(kAtIndex, NUM_ATTR(format.textColor())); + elements++; + } + + // Write the data to the element + m_parser->getElement().appendChild( + m_parser->getDocument().createTextNode(data)); + + // Now drop out of all the above formatting + while(elements-- > 0) + AN_POP_ELEMENT(); +} + +#if 0 +ON_INITIALIZE(Table) +{ + stack = 0; + level = m_parser->getLevel(); + AN_ELEMENT(kElTable); + AN_DESTINATION(Content); +} + +ON_CONTROLWORD(Table) +{ + ASSERT(stack >= 0); + ASSERT(level != NULL); + + if(cw == "trowd") + { + stack++; + } + else if(cw == "row") + { + stack--; + if(stack <= 0) + m_parser->rewindLevel(level); + } + + else if(processTextContent(cw, flags, param)) + DUMMY; + else if(processTextFormatting(cw, flags, param)) + DUMMY; + else + DEFAULT_CONTROLWORD; + + if(!m_parser->getTextFormatting().paraInTable()) + { + m_parser->rewindLevel(level); + } + +} +#endif + + + + +ON_CHARDATA(Raw) +{ + // Write the data to the element + m_parser->getElement().appendChild( + m_parser->getDocument().createTextNode(data)); +} + + + + +ON_INITIALIZE(Attribute) +{ + element = m_parser->getElement(); + ASSERT(element != NULL); +} + +ON_CHARDATA(Attribute) +{ + // Get the current value + wstring cur = element.getAttribute(name); + + if(data.at(data.size() - 1) == L';') + data.resize(data.size() - 1); + + // Append data + cur.append(data); + + // Write it back + element.setAttribute(name, cur); +} + + + + + +/** + * A quick check to see if a node is an 
element of a certain + * name + */ +bool RtfParser::isElement(const DOM::Node& node, const string& name) +{ + return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && + node.getNodeName() == name; +} + +bool RtfParser::isEqualElement(const DOM::Element& el1, const DOM::Element& el2) +{ + if(el1.getNodeName() == el2.getNodeName()) + return false; + + DOM::NamedNodeMap at1 = el1.getAttributes(); + DOM::NamedNodeMap at2 = el2.getAttributes(); + + if(at1 == NULL && at2 == NULL) + return true; + + if(at1 == NULL || at2 == NULL || + at1->getLength() != at2->getLength()) + return false; + + for(int i = 0; i < at1->getLength(); i++) + { + DOM::Attr attr1 = (DOM::Attr&)at1->item(0); + if(attr1 != NULL) + return false; + + DOM::Attr attr2 = (DOM::Attr&)at2->getNamedItem(attr1.getNodeName()); + if(attr2 != NULL) + return false; + + if(attr1.getNodeValue() == attr2.getNodeValue()) + return false; + } + + return true; +} + +wstring RtfParser::formatInt(int num) +{ + wstring n; + n.format(L"%d", num); + return n; +} + +/** + * Gets the pertinent ancestor of this node, or returns null + * if not found. + */ +DOM::Element RtfParser::getContainingElement(const DOM::Node& node, const string& name) +{ + DOM::Node n = node; + + while(true) + { + n = n.getParentNode(); + if(n == NULL) + break; + + if(isElement(n, name)) + return (DOM::Element&)n; + } + + return DOM::Element(); +} + +DOM::Element RtfParser::getPriorElement(const DOM::Node& node, const string& name) +{ + DOM::Node n = node; + + while(n != NULL) + { + if(isElement(n, name)) + return (DOM::Element&)n; + + n = n.getPreviousSibling(); + } + + DOM::Node parent = node.getParentNode(); + + if(parent == NULL) + return DOM::Element(); + else + return getPriorElement(parent, name); +} + +bool isNsAttr(const string& name) +{ + return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? 
false : true; +} + +void RtfParser::copyAttributes(const DOM::Element& src, DOM::Element& dest, + const char** hideList) +{ + // Now get both sets of attributes + DOM::NamedNodeMap srcMap = src.getAttributes(); + DOM::NamedNodeMap destMap = dest.getAttributes(); + + if(srcMap == NULL || destMap == NULL) + return; + + // And copy them from one to the other + for(int j = 0; j < srcMap->getLength(); j++) + { + DOM::Node attr = srcMap->item(j); + if(attr != NULL) + { + // BUG: Sablotron seems to have a bug in it's + // setAttributeNode implementation. It always + // adds a blank namespace + // attr = attr.cloneNode(false); + // if(attr != NULL) + // destMap.setNamedItem(attr); + + string name = attr.getNodeName(); + + if(hideList) + { + + for(const char** t = hideList; *t != NULL; t++) + { + if(name == *t) + name.clear(); + } + } + + if(name.length() > 0 && !isNsAttr(name)) + dest.setAttribute(attr.getNodeName(), attr.getNodeValue()); + } + } +} + + +void RtfParser::breakBreak(DOM::Document& doc, const string& contain, + const string& tag) +{ + DOM::NodeList els = doc.getElementsByTagName(tag); + if(els != NULL) + { + for(int i = 0; i < els->getLength(); i++) + { + DOM::Element el = (DOM::Element&)els->item(i); +#if 0 + // See if parent node only has this break tag + // in it. If so then replace parent with this + + DOM::Node parent = el.getParentNode(); + + if(parent != NULL) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && + el.getPreviousSibling() == NULL && + el.getNextSibling() == NULL) + { + grandparent.replaceChild(parent.removeChild(el), parent); + } + } +#endif + + breakElement(el, contain); + } + } +} + +/** + * Breaks a paragraph up through a previous level. Calls itself + * recursively to break paragraphs totally free up to containing + * destination. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * + * This is a + * test of your concentration. 
+ * + */ +bool RtfParser::breakElement(DOM::Element& el, const string& contain) +{ + ASSERT(el != NULL); + + DOM::Element parent = (DOM::Element&)el.getParentNode(); + DOM::Element grandparent; + + string s = el.getNodeName(); + s = parent.getNodeName(); + + // Get the parent node + if(parent != NULL) + grandparent = (DOM::Element&)parent.getParentNode(); + + // Make sure we have something to work with before continuing + if(grandparent == NULL || parent == NULL || + isElement(parent, contain)) + return true; + + DOM::Node e; + + // Check to see if this is the first node in the parent. + // If so then just move out to before + if(el.getPreviousSibling() == NULL) + { + e = grandparent.insertBefore(parent.removeChild(el), parent); + } + + + // Check to see if this is the last node in the parent. + // If so then just move out to after the parent + else if(el.getNextSibling() == NULL) + { + DOM::Node next = parent.getNextSibling(); + if(next == NULL) + e = grandparent.appendChild(parent.removeChild(el)); + else + e = grandparent.insertBefore(parent.removeChild(el), next); + } + + + // Otherwise it's in the middle so split the parent + // element etc... + else + { + // Clone it but not deep + DOM::Element parent2 = (DOM::Element&)parent.cloneNode(false); + + if(parent2 == NULL) + return false; + + // Flag that tells us whether we moved anything up to parent + bool moved = false; + + // Now move all nodes after this one to the second parent. + while((e = el.getNextSibling()) != NULL) + { + parent2.appendChild(parent.removeChild(e)); + moved = true; + } + + // Remove the element from it's parent + e = parent.removeChild(el); + + // Okay now we move the paragraph up to the parent + insertAfter(grandparent, e, parent); + if(moved) + insertAfter(grandparent, parent2, e); + } + + // Now call it again with the paragraph in the new position + // untill everything's cut through! 
+ return breakElement((DOM::Element&)e, contain); +} + +/** + * Changes from a marker based paragraph system to a contained + * paragraph system. Also applies paragraph attributes to the + * appropriate paragraph. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * This is a + * test of your concentration. + */ +void RtfParser::breakBlocks(DOM::Document& document) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = document.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (DOM::Element&)blocks->item(i); + + // If it's the single closed style para then break it + if(block != NULL && !block.hasChildNodes()) + breakElement(block, kElDest); + } + } + + + // Now group stuff in destinations into paras or other blocks + DOM::NodeList destinations = document.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL || !dest.hasChildNodes()) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element block; + + while(child != NULL) + { + // If it's a block + if(isElement(child, kElBlock)) + { + block = (DOM::Element&)child; + child = child.getNextSibling(); + continue; + } + + // If it's already a real block element + for(const char** t = kBlockTags; *t != NULL; t++) + { + if(isElement(child, *t)) + { + block = NULL; + break; + } + } + + // If there's a block then add to it + if(block != NULL) + { + block.appendChild(dest.removeChild(child)); + child = block; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfParser::wrapTags(DOM::Document& doc, const string& tagName, + const string& wrapName) +{ + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != 
NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (DOM::Element&)tags->item(i); + + DOM::Element wrap = doc.createElement(wrapName); + while(tag.hasChildNodes()) + wrap.appendChild(tag.removeChild(tag.getFirstChild())); + + tag.appendChild(wrap); + } + } +} + +void RtfParser::breakTags(DOM::Document& doc, const string& parentName, + const string& tagName) +{ + DOM::NodeList parents = doc.getElementsByTagName(parentName); + if(parents != NULL) + { + for(int i = 0; i < parents->getLength(); i++) + { + DOM::Element parent = (DOM::Element&)parents->item(i); + + if(!parent.hasChildNodes()) + continue; + + DOM::NodeList tags = parent.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + breakElement((DOM::Element&)tags->item(i), parentName); + } + + DOM::Node tag = doc.createElement(tagName); + parent.insertBefore(tag, parent.getFirstChild()); + + DOM::Node child = tag; + + while(child != NULL && (child = child.getNextSibling()) != NULL) + { + if(isElement(child, kElBlock)) + { + DOM::Node next = child.getNextSibling(); + if(next == NULL) + { + parent.removeChild(child); + continue; + } + + if(isElement(next, tagName)) + { + DOM::Node twodown = next.getNextSibling(); + if(!isElement(twodown, kElBlock)) + { + child = parent.insertBefore(parent.removeChild(next), child); + } + else + { + parent.removeChild(child); + child = next; + } + } + } + + if(isElement(child, tagName)) + { + if(!tag.hasChildNodes()) + parent.removeChild(tag); + tag = child; + } + else + { + tag.appendChild(parent.removeChild(child)); + child = tag; + } + } + + if(!tag.hasChildNodes()) + parent.removeChild(tag); + } + } + + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (DOM::Element&)tags->item(i); + DOM::Node parent = tag.getParentNode(); + + if(parent != NULL && !isElement(parent, parentName)) + parent.removeChild(tag); + 
+#if 0 + else if(tag.hasChildNodes()) + { + DOM::NodeList children = tag.getChildNodes(); + if(children != NULL && children->getLength() == 1) + { + DOM::Node child = children->item(0); + if(child != NULL && !child.hasChildNodes() && + isElement(child, kElBlock)) + parent.removeChild(tag); + } + } +#endif + + } + } +} + +void RtfParser::breakLists(DOM::Document& doc) +{ + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element list; + DOM::Element e; + + wstring previd; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + wstring listid = e.getAttribute(kAtList); + if(listid.length() > 0) + { + e.removeAttribute(kAtList); + + if(list == NULL || previd != listid) + { + list = doc.createElement(kElList); + list.setAttribute(kAtList, listid); + dest.insertBefore(list, child); + previd = listid; + } + } + else + { + list = NULL; + previd.clear(); + } + } + + // It's not a block + if(list != NULL) + { + list.appendChild(dest.removeChild(child)); + child = list; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfParser::fixStyles(const DOM::Document doc) +{ + DOM::NodeList styles = doc.getElementsByTagName(kElStyle); + if(styles != NULL) + { + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (DOM::Element&)blocks->item(i); + + if(block == NULL || !block.hasAttribute(kElStyle)) + continue; + + for(int j = 0; j < styles->getLength(); j++) + { + DOM::Element style = 
(DOM::Element&)styles->item(j); + if(style != NULL) + { + if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) + { + wstring name = style.getAttribute(kAtName); + if(name.length() > 0) + block.setAttribute(kElStyle, name); + } + } + } + } + } + + for(int i = 0; i < styles->getLength(); i++) + { + DOM::Element style = (DOM::Element&)styles->item(i); + if(style != NULL) + style.removeAttribute(kAtId); + } + } + + +} + + +void RtfParser::breakTables(DOM::Document& doc) +{ + DOM::NodeList rows = doc.getElementsByTagName(kElRow); + if(rows != NULL) + { + for(int i = 0; i < rows->getLength(); i++) + { + DOM::Element row = (DOM::Element&)rows->item(i); + DOM::Node parent = row.getParentNode(); + + if(parent == NULL) + continue; + + if(isElement(parent, kElBlock)) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && !row.hasChildNodes()) + { + if(row.getPreviousSibling() == NULL) + grandparent.insertBefore(parent.removeChild(row), parent); + else if(row.getNextSibling() == NULL) + insertAfter(grandparent, parent.removeChild(row), parent); + } + } + + breakElement(row, kElDest); + } + } + + + + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element table; + DOM::Element e; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + if(e.getAttribute(kAtCell).length() > 0) + { + e.removeAttribute(kAtCell); + + if(table == NULL) + { + table = doc.createElement(kElTable); + dest.insertBefore(table, child); + } + } + else + { + table = NULL; + } + } + + // It's not 
a block + if(table != NULL) + { + table.appendChild(dest.removeChild(child)); + child = table; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfParser::insertAfter(DOM::Node& parent, const DOM::Node& node, + const DOM::Node& ref) +{ + DOM::Node sibling = ref.getNextSibling(); + if(sibling == NULL) + parent.appendChild(node); + else + parent.insertBefore(node, sibling); +} + +void RtfParser::removeTags(const DOM::Document& doc) +{ + // Go through the list of nodes + for(const char** t = kRemoveTags; *t != NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + for(int j = 0; j < elements->getLength(); j++) + { + DOM::Element el = (DOM::Element&)elements->item(j); + DOM::Node parent = el->getParentNode(); + + if(parent == NULL) + continue; + + while(el.hasChildNodes()) + parent.insertBefore(el.removeChild(el.getFirstChild()), el); + + parent.removeChild(el); + } + } + } +} + +void RtfParser::fixLists(const DOM::Document doc) +{ + DOM::NodeList lists = doc.getElementsByTagName(kElList); + if(lists != NULL) + { + DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); + if(listdefs != NULL) + { + for(int i = 0; i < listdefs->getLength(); i++) + { + DOM::Element listdef = (DOM::Element&)listdefs->item(i); + + if(listdef == NULL || !listdef.hasAttribute(kAtList)) + continue; + + for(int j = 0; j < lists->getLength(); j++) + { + DOM::Element list = (DOM::Element&)lists->item(j); + if(list != NULL) + { + if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) + { + copyAttributes(listdef, list, kHideList); + list.removeAttribute(kAtList); + } + } + } + } + } + } +} + +void RtfParser::fixBlocks(const DOM::Document doc) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + string fix; + wstring val; + + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = 
(DOM::Element&)blocks->item(i); + DOM::Node parent = block.getParentNode(); + + if(parent == NULL) + continue; + + fix.resize(0); + val.resize(0); + + val = block.getAttribute(kAtFix); + if(val.length() > 0) + block.removeAttribute(kAtFix); + + + if(val.length() > 0) + { + val = block.getAttributeNS("", kAtFix); + if(val.length() > 0) + block.removeAttributeNS("", kAtFix); + } + + if(val.length() > 0) + DOM::transcode16to8(val, fix); + + if(fix.length() == 0) + fix = kElPara; + + DOM::Element el = doc.createElement(fix); + copyAttributes(block, el, NULL); + + while(block.hasChildNodes()) + el.appendChild(block.removeChild(block.getFirstChild())); + + parent.replaceChild(el, block); + } + } +} + + +/** + * Removes adjacent duplicate nodes of certain names + */ +void RtfParser::removeDuplicates(const DOM::Document& doc) +{ + // Go through the list of nodes + for(const char** t = kNoDuplicates; *t = NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + int x = elements->getLength(); + for(int j = 0; j < elements->getLength(); j++) + { + + // Make sure it's a valid element + DOM::Element element = (DOM::Element&)elements->item(j); + if(element == NULL) + continue; + + // Get neighbors + DOM::Node previous = element.getPreviousSibling(); + DOM::Node next = element.getNextSibling(); + + // Make sure it's still in the document, as we may have + // removed it on a previous loop + DOM::Node parent = element.getParentNode(); + if(parent == NULL) + continue; + + // Combine previous if valid + if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE && + isEqualElement((DOM::Element&)previous, element)) + { + while(previous.hasChildNodes()) + { + DOM::Node child = previous.removeChild(previous.getLastChild()); + if(child != NULL) + { + if(element.hasChildNodes()) + element.insertBefore(child, element.getFirstChild()); + else + element.appendChild(child); + } + } + + // Remove duplicate node + 
parent.removeChild(previous); + } + + // Combine next if valid + if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE && + isEqualElement((DOM::Element&)next, element)) + { + while(next.hasChildNodes()) + { + DOM::Node child = next.removeChild(next.getFirstChild()); + if(child != NULL) + element.appendChild(child); + } + + // Remove duplicate node + parent.removeChild(next); + } + } + } + } +} \ No newline at end of file diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h new file mode 100644 index 0000000..9463318 --- /dev/null +++ b/src/xmlcomposer.h @@ -0,0 +1,228 @@ +// RtfParser.h: interface for the RtfParser class. +// +////////////////////////////////////////////////////////////////////// + +#if !defined(AFX_RTFANALYSER_H__0243FE38_142A_4BDD_BA2C_1D122965444B__INCLUDED_) +#define AFX_RTFANALYSER_H__0243FE38_142A_4BDD_BA2C_1D122965444B__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +#include "LevelHandler.h" + +struct RtfParserOptions +{ + RtfParserOptions() + { memset(this, 0, sizeof(*this)); } + + bool doColors; +}; + +class RtfParser : public LevelHandler +{ +public: + RtfParser(const RtfParserOptions& options); + virtual ~RtfParser(); + + virtual void startDocument(RtfReader* reader); + virtual void endDocument(); + virtual void controlWord(const string& cw, int flags, int param); + virtual void groupStart(); + virtual void groupEnd(); + virtual void charData(wstring data); + + + // Element management functions + DOM::Element createElement(const string& name); + void pushElement(const DOM::Element& element); + void replaceElement(const DOM::Element& element); + DOM::Element popElement(); + void setAttribute(const string& name, const wstring& value, DOM::Element el = DOM::Element()); + + // Changing the current parser functions + void setAnalyser(AnalyserPtr analy); + void setDestination(DestinationPtr dest); + DestinationPtr replaceDestination(DestinationPtr dest); + + // Current status functions + RtfFormatting& 
getTextFormatting(); + AnalyserPtr getAnalyser(); + DestinationPtr getDestination(); + DOM::Document getDocument() + { return m_document; } + + static bool isElement(const DOM::Node& node, const string& name); + static bool isEqualElement(const DOM::Element& el1, const DOM::Element& el2); + static void copyAttributes(const DOM::Element& src, DOM::Element& dest, const char** hideList); + static void insertAfter(DOM::Node& parent, const DOM::Node& node, const DOM::Node& ref); + static DOM::Element getContainingElement(const DOM::Node& node, const string& name); + static DOM::Element getPriorElement(const DOM::Node& node, const string& name); + + static wstring formatInt(int num); + + const RtfParserOptions& getOptions() + { return m_options; } + +protected: + // Cleanup Functions + void fixBlocks(DOM::Document doc); + void fixLists(const DOM::Document doc); + void fixStyles(const DOM::Document doc); + bool breakElement(DOM::Element& el, const string& contain); + + void breakBreak(DOM::Document& doc, const string& contain, + const string& tag); + void breakLists(DOM::Document& document); + void breakTables(DOM::Document& document); + void breakTags(DOM::Document& doc, const string& parentName, + const string& tagName); + void breakBlocks(DOM::Document& document); + void wrapTags(DOM::Document& document, const string& tagName, + const string& wrapName); + + void removeTags(const DOM::Document& doc); + void removeDuplicates(const DOM::Document& doc); + + virtual void clear(); + + +// Data +protected: + DOM::DOMImplementation m_impl; + DOM::Document m_document; + RtfParserOptions m_options; + + +// Sub classes +protected: + #define DESTINATION(cls) class cls : public Destination { public: + #define END_DESTINATION }; + #define ANALYSER(cls) class cls : public ParseAnalyser { public: + #define END_ANALYSER }; + #define DATA_PORTION protected: + #define INITIALIZE virtual void initialize(); + #define CHARDATA virtual void charData(wstring data); + #define CONTROLWORD 
virtual void controlWord(const string& cw, int flags, int param); + #define GROUPSTART virtual void groupStart(); + #define GROUPEND virtual void groupEnd(); + #define DONE virtual void done(); + + DESTINATION(Content) + INITIALIZE + CHARDATA + DATA_PORTION + bool created; + DOM::Element parent; + END_DESTINATION + + + DESTINATION(Null) + END_DESTINATION + + DESTINATION(Raw) + CHARDATA + END_DESTINATION + + DESTINATION(Attribute) + Attribute(const string& nm) : name(nm) {} + INITIALIZE + CHARDATA + DATA_PORTION + string name; + DOM::Element element; + END_DESTINATION + + class ParseAnalyser : + public Analyser + { + public: + virtual void controlWord(const string& cw, int flags, int param) + { processDefault(cw, flags, param); } + + protected: + // Some helper functions + bool processDefault(const string& cw, int flags, int param); + bool processTextFormatting(const string& cw, int flags, int param, RtfFormatting& format); + bool processTextContent(const string& cw, int flags, int param); + bool processTextFormatting(const string& cw, int flags, int param); + + DOM::Element getCurrentBlock(); + void applyParaFormatting(RtfFormatting* format, DOM::Element& el); + }; + + + ANALYSER(Skip) + INITIALIZE + GROUPSTART + END_ANALYSER + +#if 0 + ANALYSER(Table) + INITIALIZE + CONTROLWORD + DATA_PORTION + int stack; + LevelPtr level; + END_ANALYSER +#endif + + ANALYSER(Upr) + Upr(AnalyserPtr prv); + GROUPSTART + GROUPEND + DATA_PORTION + AnalyserPtr prev; + END_ANALYSER + + ANALYSER(Stylesheet) + INITIALIZE + GROUPSTART + END_ANALYSER + + ANALYSER(Style) + INITIALIZE + CONTROLWORD + GROUPSTART + GROUPEND + DATA_PORTION + bool haveStyle; + END_ANALYSER + + ANALYSER(ListTable) + INITIALIZE + GROUPSTART + END_ANALYSER + + ANALYSER(List) + INITIALIZE + CONTROLWORD + GROUPSTART + DATA_PORTION + int levelsSeen; + END_ANALYSER + + ANALYSER(ListOverrideTable) + INITIALIZE + CONTROLWORD + GROUPSTART + GROUPEND + DATA_PORTION + DOM::NodeList lists; + int lsId; + DOM::Element curList; 
+ END_ANALYSER + + ANALYSER(Info) + INITIALIZE + CONTROLWORD + END_ANALYSER + + ANALYSER(Root) + INITIALIZE + CONTROLWORD + END_ANALYSER + +}; + +#endif // !defined(AFX_RTFANALYSER_H__0243FE38_142A_4BDD_BA2C_1D122965444B__INCLUDED_) -- cgit v1.2.3