From ae28df20927567f2d62b575ed4aef2d127569265 Mon Sep 17 00:00:00 2001 From: Stef Date: Thu, 22 Jul 2004 22:30:48 +0000 Subject: - Comments and formatting changes. --- src/basehandler.cpp | 4 +- src/basehandler.h | 2 + src/domhelpers.cpp | 167 +++++----- src/domhelpers.h | 18 +- src/levelhandler.cpp | 26 +- src/levelhandler.h | 29 +- src/reference.h | 14 +- src/rtfformatting.h | 8 +- src/rtfparser.cpp | 294 +++++++++-------- src/rtfparser.h | 97 ++++-- src/rtfx.cpp | 20 +- src/xmlcomposehelpers.h | 73 +++- src/xmlcomposer.cpp | 475 +++++++++++++++----------- src/xmlcomposer.h | 99 ++++-- src/xmlfixups.cpp | 859 ++++++++++++++++++++++-------------------------- src/xmlfixups.h | 80 ++++- 16 files changed, 1298 insertions(+), 967 deletions(-) diff --git a/src/basehandler.cpp b/src/basehandler.cpp index 4820051..53e5a18 100644 --- a/src/basehandler.cpp +++ b/src/basehandler.cpp @@ -1,4 +1,4 @@ -/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXx/* * Copyright (c) 2004, Nate Nielsen * All rights reserved. * @@ -36,6 +36,8 @@ * */ +// DELETE + #include "usuals.h" #include "basehandler.h" diff --git a/src/basehandler.h b/src/basehandler.h index 55c6e24..3becdf1 100644 --- a/src/basehandler.h +++ b/src/basehandler.h @@ -36,6 +36,8 @@ * */ +// DELETE + // BaseHandler // Implements an RtfHandler for other classes (LevelHandler // and RtfParser) to override. 
diff --git a/src/domhelpers.cpp b/src/domhelpers.cpp index 7b06f55..6cf8052 100644 --- a/src/domhelpers.cpp +++ b/src/domhelpers.cpp @@ -40,142 +40,139 @@ #include "domhelpers.h" #include "tags.h" -/** - * A quick check to see if a node is an element of a certain - * name - */ bool DOMHelpers::isElement(const DOM::Node& node, const string& name) { - return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && - node.getNodeName() == name; + return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && + node.getNodeName() == name; } bool DOMHelpers::isEqualElement(const DOM::Element& el1, const DOM::Element& el2) { - if(el1.getNodeName() == el2.getNodeName()) - return false; + if(el1.getNodeName() == el2.getNodeName()) + return false; - DOM::NamedNodeMap at1 = el1.getAttributes(); - DOM::NamedNodeMap at2 = el2.getAttributes(); + // Compare attributes + DOM::NamedNodeMap at1 = el1.getAttributes(); + DOM::NamedNodeMap at2 = el2.getAttributes(); - if(at1 == NULL && at2 == NULL) - return true; + if(at1 == NULL && at2 == NULL) + return true; - if(at1 == NULL || at2 == NULL || - at1->getLength() != at2->getLength()) - return false; + if(at1 == NULL || at2 == NULL || + at1->getLength() != at2->getLength()) + return false; - for(int i = 0; i < at1->getLength(); i++) - { - DOM::Attr attr1 = (const DOM::Attr&)at1->item(0); - if(attr1 != NULL) - return false; + for(int i = 0; i < at1->getLength(); i++) + { + DOM::Attr attr1 = (const DOM::Attr&)at1->item(0); + if(attr1 != NULL) + return false; - DOM::Attr attr2 = (const DOM::Attr&)at2->getNamedItem(attr1.getNodeName()); + DOM::Attr attr2 = (const DOM::Attr&)at2->getNamedItem(attr1.getNodeName()); if(attr2 != NULL) - return false; + return false; - if(attr1.getNodeValue() == attr2.getNodeValue()) - return false; - } + if(attr1.getNodeValue() == attr2.getNodeValue()) + return false; + } - return true; + return true; } -/** - * Gets the pertinent ancestor of this node, or returns null - * if not found. 
- */ DOM::Element DOMHelpers::getContainingElement(const DOM::Node& node, const string& name) { DOM::Node n = node; - while(true) + while(true) { - n = n.getParentNode(); - if(n == NULL) - break; + n = n.getParentNode(); + if(n == NULL) + break; + // Match parent to given name if(isElement(n, name)) - return (DOM::Element&)n; - } + return (DOM::Element&)n; + } - return DOM::Element(); + return DOM::Element(); } bool isNsAttr(const string& name) { - return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true; + // Check if this attribute is a xmlns: attribute + return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true; } void DOMHelpers::copyAttributes(const DOM::Element& src, DOM::Element& dest, - const char** hideList) + const char** hideList) { - // Now get both sets of attributes - DOM::NamedNodeMap srcMap = src.getAttributes(); + // Get both sets of attributes + DOM::NamedNodeMap srcMap = src.getAttributes(); DOM::NamedNodeMap destMap = dest.getAttributes(); - if(srcMap == NULL || destMap == NULL) - return; + if(srcMap == NULL || destMap == NULL) + return; - // And copy them from one to the other + // And copy them from one to the other for(int j = 0; j < srcMap->getLength(); j++) - { - DOM::Node attr = srcMap->item(j); - if(attr != NULL) - { - // BUG: Sablotron seems to have a bug in it's - // setAttributeNode implementation. 
It always - // adds a blank namespace - // attr = attr.cloneNode(false); - // if(attr != NULL) - // destMap.setNamedItem(attr); - - string name = attr.getNodeName(); - - if(hideList) - { - - for(const char** t = hideList; *t != NULL; t++) + { + DOM::Node attr = srcMap->item(j); + if(attr != NULL) + { + string name = attr.getNodeName(); + + if(hideList) + { + for(const char** t = hideList; *t != NULL; t++) { - if(name == *t) - name.erase(); - } - } - - if(name.length() > 0 && !isNsAttr(name)) - dest.setAttribute(attr.getNodeName(), attr.getNodeValue()); + if(name == *t) + name.erase(); + } + } + + // BUG: Sablotron seems to have a bug in it's + // setAttributeNode implementation. It always + // adds a blank namespace + // + // attr = attr.cloneNode(false); + // if(attr != NULL) + // destMap.setNamedItem(attr); + + // We never copy xmlns: attributes + if(name.length() > 0 && !isNsAttr(name)) + dest.setAttribute(attr.getNodeName(), attr.getNodeValue()); } - } + } } DOM::Element DOMHelpers::getPriorElement(const DOM::Node& node, const string& name) { - DOM::Node n = node; + DOM::Node n = node; - while(n != NULL) - { - if(isElement(n, name)) - return (DOM::Element&)n; + while(n != NULL) + { + // Note that we return ourselves if it matches + if(isElement(n, name)) + return (DOM::Element&)n; - n = n.getPreviousSibling(); + n = n.getPreviousSibling(); } - DOM::Node parent = node.getParentNode(); + DOM::Node parent = node.getParentNode(); - if(parent == NULL) - return DOM::Element(); - else - return getPriorElement(parent, name); + if(parent == NULL) + return DOM::Element(); + else + return getPriorElement(parent, name); } void DOMHelpers::insertAfter(DOM::Node& parent, const DOM::Node& node, - const DOM::Node& ref) + const DOM::Node& ref) { DOM::Node sibling = ref.getNextSibling(); - if(sibling == NULL) + + if(sibling == NULL) parent.appendChild(node); - else - parent.insertBefore(node, sibling); + else + parent.insertBefore(node, sibling); } - diff --git 
a/src/domhelpers.h b/src/domhelpers.h index 16afd79..043ffd4 100644 --- a/src/domhelpers.h +++ b/src/domhelpers.h @@ -41,16 +41,32 @@ #include "sablo.h" +/* + * DOMHelpers + * + * A collection of functions for doing some things with an XML DOM. + * Used mainly by XMLComposer. + */ class DOMHelpers { public: - // DOM Helper Functions + // Check if given node is an element with a certain name static bool isElement(const DOM::Node& node, const string& name); + + // Check if two elements have the same name and attributes static bool isEqualElement(const DOM::Element& el1, const DOM::Element& el2); + + // Copy attributes from one element to another optionaly ignoring some static void copyAttributes(const DOM::Element& src, DOM::Element& dest, const char** hideList); + + // Insert a child node after a given reference node static void insertAfter(DOM::Node& parent, const DOM::Node& node, const DOM::Node& ref); + + // Get containing element of a given name static DOM::Element getContainingElement(const DOM::Node& node, const string& name); + + // Get previous element (in XML flow) of a given name static DOM::Element getPriorElement(const DOM::Node& node, const string& name); }; diff --git a/src/levelhandler.cpp b/src/levelhandler.cpp index 7fc2dd6..cdd3337 100644 --- a/src/levelhandler.cpp +++ b/src/levelhandler.cpp @@ -39,9 +39,12 @@ #include "usuals.h" #include "levelhandler.h" +/* ---------------------------------------------------------------------------------- + * CONSTRUCTION + */ LevelHandler::LevelHandler() { - + m_reader = NULL; } LevelHandler::~LevelHandler() @@ -54,12 +57,18 @@ void LevelHandler::clear() m_curLevel.release(); m_topLevel.release(); - BaseHandler::clear(); + m_parser = NULL; } -void LevelHandler::startDocument(RtfReader* reader) +/* ---------------------------------------------------------------------------------- + * OVERRIDES + */ + +void LevelHandler::startDocument(RtfParser* parser) { - BaseHandler::startDocument(reader); + clear(); + + 
m_parser = parser; m_topLevel = new Level; m_curLevel = m_topLevel; @@ -67,12 +76,11 @@ void LevelHandler::startDocument(RtfReader* reader) void LevelHandler::endDocument() { - BaseHandler::endDocument(); + } void LevelHandler::groupStart() { - BaseHandler::groupStart(); ASSERT(m_curLevel); pushLevel(); } @@ -81,9 +89,12 @@ void LevelHandler::groupEnd() { ASSERT(m_curLevel); popLevel(); - BaseHandler::groupEnd(); } +/* ---------------------------------------------------------------------------------- + * OPERATIONS + */ + DOM::Element LevelHandler::getElement() { ASSERT(m_curLevel); @@ -101,7 +112,6 @@ void LevelHandler::popLevel() // Pull a level off the stack LevelPtr level = m_curLevel->getPrevious(); - // TODO: report errors here if(level) m_curLevel = level; } diff --git a/src/levelhandler.h b/src/levelhandler.h index 3077c13..bee1a8e 100644 --- a/src/levelhandler.h +++ b/src/levelhandler.h @@ -44,36 +44,49 @@ #define __LEVELHANDLER_H__ #include "rtfreader.h" -#include "basehandler.h" -#include "rtfparsehelpers.h" #include "reference.h" +#include "rtfparsehelpers.h" +/* + * LevelHandler + * + * A base class that manages a set of Levels (see XMLComposeHelpers.cpp) + * based on the RTF groups seen. 
+ */ class LevelHandler - : public BaseHandler + : public RTFHandler { public: LevelHandler(); virtual ~LevelHandler(); - virtual void startDocument(RtfReader* reader); + // Overrides + virtual void startDocument(RtfParser* parser); virtual void endDocument(); virtual void groupStart(); virtual void groupEnd(); + // Convenience function to get XML element from current level virtual DOM::Element getElement(); + // Create a new level on top of stack void pushLevel(); + + // Pop top level and discard void popLevel(); + + // Back out all the way past a given level void rewindLevel(LevelPtr ptr); - LevelPtr getLevel(); + // Get the current level + LevelPtr getLevel(); protected: virtual void clear(); - LevelPtr m_topLevel; - LevelPtr m_curLevel; + LevelPtr m_topLevel; // First level + LevelPtr m_curLevel; // The current level + RtfParser* m_parser; // The parser we're listening to }; - #endif // __LEVELHANDLER_H__ diff --git a/src/reference.h b/src/reference.h index 1e68515..1a78d4c 100644 --- a/src/reference.h +++ b/src/reference.h @@ -39,6 +39,11 @@ #ifndef __REFERENCE_H__ #define __REFERENCE_H__ +/* + * Reference + * + * A basic reference counting pointer + */ template class Reference { @@ -60,9 +65,7 @@ public: } ~Reference() - { - release(); - } + { release(); } Reference(const Reference& orig) { @@ -126,6 +129,11 @@ private: C* m_ptr; }; +/* + * Instance + * + * A basic reference counted object. + */ class Instance { public: diff --git a/src/rtfformatting.h b/src/rtfformatting.h index 6fbcf57..bb49cf1 100644 --- a/src/rtfformatting.h +++ b/src/rtfformatting.h @@ -39,6 +39,13 @@ #ifndef __RTFTEXPROPERTIES_H__ #define __RTFTEXPROPERTIES_H__ +/* + * RtfFormatting + * + * For keeping track of all the various transient formatting options + * within a given Rtf group. Any supported text options (not block) + * should be added here. 
+ */ class RtfFormatting { public: @@ -161,7 +168,6 @@ protected: int m_list; bool m_inTbl; - // TODO: Character styles }; diff --git a/src/rtfparser.cpp b/src/rtfparser.cpp index 6d07c80..78945b6 100644 --- a/src/rtfparser.cpp +++ b/src/rtfparser.cpp @@ -36,6 +36,8 @@ * */ +// RENAME RTFParser.cpp + #include "usuals.h" #include @@ -43,11 +45,11 @@ #include "rtfreader.h" -const int RtfHandler::kAsterisk = 0x00000001; -const int RtfHandler::kHasParam = 0x00000002; -const int RtfHandler::kIsEncoded = 0x00000004; +/* ---------------------------------------------------------------------------------- + * CONSTRUCTION + */ -RtfReader::RtfReader() +RTFParser::RTFParser() { m_handler = NULL; m_depth = 0; @@ -57,12 +59,17 @@ RtfReader::RtfReader() m_uniEatStack.push(0); } -RtfReader::~RtfReader() +RTFParser::~RTFParser() { } -bool RtfReader::parse(string fileName) + +/* ---------------------------------------------------------------------------------- + * PUBLIC METHODS + */ + +bool RTFParser::parse(string fileName) { FILE* file = fopen(fileName.c_str(), "r"); if(!file) @@ -75,26 +82,134 @@ bool RtfReader::parse(string fileName) return ret; } -void RtfReader::emptyData(RtfContext& cx) +bool RTFParser::parse(FILE* file) +{ + int ch = 0; + + // The group depth + m_depth = 0; + m_parseErrors = ""; + + RtfContext cx; + cx.isData = false; + cx.file = file; + cx.data = L""; + + if(m_handler) + m_handler->startDocument(this); + + while(1) + { + ch = fgetc(file); + if(ch == EOF) + goto done; + + // TODO: Do we need this ? 
+ if(!cx.isData) + { + switch(ch) + { + + // Starting a control word + case '\\': + if(!parseControlWord(cx)) + goto done; + break; + + // Starting an RTF group + case '{': + { + // Send all previous data + flushData(cx); + + // Handle any unicode destinations properly + m_uniEatStack.push(m_uniEatStack.top()); + + if(m_handler) + m_handler->groupStart(); + + m_depth++; + } + break; + + case '}': + { + // Send all previous data + flushData(cx); + + if(m_handler) + m_handler->groupEnd(); + + // Handle any unicode destinations properly + if(!m_uniEatStack.empty()) + m_uniEatStack.pop(); + + m_depth--; + } + break; + + default: + cx.isData = true; + break; + } + } + + if(cx.isData) + { + // We translate tabs into the appropriate control word + if(ch == '\t') + sendControlWord(cx, "tab", 0, -1); + + // line endings aren't used + else if(!strchr("\r\n", ch)) + sendData(cx, ch); + + cx.isData = false; + } + } + + +done: + + if(m_depth != 0) + m_parseErrors.append("unmatched braces\n"); + + if(m_handler) + m_handler->endDocument(); + + // If any parse errors return failure + return m_parseErrors.empty(); +} + + +/* ---------------------------------------------------------------------------------- + * HANDLER CALLS + */ + +void RTFParser::flushData(RtfContext& cx) { if(!cx.data.empty()) { if(m_handler) m_handler->charData(cx.data); + cx.data.resize(0); } } -void RtfReader::sendData(RtfContext& cx, wchar_t ch) +void RTFParser::sendData(RtfContext& cx, wchar_t ch) { + // Skip unicode chars we've been asked to if(m_uniEat > 0) m_uniEat--; + else cx.data.append(1, ch); } -void RtfReader::sendData(RtfContext& cx, wstring data) +void RTFParser::sendData(RtfContext& cx, wstring data) { + // Skip any unicode chars we've been asked to if(m_uniEat > 0) { int len = data.size(); @@ -110,16 +225,24 @@ void RtfReader::sendData(RtfContext& cx, wstring data) } } -void RtfReader::sendControlWord(RtfContext& cx, string cw, int flags, int param) +void RTFParser::sendControlWord(RtfContext& 
cx, string cw, int flags, int param) { - emptyData(cx); + flushData(cx); + if(m_handler) m_handler->controlWord(cw, flags, param); } -bool RtfReader::parseHexChar(RtfContext& cx, int num) + +/* ---------------------------------------------------------------------------------- + * PARSE HELPERS + */ + +bool RTFParser::parseHexChar(RtfContext& cx, int num) { string data; + + // Get num chars and put them in the string for(int i = 0; i < num; i++) { char ch = fgetc(cx.file); @@ -139,6 +262,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num) } } + // If parsing hex, then convert to appropriate unicode if(m_parseHex) { char* end = NULL; @@ -148,6 +272,9 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num) else m_parseErrors.append("invalid hex char: " + data + "\n"); } + + // TODO: Why would we ever want to do this? + // Otherwise just send as a hex control word else { sendControlWord(cx, data, RtfHandler::kIsEncoded, -1); @@ -156,7 +283,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num) return true; } -bool RtfReader::parseControlWord(RtfContext& cx) +bool RTFParser::parseControlWord(RtfContext& cx) { bool isAsterisk = false; string controlword; @@ -171,7 +298,7 @@ bool RtfReader::parseControlWord(RtfContext& cx) bool empty = controlword.empty(); // Part of the name of a control word - // NOTE: Although the RTF specification prohibits upercase + // NOTE: Although the RTF specification prohibits uppercase // control words, MS Word uses them :-/ if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') controlword.append(1, (char)ch); @@ -211,16 +338,16 @@ bool RtfReader::parseControlWord(RtfContext& cx) break; } - // Escaped braces - else if(empty && ch == '{') - { - sendData(cx, L'{'); - } + // Escaped braces + else if(empty && ch == '{') + { + sendData(cx, L'{'); + } - else if(empty && ch == '}') - { - sendData(cx, L'}'); - } + else if(empty && ch == '}') + { + sendData(cx, L'}'); + } // Non breaking space else if(empty && ch == '~') @@ -242,18 +369,7 @@ 
bool RtfReader::parseControlWord(RtfContext& cx) param.append(1, (char)ch); } - // TODO: This looks real hokey and acts that - // way too -#if 0 - // An enter as the first character of a control word - // makes a paragraph - else if(strchr("\n\r", ch)) - { - controlword = "par"; - break; - } -#endif - // Space end a rtf code (but get eaten) + // Space at end a rtf code (it gets eaten) else if(strchr(" ", ch)) break; @@ -331,111 +447,3 @@ bool RtfReader::parseControlWord(RtfContext& cx) return true; } -bool RtfReader::parse(FILE* file) -{ - m_depth = 0; - m_parseErrors = ""; - - int ch = 0; - - RtfContext cx; - cx.isData = false; - cx.file = file; - cx.data = L""; - - if(m_handler) - m_handler->startDocument(this); - - while(1) - { - ch = fgetc(file); - if(ch == EOF) - goto done; - - // Type is undetermined so we figure it out here - if(!cx.isData) - { - switch(ch) - { - case '\\': - if(!parseControlWord(cx)) - goto done; - break; - - case '{': - { - emptyData(cx); - - m_uniEatStack.push(m_uniEatStack.top()); - - if(m_handler) - m_handler->groupStart(); - - m_depth++; - } - break; - - case '}': - { - emptyData(cx); - - if(m_handler) - m_handler->groupEnd(); - - if(!m_uniEatStack.empty()) - m_uniEatStack.pop(); - - m_depth--; - } - break; - - default: - cx.isData = true; - break; - } - } - - if(cx.isData) - { - // We translate tabs into the appropriate control - // word - if(ch == '\t') - sendControlWord(cx, "tab", 0, -1); - -// Don't need this code, the XML outputter -// Takes care of it for us -#if 0 - if(ch == '&') - sendData(cx, L"&"); - - else if(ch == '\'') - sendData(cx, L"'"); - - else if(ch == '"') - sendData(cx, L"""); - - else if(ch == '<') - sendData(cx, L"<"); - - else if(ch == '>') - sendData(cx, L">"); -#endif - - // enters a - else if(!strchr("\r\n", ch)) - sendData(cx, ch); - - cx.isData = false; - } - } - -done: - - if(m_depth != 0) - m_parseErrors.append("unmatched braces\n"); - - if(m_handler) - m_handler->endDocument(); - - return 
m_parseErrors.empty(); -} diff --git a/src/rtfparser.h b/src/rtfparser.h index bfa2e59..6b9e10d 100644 --- a/src/rtfparser.h +++ b/src/rtfparser.h @@ -36,6 +36,8 @@ * */ +// RENAME RTFParser.h + #ifndef __RTFREADER_H__ #define __RTFREADER_H__ @@ -43,29 +45,22 @@ #include #include -class RtfReader; +class RtfHandler; -class RtfHandler -{ -public: - virtual void startDocument(RtfReader* reader) = 0; - virtual void endDocument() = 0; - virtual void controlWord(const string& cw, int flags, int param) = 0; - virtual void groupStart() = 0; - virtual void groupEnd() = 0; - virtual void charData(wstring data) = 0; - - static const int kAsterisk; - static const int kHasParam; - static const int kIsEncoded; -}; - -class RtfReader +/* + * RTFParser + * + * A class that parses the RTF into it's tags and groups etc... It feeds its + * parsed data into into a handler interface (see below) for processing. + * + * Performs some basic conversion and sanity checking (unicode chars etc...) + * as well. + */ +class RtfParser { public: - RtfReader(); - virtual ~RtfReader(); - + RtfParser(); + virtual ~RtfParser(); bool parse(string fileName); bool parse(FILE* file); @@ -81,12 +76,13 @@ public: void setUnicode(bool unicode); protected: - RtfHandler* m_handler; - int m_depth; - bool m_parseHex; - string m_parseErrors; + RtfHandler* m_handler; // The current handler + int m_depth; // To keep track of group depth + bool m_parseHex; // Whether to parse hex chars or not + string m_parseErrors; // A list of all the RTF parsing errors - // Unicode handling + // TODO: Look at exactly what this is doing + // Unicode char handling bool m_parseUnicode; typedef std::stack StackInt; StackInt m_uniEatStack; @@ -94,19 +90,62 @@ protected: private: + // TODO: Why aren't these just members? 
+ struct RtfContext { - FILE* file; - bool isData; - wstring data; + FILE* file; // The current file being parsed + wstring data; // Any data stored up ready to be sent to handler + bool isData; // TODO: Do we need this? }; + // Parse helpers bool parseControlWord(RtfContext& cx); bool parseHexChar(RtfContext& cx, int num); + + // Convenience functions for calling the handler void sendControlWord(RtfContext& cx, string cw, int flags, int param); void sendData(RtfContext& cx, wchar_t ch); void sendData(RtfContext& cx, wstring data); - void emptyData(RtfContext& cx); + void flushData(RtfContext& cx); +}; + +/* + * RTFHandler + * + * An interface called by RTFParser with tags and groups etc... parsed from + * an RTF file. + */ +class RtfHandler +{ +public: + + // Called at the beginning of the document + virtual void startDocument(RtfReader* reader) = 0; + + // Called at the end of the document + virtual void endDocument() = 0; + + // Called when an RTF control word is hit. Flags below. + // If control word has no param then param is -1 + virtual void controlWord(const string& cw, int flags, int param) = 0; + + // Called when an RTF group opened + virtual void groupStart() = 0; + + // Called when an RTF group is closed + virtual void groupEnd() = 0; + + // A block of character data encountered + virtual void charData(wstring data) = 0; + + // Flags for controlWord + enum + { + kAsterisk = 0x00000001, + kHasParam = 0x00000002, + kIsEncoded = 0x00000004 + }; }; #endif // __RTFREADER_H__ diff --git a/src/rtfx.cpp b/src/rtfx.cpp index 7576d51..a620498 100644 --- a/src/rtfx.cpp +++ b/src/rtfx.cpp @@ -57,19 +57,25 @@ int main(int argc, char* argv[]) try { + // The input file FILE* file = fopen(argv[1], "rb"); if(!file) { - fprintf(stderr, "rtfx: couldn't open file: %s: %s\n", argv[1], strerror(errno)); + fprintf(stderr, "rtfx: couldn't open rtf file: %s: %s\n", argv[1], strerror(errno)); return 1; } + // Default options RtfParserOptions options; - RtfParser 
handler(options); - RtfReader rtf; - rtf.setHandler(&handler); + // Reads RTF tags and blocks + RtfParser rtf; + + // Interprets tags and blocks from RTFParser + XMLComposer composer(options); + rtf.setHandler(&composer); + bool ret = rtf.parse(file); fclose(file); @@ -79,8 +85,9 @@ int main(int argc, char* argv[]) return 1; } - - DOM::Document doc = handler.getDocument(); + // TODO: This is disgusting. We need to bug the sablotron guys + // for a better way to serialize a document. + DOM::Document doc = composer.getDocument(); string xml = doc.serialize(); FILE* out = fopen(argv[2], "wb"); @@ -93,7 +100,6 @@ int main(int argc, char* argv[]) fwrite(xml.c_str(), 1, xml.length(), out); fclose(out); return 0; - } catch(DOM::DOMException& e) { diff --git a/src/xmlcomposehelpers.h b/src/xmlcomposehelpers.h index f91923e..9d36ef5 100644 --- a/src/xmlcomposehelpers.h +++ b/src/xmlcomposehelpers.h @@ -36,6 +36,9 @@ * */ +// RENAME: XMLComposeHelpers.h +// Possibly merge with XMLComposer.h + #ifndef __RTFPARSEHELPERS_H__ #define __RTFPARSEHELPERS_H__ @@ -44,8 +47,15 @@ #include "sablo.h" #include "rtfformatting.h" -class RtfParser; +class XMLComposer; +/* + * Destination + * + * A destination is a small class that handles the character data found + * in the RTF document. Depending on the current context in the RTF + * different destinations are used. + */ class Destination : public Instance { @@ -58,11 +68,17 @@ public: virtual void done() {}; protected: - RtfParser* m_parser; - friend class RtfParser; + XMLComposer* m_composer; + friend class XMLComposer; }; - +/* + * Analyser + * + * An analyser is a small class that handles the RTF control words. + * Depending on the current context in the RTF different analysers + * are used. 
+ */ class Analyser : public Instance { @@ -79,16 +95,40 @@ public: virtual void done() {}; protected: - RtfParser* m_parser; - friend class RtfParser; + XMLComposer* m_composer; + friend class XMLComposer; }; class Level; +// Reference counted pointers typedef Reference DestinationPtr; typedef Reference AnalyserPtr; typedef Reference LevelPtr; +/* + * Level + * + * A level is a combination of a Destination, Analyser, XML Element and + * some other options. They're used in a stack to push and pop these as + * RTF groups are found. + * + * About the stack: + * Not each level has it's own options. If a certain option isn't found + * in the current level the previous one is looked up. That's what all + * the 'deep' stuff is about below: + * + * get* methods: + * When 'deep' is set look to previous levels for the given object if not + * found at the current level. When not set returns object in current level + * or null when none exists here. + * + * set* methods: + * When 'deep' is set then replace the object currently being used at it's + * own level. So if get* would return an object from a previous level, with + * deep set to true it would replace that object in the given level. When + * not set, then the object is set in the current level. 
+ */ class Level : public Instance { @@ -99,24 +139,33 @@ public: LevelPtr getPrevious(); LevelPtr pushLevel(); + // The current XML Element + // TODO: Add deep semantics here DOM::Element getElement(); void setElement(DOM::Element element, bool deep = false); + + // The current Analyser AnalyserPtr getAnalyser(bool deep = true); void setAnalyser(AnalyserPtr analyser, bool deep = false); + + // The current Destination DestinationPtr getDestination(bool deep = true); void setDestination(DestinationPtr destination, bool deep = false); + + // The current formatting options RtfFormatting& getFormatting(); void setTextProperties(RtfFormatting& textProperties); protected: + + // Constructor for stacking levels Level(const Level& level); - LevelPtr m_previous; - DOM::Element m_element; - RtfFormatting m_text; - DestinationPtr m_destination; - AnalyserPtr m_analyser; + LevelPtr m_previous; // The previous level + DOM::Element m_element; // XML Element for this level + RtfFormatting m_text; // Formatting options for this level + DestinationPtr m_destination; // Destination for this level + AnalyserPtr m_analyser; // Analyser for this level }; - #endif //__RTFPARSEHELPERS_H__ diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp index 7e74f70..6072375 100644 --- a/src/xmlcomposer.cpp +++ b/src/xmlcomposer.cpp @@ -42,20 +42,21 @@ #include "domhelpers.h" #include "tags.h" -////////////////////////////////////////////////////////////////////// -// Construction/Destruction -////////////////////////////////////////////////////////////////////// +/* ---------------------------------------------------------------------------------- + * CONSTRUCTION + */ -RtfParser::RtfParser(const RtfParserOptions& options) +XmlComposer::XmlComposer(const RtfParserOptions& options) { m_document = NULL; memcpy(&m_options, &options, sizeof(options)); + // All autocounters start at 1 for(int i = 0; i < AUTOCOUNT_MAX; i++) m_autocount[i] = 1; } -RtfParser::~RtfParser() +XmlComposer::~XmlComposer() { 
clear(); @@ -63,7 +64,7 @@ RtfParser::~RtfParser() m_impl.release(); } -void RtfParser::clear() +void XmlComposer::clear() { if(m_document != NULL) { @@ -78,9 +79,15 @@ void RtfParser::clear() LevelHandler::clear(); } -void RtfParser::startDocument(RtfReader* reader) + +/* ---------------------------------------------------------------------------------- + * HANDLER OVERRIDES + */ + +void XmlComposer::startDocument(RtfReader* reader) { LevelHandler::startDocument(reader); + ASSERT(m_curLevel != NULL); // Create a new document m_document = m_impl.createDocument("", kElDoc, DOM::DocumentType()); @@ -88,7 +95,7 @@ void RtfParser::startDocument(RtfReader* reader) // TODO: Throw error if document is null ASSERT(m_document != NULL); - ASSERT(m_curLevel != NULL); + // Hook up the top level element m_curLevel->setElement(m_document.getDocumentElement(), true); // Set the attributes on the top level @@ -98,59 +105,106 @@ void RtfParser::startDocument(RtfReader* reader) getTextFormatting().resetText(); } -void RtfParser::endDocument() +void XmlComposer::endDocument() { LevelHandler::endDocument(); - // Cleanup the tree + // Pass 0: Cleanup the tree RtfFixups::removeDuplicates(m_document); RtfFixups::consolidateStartTags(m_document); RtfFixups::consolidateEndTags(m_document); + + // Pass 1: Block breakout RtfFixups::breakTables(m_document); RtfFixups::breakTags(m_document, kElTable, kElRow); RtfFixups::breakTags(m_document, kElRow, kElCell); RtfFixups::wrapTags(m_document, kElCell, kElDest); RtfFixups::breakBlocks(m_document); RtfFixups::breakLists(m_document); + + // Pass 2: Fixups RtfFixups::fixLists(m_document); RtfFixups::fixStyles(m_document); RtfFixups::fixBlocks(m_document); RtfFixups::removeTags(m_document); RtfFixups::breakBreak(m_document, kElDoc, kElPage); RtfFixups::breakBreak(m_document, kElDoc, kElSect); + + // Pass 3: Final cleanup RtfFixups::removeDuplicates(m_document); return; } +void XmlComposer::charData(wstring data) +{ + ASSERT(m_curLevel != NULL); + 
DestinationPtr destination = m_curLevel->getDestination(); + if(destination) + { + destination->charData(data); + } + else + { + // TODO: Change this so it sends char data to new destination + // We should always have a destination + destination = DestinationPtr(new Content); + setDestination(destination); + } +} +void XmlComposer::controlWord(const string& cw, int flags, int param) +{ + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->controlWord(cw, flags, param); +} +void XmlComposer::groupStart() +{ + LevelHandler::groupStart(); + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->groupStart(); +} -// ----------------------------------------------------------------------- -// Helper functions +void XmlComposer::groupEnd() +{ + LevelHandler::groupEnd(); + + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->groupEnd(); +} + + +/* ---------------------------------------------------------------------------------- + * HELPER FUNCTIONS + */ -DOM::Element RtfParser::createElement(const string& name) +DOM::Element XmlComposer::createElement(const string& name) { ASSERT(name.length() > 0); return m_document.createElement(name); - - // TODO: Throw exception here if necessary } -void RtfParser::replaceElement(const DOM::Element& element) +void XmlComposer::replaceElement(const DOM::Element& element) { ASSERT(m_curLevel != NULL); m_curLevel->setElement(element, true); } -void RtfParser::pushElement(const DOM::Element& element) +void XmlComposer::pushElement(const DOM::Element& element) { ASSERT(m_curLevel != NULL); getElement().appendChild(element); m_curLevel->setElement(element); } -DOM::Element RtfParser::popElement() +DOM::Element XmlComposer::popElement() { DOM::Element element = getElement(); ASSERT(m_curLevel != NULL); @@ -163,7 +217,7 @@ DOM::Element RtfParser::popElement() return element; } -void 
RtfParser::setAttribute(const string& name, const wstring& value, DOM::Element el) +void XmlComposer::setAttribute(const string& name, const wstring& value, DOM::Element el) { ASSERT(name.length() > 0); if(el == NULL) @@ -171,7 +225,7 @@ void RtfParser::setAttribute(const string& name, const wstring& value, DOM::Elem el.setAttribute(name, value); } -void RtfParser::setAttribute(const string& name, int value, DOM::Element el) +void XmlComposer::setAttribute(const string& name, int value, DOM::Element el) { ASSERT(name.length() > 0); if(el == NULL) @@ -179,153 +233,116 @@ void RtfParser::setAttribute(const string& name, int value, DOM::Element el) el.setAttribute(name, formatInt(value)); } -void RtfParser::setDestination(DestinationPtr dest) +void XmlComposer::setDestination(DestinationPtr dest) { ASSERT(m_curLevel); m_curLevel->setDestination(dest); - dest->m_parser = this; + dest->m_composer = this; dest->initialize(); } -DestinationPtr RtfParser::replaceDestination(DestinationPtr dest) +DestinationPtr XmlComposer::replaceDestination(DestinationPtr dest) { ASSERT(m_curLevel); DestinationPtr old = m_curLevel->getDestination(); m_curLevel->setDestination(dest, true); - dest->m_parser = this; + dest->m_composer = this; dest->initialize(); return old; } -void RtfParser::setAnalyser(AnalyserPtr analy) +void XmlComposer::setAnalyser(AnalyserPtr analy) { ASSERT(m_curLevel); ASSERT(analy != NULL); - analy->m_parser = this; + analy->m_composer = this; m_curLevel->setAnalyser(analy); analy->initialize(); } -AnalyserPtr RtfParser::getAnalyser() +AnalyserPtr XmlComposer::getAnalyser() { ASSERT(m_curLevel); return m_curLevel->getAnalyser(); } -DestinationPtr RtfParser::getDestination() +DestinationPtr XmlComposer::getDestination() { ASSERT(m_curLevel); return m_curLevel->getDestination(); } -RtfFormatting& RtfParser::getTextFormatting() +RtfFormatting& XmlComposer::getTextFormatting() { ASSERT(m_curLevel); return m_curLevel->getFormatting(); } -int RtfParser::getAutoCount(int 
type) +int XmlComposer::getAutoCount(int type) { ASSERT(type < AUTOCOUNT_MAX); return m_autocount[type]; } -void RtfParser::incrementAutoCount(int type) +void XmlComposer::incrementAutoCount(int type) { ASSERT(type < AUTOCOUNT_MAX); m_autocount[type]++; } -// --------------------------------------------------------------------------------- -// Pass this stuff on through to the appropriate analysers etc... - -void RtfParser::charData(wstring data) +wstring XmlComposer::formatInt(int num) { - ASSERT(m_curLevel != NULL); - DestinationPtr destination = m_curLevel->getDestination(); - if(destination) - { - destination->charData(data); - } - else - { - destination = DestinationPtr(new Content); - setDestination(destination); - } - -} + char buff[16]; -void RtfParser::controlWord(const string& cw, int flags, int param) -{ - ASSERT(m_curLevel != NULL); - AnalyserPtr analyser = m_curLevel->getAnalyser(); - if(analyser) - analyser->controlWord(cw, flags, param); -} + // Certain OSs don't support swprintf :( + sprintf(buff, "%d", num); -void RtfParser::groupStart() -{ - LevelHandler::groupStart(); + wstring n; + for(char* s = buff; *s; s++) + n.append(1, *s); - ASSERT(m_curLevel != NULL); - AnalyserPtr analyser = m_curLevel->getAnalyser(); - if(analyser) - analyser->groupStart(); + return n; } -void RtfParser::groupEnd() -{ - ASSERT(m_curLevel != NULL); - bool done = true; - LevelHandler::groupEnd(); - - AnalyserPtr analyser = m_curLevel->getAnalyser(); - if(analyser) - analyser->groupEnd(); -} +/* ---------------------------------------------------------------------------------- + * CONVENIENCE MACROS USED BELOW + */ -#define ON_INITIALIZE(cls) \ - void RtfParser::cls::initialize() -#define ON_CONTROLWORD(cls) \ - void RtfParser::cls::controlWord(const string& cw, int flags, int param) -#define ON_CHARDATA(cls) \ - void RtfParser::cls::charData(wstring data) -#define ON_GROUPSTART(cls) \ - void RtfParser::cls::groupStart() -#define ON_GROUPEND(cls) \ - void 
RtfParser::cls::groupEnd() -#define ON_DONE(cls) \ - void RtfParser::cls::done() #define AN_ELEMENT(name) \ - m_parser->pushElement(m_parser->createElement(name)) + m_composer->pushElement(m_composer->createElement(name)) #define AN_POP_ELEMENT() \ - m_parser->popElement() + m_composer->popElement() #define AN_ATTRIBUTE(name, value) \ - m_parser->setAttribute(name, value) + m_composer->setAttribute(name, value) #define AN_DESTINATION_ATTR(name) \ - m_parser->setDestination(new Attribute(name)) + m_composer->setDestination(new Attribute(name)) #define AN_DESTINATION(cls) \ - m_parser->setDestination(new cls) + m_composer->setDestination(new cls) #define AN_ANALYSER(cls) \ - m_parser->setAnalyser(AnalyserPtr(new cls)) + m_composer->setAnalyser(AnalyserPtr(new cls)) #define AN_SET_ANALYSER(cls) \ - m_parser->setAnalyser(AnalyserPtr(cls)) + m_composer->setAnalyser(AnalyserPtr(cls)) #define HAS_PARAM (flags & kHasParam) #define DEFAULT_CONTROLWORD processDefault(cw, flags, param) -#define DUMMY 1 == 1 -#define NUM_ATTR(n) m_parser->formatInt(n) +#define DUMMY 1 == 1 + + +/* ---------------------------------------------------------------------------------- + * BASE ANALYSER + */ -bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int param) +bool XmlComposer::BaseAnalyser::processDefault(const string& cw, int flags, int param) { + // Unicode blocks go to a special analyser if(cw == "upr") { - AnalyserPtr analy = m_parser->getAnalyser(); + AnalyserPtr analy = m_composer->getAnalyser(); ASSERT(analy != NULL); AN_SET_ANALYSER(new Upr(analy)); return true; @@ -334,41 +351,41 @@ bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int p return false; } -void RtfParser::ParseAnalyser::applyParaFormatting(RtfFormatting* format, - DOM::Element& el) +void XmlComposer::BaseAnalyser::applyParaFormatting(RtfFormatting* format, + DOM::Element& el) { if(format == NULL) - format = &(m_parser->getTextFormatting()); + format = 
&(m_composer->getTextFormatting()); wstring fix = kValPara; + // Is it a list? int list = format->paraList(); if(list != -1) - { - el.setAttribute(kAtList, NUM_ATTR(list)); - } + el.setAttribute(kAtList, list); else - { el.removeAttribute(kAtList); - } + // Is it a cell? if(format->paraInTable()) el.setAttribute(kAtCell, L"1"); else el.removeAttribute(kAtCell); + // Paragraph styles int style = format->paraStyle(); if(style != -1) - el.setAttribute(kElStyle, NUM_ATTR(style)); + el.setAttribute(kElStyle, style); else el.removeAttribute(kElStyle); + // These fix elements are later picked up in XmlFixups::fixBlocks el.setAttribute(kAtFix, fix); } -DOM::Element RtfParser::ParseAnalyser::getCurrentBlock() +DOM::Element XmlComposer::BaseAnalyser::getCurrentBlock() { - DOM::Node node = m_parser->getElement(); + DOM::Node node = m_composer->getElement(); if(node.hasChildNodes()) node = node.getLastChild(); @@ -377,97 +394,115 @@ DOM::Element RtfParser::ParseAnalyser::getCurrentBlock() } -bool RtfParser::ParseAnalyser::processTextContent(const string& cw, int flags, int param) +bool XmlComposer::BaseAnalyser::processTextContent(const string& cw, int flags, int param) { DOM::Element el; bool process = false; - RtfFormatting& format = m_parser->getTextFormatting(); + RtfFormatting& format = m_composer->getTextFormatting(); + // New paragraph if(cw == "par") { el = getCurrentBlock(); if(el != NULL) applyParaFormatting(&format, el); - el = m_parser->createElement(kElBlock); + el = m_composer->createElement(kElBlock); applyParaFormatting(&format, el); } + // Cells (used later in applyParaFormatting) else if(cw == "intbl") format.paraSetTable(true); + // Start of a cell else if(cw == "cell") { el = getCurrentBlock(); if(el != NULL) applyParaFormatting(&format, el); - el = m_parser->createElement(kElCell); - m_parser->pushElement(el); - m_parser->popElement(); - el = m_parser->createElement(kElBlock); + el = m_composer->createElement(kElCell); + m_composer->pushElement(el); + 
m_composer->popElement(); + el = m_composer->createElement(kElBlock); applyParaFormatting(&format, el); } + // Start of a row else if(cw == "trowd") - el = m_parser->createElement(kElRow); + el = m_composer->createElement(kElRow); + // A tab else if(cw == "tab") - el = m_parser->createElement(kElTab); + el = m_composer->createElement(kElTab); + // A section break else if(cw == "sect") - el = m_parser->createElement(kElSect); + el = m_composer->createElement(kElSect); + // A page break else if(cw == "page") - el = m_parser->createElement(kElPage); + el = m_composer->createElement(kElPage); + // A paragraph style else if(cw == "s" && HAS_PARAM) format.paraSetStyle(param); + // A line break else if(cw == "line") - el = m_parser->createElement(kElLine); + el = m_composer->createElement(kElLine); + // A page header (not implemented) else if(cw == "header") AN_ANALYSER(Skip); + + // A page footer (not implemented) else if(cw == "footer") AN_ANALYSER(Skip); + + // A bookmark (not implemented) else if(cw == "bkmkstart") AN_ANALYSER(Skip); + + // List text (not implemented) else if(cw == "listtext") AN_ANALYSER(Skip); + // Set list style (used in applyParaFormatting) else if(cw == "ls" && HAS_PARAM) format.paraSetList(param); if(el != NULL) { // This ensures that our content destination is open and ready - DestinationPtr dest = m_parser->getDestination(); + DestinationPtr dest = m_composer->getDestination(); ASSERT(dest != NULL); dest->charData(kValNull); - m_parser->pushElement(el); - m_parser->popElement(); + m_composer->pushElement(el); + m_composer->popElement(); } return (el != NULL) || process; - - /* TODO: cell, row, intbl, cellx, trowd*/ } -bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, +bool XmlComposer::BaseAnalyser::processTextFormatting(const string& cw, int flags, int param, RtfFormatting& format) { bool on = true; if(flags & HAS_PARAM && param == 0) on = false; + // Clears all paragraph formatting if(cw == "pard") { 
format.resetPara(); -// applyParaFormatting(); + // applyParaFormatting(); } + + // Rest are pretty much self-explanatory else if(cw == "plain") format.resetText(); else if(cw == "b") @@ -490,21 +525,22 @@ bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags return true; } -bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, int param) +bool XmlComposer::BaseAnalyser::processTextFormatting(const string& cw, int flags, int param) { - return processTextFormatting(cw, flags, param, m_parser->getTextFormatting()); + return processTextFormatting(cw, flags, param, m_composer->getTextFormatting()); } -bool RtfParser::ParseAnalyser::processTextAutoContent(const string& cw, int flags, int param) +bool XmlComposer::BaseAnalyser::processTextAutoContent(const string& cw, int flags, int param) { - DestinationPtr dest = m_parser->getDestination(); + DestinationPtr dest = m_composer->getDestination(); ASSERT(dest != NULL); dest->charData(kValNull); // Auto generated content if(cw == "chftn") { - int ac = m_parser->getAutoCount(AUTOCOUNT_FOOTNOTE); + // Footnote auto numbering + int ac = m_composer->getAutoCount(AUTOCOUNT_FOOTNOTE); AN_ELEMENT(kElRef); AN_ATTRIBUTE(kAtType, kValFootNote); @@ -517,38 +553,72 @@ bool RtfParser::ParseAnalyser::processTextAutoContent(const string& cw, int flag return false; } +/* ---------------------------------------------------------------------------------- + * ANALYSER/DESTINATION DEFINITIONS + */ + +#define ON_INITIALIZE(cls) \ + void XmlComposer::cls::initialize() +#define ON_CONTROLWORD(cls) \ + void XmlComposer::cls::controlWord(const string& cw, int flags, int param) +#define ON_CHARDATA(cls) \ + void XmlComposer::cls::charData(wstring data) +#define ON_GROUPSTART(cls) \ + void XmlComposer::cls::groupStart() +#define ON_GROUPEND(cls) \ + void XmlComposer::cls::groupEnd() +#define ON_DONE(cls) \ + void XmlComposer::cls::done() + + +// Skip Analyser 
-------------------------------------------------------------------- + ON_INITIALIZE(Skip) { AN_DESTINATION(Null); } + ON_GROUPSTART(Skip) { AN_ANALYSER(Skip); } -RtfParser::Upr::Upr(AnalyserPtr prv) +// Upr Analyser --------------------------------------------------------------------- + +XmlComposer::Upr::Upr(AnalyserPtr prv) { ASSERT(prv); prev = prv; } + ON_GROUPSTART(Upr) - { AN_ANALYSER(Skip); } +{ + AN_ANALYSER(Skip); +} + ON_GROUPEND(Upr) { ASSERT(prev); - m_parser->setAnalyser(prev); + m_composer->setAnalyser(prev); prev = NULL; } +// Stylesheet Analyser -------------------------------------------------------------- + ON_INITIALIZE(Stylesheet) { AN_ELEMENT(kElStylesheet); } + ON_GROUPSTART(Stylesheet) { + // Each group should be a style AN_ANALYSER(Style); + + // Without any character data AN_DESTINATION(Null); } +// Stylesheet Style Analyser -------------------------------------------------------- ON_INITIALIZE(Style) { @@ -556,6 +626,7 @@ ON_INITIALIZE(Style) // so we can't always create haveStyle = false; } + ON_CONTROLWORD(Style) { // Get the style id @@ -565,6 +636,7 @@ ON_CONTROLWORD(Style) return; } + // Create the style tag if necessary if(!haveStyle) { AN_ELEMENT(kElStyle); @@ -572,9 +644,10 @@ ON_CONTROLWORD(Style) haveStyle = true; } + // The style id if(cw == "s" && flags & kHasParam) { - AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); + AN_ATTRIBUTE(kAtId, param); } // Otherwise get as much formatting out of the tag as possible @@ -584,13 +657,17 @@ ON_CONTROLWORD(Style) else DEFAULT_CONTROLWORD; } + ON_GROUPSTART(Style) { AN_ANALYSER(Skip); } + ON_GROUPEND(Style) { - RtfFormatting& props = m_parser->getTextFormatting(); + RtfFormatting& props = m_composer->getTextFormatting(); + + // Dig out all the formatting attributes if(props.textIsBold()) AN_ATTRIBUTE(kAtBold, L"1"); if(props.textIsHidden()) @@ -601,42 +678,54 @@ ON_GROUPEND(Style) AN_ATTRIBUTE(kAtStrike, L"1"); if(props.textIsUnderline()) AN_ATTRIBUTE(kAtUnderline, L"1"); - if(props.textColor() != 
-1 && m_parser->getOptions().doColors) - AN_ATTRIBUTE(kAtColor, NUM_ATTR(props.textColor())); + if(props.textColor() != -1 && m_composer->getOptions().doColors) + AN_ATTRIBUTE(kAtColor, props.textColor()); } +// List Table Analyser -------------------------------------------------------------- ON_INITIALIZE(ListTable) { AN_ELEMENT(kElListtable); } + ON_GROUPSTART(ListTable) { + // Everything in here should be a list AN_ANALYSER(List); + + // Content doesn't matter AN_DESTINATION(Null); } +// List (in List Table) Analyser ---------------------------------------------------- ON_INITIALIZE(List) { - AN_ELEMENT(kElListdef); + // Create a default element + AN_ELEMENT(kElListdef); AN_ATTRIBUTE(kAtType, kValDisc); AN_ATTRIBUTE(kAtOrdered, L"0"); levelsSeen = 0; } + ON_CONTROLWORD(List) { + // The name if(cw == "listname") AN_DESTINATION_ATTR(kAtName); + + // The list id else if(cw == "listid" && HAS_PARAM) - AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); + AN_ATTRIBUTE(kAtId, param); // We let listlevel in here too else if(cw == "levelstartat" && HAS_PARAM) - AN_ATTRIBUTE(kAtStart, NUM_ATTR(param)); + AN_ATTRIBUTE(kAtStart, param); + // The list type else if(cw == "levelnfc" && HAS_PARAM) { switch(param) @@ -679,27 +768,35 @@ ON_CONTROLWORD(List) else DEFAULT_CONTROLWORD; } + ON_GROUPSTART(List) { + // Skip internal groups and content + if(levelsSeen > 0) AN_ANALYSER(Skip); + levelsSeen++; } - +// The List Override Table ---------------------------------------------------------- ON_INITIALIZE(ListOverrideTable) { - DOM::Document document = m_parser->getDocument(); + // Get all of the current lists + DOM::Document document = m_composer->getDocument(); lists = document.getElementsByTagName(kElListdef); curList = NULL; lsId = -1; } + ON_GROUPSTART(ListOverrideTable) { + // Content doesn't matter AN_DESTINATION(Null); } + ON_CONTROLWORD(ListOverrideTable) { // New list override clear @@ -709,10 +806,11 @@ ON_CONTROLWORD(ListOverrideTable) // List id for current listoverride else if(cw 
== "listid" && HAS_PARAM) { - wstring id = NUM_ATTR(param); + wstring id = XmlComposer::formatInt(param); if(lists != NULL) { + // Find the list in question for(int i = 0; i < lists->getLength(); i++) { DOM::Node node = lists->item(i); @@ -737,7 +835,7 @@ ON_CONTROLWORD(ListOverrideTable) else if(cw == "levelstartat" && HAS_PARAM) { if(curList != NULL) - curList.setAttribute(kAtStart, NUM_ATTR(param)); + curList.setAttribute(kAtStart, param); } else @@ -755,7 +853,7 @@ ON_CONTROLWORD(ListOverrideTable) if(curList != NULL) { parent.appendChild(curList); - curList.setAttribute(kAtList, NUM_ATTR(lsId)); + curList.setAttribute(kAtList, lsId); } } @@ -763,13 +861,9 @@ ON_CONTROLWORD(ListOverrideTable) } } -ON_GROUPEND(ListOverrideTable) -{ - -} - +// Info Block Analyser -------------------------------------------------------------- ON_INITIALIZE(Info) { @@ -777,39 +871,45 @@ ON_INITIALIZE(Info) AN_ELEMENT(kElInfo); AN_DESTINATION(Null); } + ON_CONTROLWORD(Info) { - // The title if(cw == "title") { AN_ELEMENT(kElTitle); AN_DESTINATION(Raw); } + else if(cw == "author") { AN_ELEMENT(kElAuthor); AN_DESTINATION(Raw); } + else if(cw == "operator") { AN_ELEMENT(kElOperator); AN_DESTINATION(Raw); } + else if(flags & kAsterisk) AN_ANALYSER(Skip); + else DEFAULT_CONTROLWORD; } - +// Root Analyser -------------------------------------------------------------------- ON_INITIALIZE(Root) { } + ON_CONTROLWORD(Root) { + // All the main RTF sections if(cw == "stylesheet") AN_ANALYSER(Stylesheet); else if(cw == "listtable") @@ -842,23 +942,26 @@ ON_CONTROLWORD(Root) } +// Content Destination -------------------------------------------------------------- + ON_INITIALIZE(Content) { - parent = m_parser->getElement(); + parent = m_composer->getElement(); created = false; } + ON_CHARDATA(Content) { // Create the first time we get content if(!created) { - DOM::Element dest = m_parser->createElement(kElDest); + DOM::Element dest = m_composer->createElement(kElDest); parent.appendChild(dest); - 
m_parser->replaceElement(dest); + m_composer->replaceElement(dest); - DOM::Element el = m_parser->createElement(kElBlock); - m_parser->pushElement(el); - m_parser->popElement(); + DOM::Element el = m_composer->createElement(kElBlock); + m_composer->pushElement(el); + m_composer->popElement(); created = true; } @@ -867,7 +970,10 @@ ON_CHARDATA(Content) return; int elements = 0; - RtfFormatting& format = m_parser->getTextFormatting(); + RtfFormatting& format = m_composer->getTextFormatting(); + + // Extra elements written out here are consolidated in + // XmlFixups::combineDuplicates // Now do text Properties if necessary if(format.textIsBold()) @@ -875,37 +981,44 @@ ON_CHARDATA(Content) AN_ELEMENT(kElB); elements++; } + if(format.textIsHidden()) { AN_ELEMENT(kElHide); elements++; } + if(format.textIsItalic()) { AN_ELEMENT(kElI); elements++; } + if(format.textIsStrike()) { AN_ELEMENT(kElStrike); elements++; } + if(format.textIsUnderline()) { AN_ELEMENT(kElU); elements++; } - if(format.textColor() != -1 && m_parser->getOptions().doColors) + + if(format.textColor() != -1 && m_composer->getOptions().doColors) { AN_ELEMENT(kElColor); - AN_ATTRIBUTE(kAtIndex, NUM_ATTR(format.textColor())); + AN_ATTRIBUTE(kAtIndex, format.textColor()); elements++; } + if(format.textSuScript() == RtfFormatting::SUPERSCRIPT) { AN_ELEMENT(kElSuper); elements++; } + if(format.textSuScript() == RtfFormatting::SUBSCRIPT) { AN_ELEMENT(kElSub); @@ -913,8 +1026,8 @@ ON_CHARDATA(Content) } // Write the data to the element - m_parser->getElement().appendChild( - m_parser->getDocument().createTextNode(data)); + m_composer->getElement().appendChild( + m_composer->getDocument().createTextNode(data)); // Now drop out of all the above formatting while(elements-- > 0) @@ -922,22 +1035,25 @@ ON_CHARDATA(Content) } +// FootNote Analyser ---------------------------------------------------------------- + ON_INITIALIZE(FootNote) { - int ac = m_parser->getAutoCount(AUTOCOUNT_FOOTNOTE); + int ac = 
m_composer->getAutoCount(AUTOCOUNT_FOOTNOTE); AN_ELEMENT(kElFootNote); AN_ATTRIBUTE(kAtId, ac); AN_DESTINATION(Content); } + ON_CONTROLWORD(FootNote) { // Inside foot notes there's no link to the foot note if(cw == "chftn") { - DestinationPtr dest = m_parser->getDestination(); + DestinationPtr dest = m_composer->getDestination(); ASSERT(dest != NULL); - int ac = m_parser->getAutoCount(AUTOCOUNT_FOOTNOTE); + int ac = m_composer->getAutoCount(AUTOCOUNT_FOOTNOTE); dest->charData(formatInt(ac)); return; } @@ -952,27 +1068,28 @@ ON_CONTROLWORD(FootNote) else DEFAULT_CONTROLWORD; } + ON_DONE(FootNote) { - m_parser->incrementAutoCount(AUTOCOUNT_FOOTNOTE); + m_composer->incrementAutoCount(AUTOCOUNT_FOOTNOTE); } - +// Raw Destination ------------------------------------------------------------------ ON_CHARDATA(Raw) { // Write the data to the element - m_parser->getElement().appendChild( - m_parser->getDocument().createTextNode(data)); + m_composer->getElement().appendChild( + m_composer->getDocument().createTextNode(data)); } - +// Attribute Destination ------------------------------------------------------------ ON_INITIALIZE(Attribute) { - element = m_parser->getElement(); + element = m_composer->getElement(); ASSERT(element != NULL); } @@ -991,17 +1108,3 @@ ON_CHARDATA(Attribute) element.setAttribute(name, cur); } -wstring RtfParser::formatInt(int num) -{ - char buff[16]; - - // Certain OSs don't support swprintf :( - sprintf(buff, "%d", num); - - wstring n; - for(char* s = buff; *s; s++) - n.append(1, *s); - - return n; -} - diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h index deba4ba..4e5c739 100644 --- a/src/xmlcomposer.h +++ b/src/xmlcomposer.h @@ -36,68 +36,102 @@ * */ +// RENAME: xmlcomposer.h + #ifndef __RTFPARSER_H__ #define __RTFPARSER_H__ #include "levelhandler.h" -struct RtfParserOptions +struct XmlComposerOptions { - RtfParserOptions() + XmlComposerOptions() { memset(this, 0, sizeof(*this)); } bool doColors; }; -class RtfParser : public LevelHandler +/* 
+ * XmlComposer + * + * This is where the RTF gets initially converted to XML. RtfParser sends + * notifications to this class's RtfHandler interface. It forwards them to + * the current analysers and destinations which produce XML content. + * (see xmlcomposehelpers.h) + * + * Not all conversion is completed here. Because RTF is so very weird we + * have to run lots of fixups in endDocument (see rtffixups.h) + */ +class XmlComposer : + public LevelHandler { public: - RtfParser(const RtfParserOptions& options); - virtual ~RtfParser(); + XmlComposer(const XmlComposerOptions& options); + virtual ~XmlComposer(); - virtual void startDocument(RtfReader* reader); + // Handler Overrides + virtual void startDocument(RtfParser* reader); virtual void endDocument(); virtual void controlWord(const string& cw, int flags, int param); virtual void groupStart(); virtual void groupEnd(); virtual void charData(wstring data); - // Element management functions + // Create an XML element with given name DOM::Element createElement(const string& name); + + // Push an XML element on the current level void pushElement(const DOM::Element& element); + + // Replace current XML element with given element void replaceElement(const DOM::Element& element); + + // Move up one XML element level without changing RTF level DOM::Element popElement(); + + // Set attributes on the current XML Element void setAttribute(const string& name, const wstring& value, DOM::Element el = DOM::Element()); void setAttribute(const string& name, int value, DOM::Element el = DOM::Element()); - // Changing the current parser functions + // The current analyser in use + AnalyserPtr getAnalyser(); void setAnalyser(AnalyserPtr analy); + + // The current destination in use + DestinationPtr getDestination(); void setDestination(DestinationPtr dest); + + // Replace the current destination (sets level deep) DestinationPtr replaceDestination(DestinationPtr dest); - // The types of auto counters + + // The types of auto 
numbering enum { AUTOCOUNT_FOOTNOTE, AUTOCOUNT_MAX }; - // Functions for auto numbering + // Functions for RTF auto numbering int getAutoCount(int type); void incrementAutoCount(int type); - // Current status functions + + // Get the current formatting options RtfFormatting& getTextFormatting(); - AnalyserPtr getAnalyser(); - DestinationPtr getDestination(); + DOM::Document getDocument() { return m_document; } const RtfParserOptions& getOptions() { return m_options; } + + // TODO: Should this be somewhere else? static wstring formatInt(int num); + +// LevelHandler override protected: virtual void clear(); @@ -110,12 +144,12 @@ protected: int m_autocount[AUTOCOUNT_MAX]; // Auto counters for the document - // Sub classes protected: + #define DESTINATION(cls) class cls : public Destination { public: #define END_DESTINATION }; - #define ANALYSER(cls) class cls : public ParseAnalyser { public: + #define ANALYSER(cls) class cls : public BaseAnalyser { public: #define END_ANALYSER }; #define DATA_PORTION protected: #define INITIALIZE virtual void initialize(); @@ -125,6 +159,7 @@ protected: #define GROUPEND virtual void groupEnd(); #define DONE virtual void done(); + // Main destination for document character content DESTINATION(Content) INITIALIZE CHARDATA @@ -133,14 +168,16 @@ protected: DOM::Element parent; END_DESTINATION - + // Discards character data DESTINATION(Null) END_DESTINATION + // Copies raw character data to output DESTINATION(Raw) CHARDATA END_DESTINATION + // Copies character data to an XML attribute DESTINATION(Attribute) Attribute(const string& nm) : name(nm) {} INITIALIZE @@ -150,7 +187,9 @@ protected: DOM::Element element; END_DESTINATION - class ParseAnalyser : + + // Base class for analysers with some helper functions + class BaseAnalyser : public Analyser { public: @@ -158,23 +197,34 @@ protected: { processDefault(cw, flags, param); } protected: - // Some helper functions + // Process a standard set of tags that can be found anywhere bool 
processDefault(const string& cw, int flags, int param); + + // Process text formatting tags bool processTextFormatting(const string& cw, int flags, int param, RtfFormatting& format); + bool processTextFormatting(const string& cw, int flags, int param); + + // Creates 'fix' tags for paragraph formatting in element + void applyParaFormatting(RtfFormatting* format, DOM::Element& el); + + // Process tags that are either text content, or change context bool processTextContent(const string& cw, int flags, int param); - bool processTextFormatting(const string& cw, int flags, int param); + + // Process tags that generate text content (like auto-numbering, fields) bool processTextAutoContent(const string& cw, int flags, int param); + // Convenience function DOM::Element getCurrentBlock(); - void applyParaFormatting(RtfFormatting* format, DOM::Element& el); }; + // Skip tags and groups ANALYSER(Skip) INITIALIZE GROUPSTART END_ANALYSER + // Unicode block analyser ANALYSER(Upr) Upr(AnalyserPtr prv); GROUPSTART @@ -183,11 +233,13 @@ protected: AnalyserPtr prev; END_ANALYSER + // Handle Stylesheets ANALYSER(Stylesheet) INITIALIZE GROUPSTART END_ANALYSER + // Handle a style in a stylesheet ANALYSER(Style) INITIALIZE CONTROLWORD @@ -197,11 +249,13 @@ protected: bool haveStyle; END_ANALYSER + // Handle the list definitions ANALYSER(ListTable) INITIALIZE GROUPSTART END_ANALYSER + // Handle a list in the list definitions ANALYSER(List) INITIALIZE CONTROLWORD @@ -210,27 +264,30 @@ protected: int levelsSeen; END_ANALYSER + // Handle list overrides ANALYSER(ListOverrideTable) INITIALIZE CONTROLWORD GROUPSTART - GROUPEND DATA_PORTION DOM::NodeList lists; int lsId; DOM::Element curList; END_ANALYSER + // Creates the info block ANALYSER(Info) INITIALIZE CONTROLWORD END_ANALYSER + // The main root analyser ANALYSER(Root) INITIALIZE CONTROLWORD END_ANALYSER + // Handles footnotes ANALYSER(FootNote) INITIALIZE CONTROLWORD diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp index 
c3fd8b6..6efe6f4 100644 --- a/src/xmlfixups.cpp +++ b/src/xmlfixups.cpp @@ -59,636 +59,585 @@ static const char* kConsolidateEnd[] = static const char* kConsolidateStart[] = { kElStylesheet, kElInfo, NULL }; + void RtfFixups::breakBreak(DOM::Document& doc, const string& contain, const string& tag) { - DOM::NodeList els = doc.getElementsByTagName(tag); - if(els != NULL) + DOM::NodeList els = doc.getElementsByTagName(tag); + if(els != NULL) { - for(int i = 0; i < els->getLength(); i++) - { - DOM::Element el = (const DOM::Element&)els->item(i); -#if 0 - // See if parent node only has this break tag - // in it. If so then replace parent with this - - DOM::Node parent = el.getParentNode(); - - if(parent != NULL) - { - DOM::Node grandparent = parent.getParentNode(); - - if(grandparent != NULL && - el.getPreviousSibling() == NULL && - el.getNextSibling() == NULL) - { - grandparent.replaceChild(parent.removeChild(el), parent); - } - } -#endif - - breakElement(el, contain); - } - } + for(int i = 0; i < els->getLength(); i++) + { + DOM::Element el = (const DOM::Element&)els->item(i); + breakElement(el, contain); + } + } } -/** - * Breaks a paragraph up through a previous level. Calls itself - * recursively to break paragraphs totally free up to containing - * destination. - * - * For example: - * - * - * This is a - * test of your concentration. - * - * - * Becomes: - * - * - * This is a - * test of your concentration. 
- * - */ bool RtfFixups::breakElement(const DOM::Element& el, const string& contain) { - ASSERT(el != NULL); + ASSERT(el != NULL); - DOM::Element parent = (const DOM::Element&)el.getParentNode(); - DOM::Element grandparent; + DOM::Element parent = (const DOM::Element&)el.getParentNode(); + DOM::Element grandparent; - string s = el.getNodeName(); - s = parent.getNodeName(); + string s = el.getNodeName(); + s = parent.getNodeName(); - // Get the parent node - if(parent != NULL) - grandparent = (const DOM::Element&)parent.getParentNode(); + // Get the parent node + if(parent != NULL) + grandparent = (const DOM::Element&)parent.getParentNode(); // Make sure we have something to work with before continuing - if(grandparent == NULL || parent == NULL || - DOMHelpers::isElement(parent, contain)) + if(grandparent == NULL || parent == NULL || + DOMHelpers::isElement(parent, contain)) return true; - DOM::Node e; + DOM::Node e; - // Check to see if this is the first node in the parent. - // If so then just move out to before - if(el.getPreviousSibling() == NULL) + // Check to see if this is the first node in the parent. + // If so then just move out to before + if(el.getPreviousSibling() == NULL) { - e = grandparent.insertBefore(parent.removeChild(el), parent); - } + e = grandparent.insertBefore(parent.removeChild(el), parent); + } // Check to see if this is the last node in the parent. // If so then just move out to after the parent else if(el.getNextSibling() == NULL) - { - DOM::Node next = parent.getNextSibling(); - if(next == NULL) - e = grandparent.appendChild(parent.removeChild(el)); - else - e = grandparent.insertBefore(parent.removeChild(el), next); + { + DOM::Node next = parent.getNextSibling(); + if(next == NULL) + e = grandparent.appendChild(parent.removeChild(el)); + else + e = grandparent.insertBefore(parent.removeChild(el), next); } // Otherwise it's in the middle so split the parent // element etc... 
- else - { - // Clone it but not deep - DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false); + else + { + // Clone it but not deep + DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false); - if(parent2 == NULL) + if(parent2 == NULL) return false; - // Flag that tells us whether we moved anything up to parent - bool moved = false; + // Flag that tells us whether we moved anything up to parent + bool moved = false; - // Now move all nodes after this one to the second parent. - while((e = el.getNextSibling()) != NULL) - { - parent2.appendChild(parent.removeChild(e)); + // Now move all nodes after this one to the second parent. + while((e = el.getNextSibling()) != NULL) + { + parent2.appendChild(parent.removeChild(e)); moved = true; - } + } - // Remove the element from it's parent - e = parent.removeChild(el); + // Remove the element from its parent + e = parent.removeChild(el); - // Okay now we move the paragraph up to the parent - DOMHelpers::insertAfter(grandparent, e, parent); - if(moved) - DOMHelpers::insertAfter(grandparent, parent2, e); - } + // Okay now we move the paragraph up to the parent + DOMHelpers::insertAfter(grandparent, e, parent); + if(moved) + DOMHelpers::insertAfter(grandparent, parent2, e); + } - // Now call it again with the paragraph in the new position - // untill everything's cut through! + // Now call it again with the paragraph in the new position + // until everything's cut through! return breakElement((DOM::Element&)e, contain); } -/** - * Changes from a marker based paragraph system to a contained - * paragraph system. Also applies paragraph attributes to the - * appropriate paragraph. - * - * For example: - * - * - * This is a - * test of your concentration. - * - * - * Becomes: - * - * This is a - * test of your concentration. 
- */ void RtfFixups::breakBlocks(DOM::Document& document) { // First break out all the paragraphs to the destination level DOM::NodeList blocks = document.getElementsByTagName(kElBlock); if(blocks != NULL) { - for(int i = 0; i < blocks->getLength(); i++) - { - DOM::Element block = (const DOM::Element&)blocks->item(i); + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (const DOM::Element&)blocks->item(i); // If it's the single closed style para then break it - if(block != NULL && !block.hasChildNodes()) + if(block != NULL && !block.hasChildNodes()) breakElement(block, kElDest); - } + } } // Now group stuff in destinations into paras or other blocks - DOM::NodeList destinations = document.getElementsByTagName(kElDest); - if(destinations != NULL) - { - for(int i = 0; i < destinations->getLength(); i++) - { - DOM::Element dest = (const DOM::Element&)destinations->item(i); - - // Sanity Check + DOM::NodeList destinations = document.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (const DOM::Element&)destinations->item(i); + + // Sanity Check if(dest == NULL || !dest.hasChildNodes()) - continue; + continue; - // Go through the children of this destination - DOM::Node child = dest.getFirstChild(); + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); - DOM::Element block; + DOM::Element block; - while(child != NULL) - { - // If it's a block - if(DOMHelpers::isElement(child, kElBlock)) - { - block = (DOM::Element&)child; - child = child.getNextSibling(); + while(child != NULL) + { + // If it's a block + if(DOMHelpers::isElement(child, kElBlock)) + { + block = (DOM::Element&)child; + child = child.getNextSibling(); continue; - } - - // If it's already a real block element - for(const char** t = kBlockTags; *t != NULL; t++) - { - if(DOMHelpers::isElement(child, *t)) - { - block = NULL; - break; - } - } - - // If there's a 
block then add to it - if(block != NULL) - { - block.appendChild(dest.removeChild(child)); + } + + // If it's already a real block element + for(const char** t = kBlockTags; *t != NULL; t++) + { + if(DOMHelpers::isElement(child, *t)) + { + block = NULL; + break; + } + } + + // If there's a block then add to it + if(block != NULL) + { + block.appendChild(dest.removeChild(child)); child = block; - } + } - child = child.getNextSibling(); + child = child.getNextSibling(); } - } - } + } + } } void RtfFixups::wrapTags(DOM::Document& doc, const string& tagName, - const string& wrapName) + const string& wrapName) { - DOM::NodeList tags = doc.getElementsByTagName(tagName); + DOM::NodeList tags = doc.getElementsByTagName(tagName); if(tags != NULL) - { - for(int i = 0; i < tags->getLength(); i++) - { - DOM::Element tag = (const DOM::Element&)tags->item(i); + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (const DOM::Element&)tags->item(i); DOM::Element wrap = doc.createElement(wrapName); - while(tag.hasChildNodes()) - wrap.appendChild(tag.removeChild(tag.getFirstChild())); + while(tag.hasChildNodes()) + wrap.appendChild(tag.removeChild(tag.getFirstChild())); - tag.appendChild(wrap); - } - } + tag.appendChild(wrap); + } + } } void RtfFixups::breakTags(DOM::Document& doc, const string& parentName, - const string& tagName) + const string& tagName) { - DOM::NodeList parents = doc.getElementsByTagName(parentName); - if(parents != NULL) + DOM::NodeList parents = doc.getElementsByTagName(parentName); + if(parents != NULL) { - for(int i = 0; i < parents->getLength(); i++) - { - DOM::Element parent = (const DOM::Element&)parents->item(i); + for(int i = 0; i < parents->getLength(); i++) + { + DOM::Element parent = (const DOM::Element&)parents->item(i); - if(!parent.hasChildNodes()) + if(!parent.hasChildNodes()) continue; - DOM::NodeList tags = parent.getElementsByTagName(tagName); - if(tags != NULL) - { - for(int i = 0; i < tags->getLength(); i++) - 
breakElement((const DOM::Element&)tags->item(i), parentName); - } + // First perform the breaks + DOM::NodeList tags = parent.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + breakElement((const DOM::Element&)tags->item(i), parentName); + } - DOM::Node tag = doc.createElement(tagName); + DOM::Node tag = doc.createElement(tagName); parent.insertBefore(tag, parent.getFirstChild()); - DOM::Node child = tag; + DOM::Node child = tag; while(child != NULL && (child = child.getNextSibling()) != NULL) - { - if(DOMHelpers::isElement(child, kElBlock)) - { - DOM::Node next = child.getNextSibling(); - if(next == NULL) - { - parent.removeChild(child); - continue; - } - - if(DOMHelpers::isElement(next, tagName)) - { - DOM::Node twodown = next.getNextSibling(); - if(!DOMHelpers::isElement(twodown, kElBlock)) - { - child = parent.insertBefore(parent.removeChild(next), child); - } - else - { - parent.removeChild(child); - child = next; - } - } - } - - if(DOMHelpers::isElement(child, tagName)) - { - if(!tag.hasChildNodes()) - parent.removeChild(tag); - tag = child; - } - else - { - tag.appendChild(parent.removeChild(child)); + { + if(DOMHelpers::isElement(child, kElBlock)) + { + DOM::Node next = child.getNextSibling(); + if(next == NULL) + { + parent.removeChild(child); + continue; + } + + if(DOMHelpers::isElement(next, tagName)) + { + DOM::Node twodown = next.getNextSibling(); + if(!DOMHelpers::isElement(twodown, kElBlock)) + { + child = parent.insertBefore(parent.removeChild(next), child); + } + else + { + parent.removeChild(child); + child = next; + } + } + } + + if(DOMHelpers::isElement(child, tagName)) + { + if(!tag.hasChildNodes()) + parent.removeChild(tag); + tag = child; + } + else + { + tag.appendChild(parent.removeChild(child)); child = tag; - } - } + } + } - if(!tag.hasChildNodes()) - parent.removeChild(tag); - } - } + if(!tag.hasChildNodes()) + parent.removeChild(tag); + } + } - DOM::NodeList tags = 
doc.getElementsByTagName(tagName); + DOM::NodeList tags = doc.getElementsByTagName(tagName); if(tags != NULL) - { - for(int i = 0; i < tags->getLength(); i++) - { - DOM::Element tag = (const DOM::Element&)tags->item(i); - DOM::Node parent = tag.getParentNode(); - - if(parent != NULL && !DOMHelpers::isElement(parent, parentName)) - parent.removeChild(tag); + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (const DOM::Element&)tags->item(i); + DOM::Node parent = tag.getParentNode(); + if(parent != NULL && !DOMHelpers::isElement(parent, parentName)) + parent.removeChild(tag); #if 0 else if(tag.hasChildNodes()) - { - DOM::NodeList children = tag.getChildNodes(); - if(children != NULL && children->getLength() == 1) - { - DOM::Node child = children->item(0); - if(child != NULL && !child.hasChildNodes() && - DOMHelpers::isElement(child, kElBlock)) - parent.removeChild(tag); - } - } + { + DOM::NodeList children = tag.getChildNodes(); + if(children != NULL && children->getLength() == 1) + { + DOM::Node child = children->item(0); + if(child != NULL && !child.hasChildNodes() && + DOMHelpers::isElement(child, kElBlock)) + parent.removeChild(tag); + } + } #endif - - } - } + } + } } void RtfFixups::breakLists(DOM::Document& doc) { - // Now group stuff in destinations into tables - DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); if(destinations != NULL) - { - for(int i = 0; i < destinations->getLength(); i++) - { - DOM::Element dest = (const DOM::Element&)destinations->item(i); + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (const DOM::Element&)destinations->item(i); - // Sanity Check + // Sanity Check if(dest == NULL) continue; - // Go through the children of this destination - DOM::Node child = dest.getFirstChild(); + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); - DOM::Element list; - 
DOM::Element e; + DOM::Element list; + DOM::Element e; - wstring previd; + wstring previd; - while(child != NULL) - { - // If it's a block and has a cell attribute + while(child != NULL) + { + // If it's a block ... if(DOMHelpers::isElement(child, kElBlock)) - { - e = (DOM::Element&)child; + { + e = (DOM::Element&)child; - // if it has a cell attribute - wstring listid = e.getAttribute(kAtList); - if(listid.length() > 0) + // ... and has a list attribute + wstring listid = e.getAttribute(kAtList); + if(listid.length() > 0) { - e.removeAttribute(kAtList); + e.removeAttribute(kAtList); - if(list == NULL || previd != listid) - { - list = doc.createElement(kElList); - list.setAttribute(kAtList, listid); + if(list == NULL || previd != listid) + { + list = doc.createElement(kElList); + list.setAttribute(kAtList, listid); dest.insertBefore(list, child); previd = listid; - } - } - else - { - list = NULL; - previd.erase(); + } } - } + else + { + list = NULL; + previd.erase(); + } + } - // It's not a block + // It's not a block if(list != NULL) - { - list.appendChild(dest.removeChild(child)); - child = list; - } + { + list.appendChild(dest.removeChild(child)); + child = list; + } - child = child.getNextSibling(); + child = child.getNextSibling(); } - } - } + } + } } void RtfFixups::fixStyles(const DOM::Document doc) { - DOM::NodeList styles = doc.getElementsByTagName(kElStyle); - if(styles != NULL) - { - DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); - if(blocks != NULL) - { - for(int i = 0; i < blocks->getLength(); i++) - { - DOM::Element block = (const DOM::Element&)blocks->item(i); + // Get all stylesheet styles + DOM::NodeList styles = doc.getElementsByTagName(kElStyle); + if(styles != NULL) + { + // Get list of blocks in the document + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (const DOM::Element&)blocks->item(i); if(block == NULL || 
!block.hasAttribute(kElStyle)) - continue; - - for(int j = 0; j < styles->getLength(); j++) - { - DOM::Element style = (const DOM::Element&)styles->item(j); - if(style != NULL) - { - if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) - { - wstring name = style.getAttribute(kAtName); + continue; + + // Lookup block styles + for(int j = 0; j < styles->getLength(); j++) + { + DOM::Element style = (const DOM::Element&)styles->item(j); + if(style != NULL) + { + if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) + { + // And change to the name + wstring name = style.getAttribute(kAtName); if(name.length() > 0) - block.setAttribute(kElStyle, name); + block.setAttribute(kElStyle, name); } - } - } - } - } - - for(int i = 0; i < styles->getLength(); i++) - { - DOM::Element style = (const DOM::Element&)styles->item(i); - if(style != NULL) - style.removeAttribute(kAtId); - } - } - + } + } + } + } + // A little cleanup of the stylesheet styles + for(int i = 0; i < styles->getLength(); i++) + { + DOM::Element style = (const DOM::Element&)styles->item(i); + if(style != NULL) + style.removeAttribute(kAtId); + } + } } void RtfFixups::breakTables(DOM::Document& doc) { - DOM::NodeList rows = doc.getElementsByTagName(kElRow); - if(rows != NULL) - { - for(int i = 0; i < rows->getLength(); i++) - { - DOM::Element row = (const DOM::Element&)rows->item(i); - DOM::Node parent = row.getParentNode(); - - if(parent == NULL) - continue; - - if(DOMHelpers::isElement(parent, kElBlock)) + // Break rows out to destinations + DOM::NodeList rows = doc.getElementsByTagName(kElRow); + if(rows != NULL) + { + for(int i = 0; i < rows->getLength(); i++) + { + DOM::Element row = (const DOM::Element&)rows->item(i); + DOM::Node parent = row.getParentNode(); + + if(parent == NULL) + continue; + + if(DOMHelpers::isElement(parent, kElBlock)) { - DOM::Node grandparent = parent.getParentNode(); + DOM::Node grandparent = parent.getParentNode(); - if(grandparent != NULL && 
!row.hasChildNodes()) + if(grandparent != NULL && !row.hasChildNodes()) { - if(row.getPreviousSibling() == NULL) - grandparent.insertBefore(parent.removeChild(row), parent); - else if(row.getNextSibling() == NULL) - DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent); - } - } - - breakElement(row, kElDest); - } - } - + if(row.getPreviousSibling() == NULL) + grandparent.insertBefore(parent.removeChild(row), parent); + else if(row.getNextSibling() == NULL) + DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent); + } + } + breakElement(row, kElDest); + } + } - // Now group stuff in destinations into tables - DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); if(destinations != NULL) - { - for(int i = 0; i < destinations->getLength(); i++) - { - DOM::Element dest = (const DOM::Element&)destinations->item(i); + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (const DOM::Element&)destinations->item(i); - // Sanity Check + // Sanity Check if(dest == NULL) continue; - // Go through the children of this destination - DOM::Node child = dest.getFirstChild(); + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); - DOM::Element table; + DOM::Element table; DOM::Element e; - while(child != NULL) - { - // If it's a block and has a cell attribute + while(child != NULL) + { + // If it's a block and has a cell attribute if(DOMHelpers::isElement(child, kElBlock)) - { - e = (DOM::Element&)child; - - // if it has a cell attribute - if(e.getAttribute(kAtCell).length() > 0) - { - e.removeAttribute(kAtCell); - - if(table == NULL) - { - table = doc.createElement(kElTable); - dest.insertBefore(table, child); - } - } - else - { - table = NULL; - } - } - - // It's not a block + { + e = (DOM::Element&)child; + + // if it has a cell attribute + 
if(e.getAttribute(kAtCell).length() > 0) + { + e.removeAttribute(kAtCell); + + if(table == NULL) + { + table = doc.createElement(kElTable); + dest.insertBefore(table, child); + } + } + else + { + table = NULL; + } + } + + // It's not a block if(table != NULL) - { - table.appendChild(dest.removeChild(child)); + { + table.appendChild(dest.removeChild(child)); child = table; - } + } - child = child.getNextSibling(); + child = child.getNextSibling(); } - } - } + } + } } void RtfFixups::removeTags(const DOM::Document& doc) { - // Go through the list of nodes + // Go through the list of nodes for(const char** t = kRemoveTags; *t != NULL; t++) - { - DOM::NodeList elements = doc.getElementsByTagName(*t); - if(elements != NULL) - { - for(int j = 0; j < elements->getLength(); j++) - { - DOM::Element el = (const DOM::Element&)elements->item(j); - DOM::Node parent = el->getParentNode(); - - if(parent == NULL) - continue; - - while(el.hasChildNodes()) - parent.insertBefore(el.removeChild(el.getFirstChild()), el); - - parent.removeChild(el); + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + for(int j = 0; j < elements->getLength(); j++) + { + DOM::Element el = (const DOM::Element&)elements->item(j); + DOM::Node parent = el->getParentNode(); + + if(parent == NULL) + continue; + + while(el.hasChildNodes()) + parent.insertBefore(el.removeChild(el.getFirstChild()), el); + + parent.removeChild(el); } - } - } + } + } } void RtfFixups::fixLists(const DOM::Document doc) { - DOM::NodeList lists = doc.getElementsByTagName(kElList); - if(lists != NULL) - { - DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); - if(listdefs != NULL) - { - for(int i = 0; i < listdefs->getLength(); i++) - { - DOM::Element listdef = (const DOM::Element&)listdefs->item(i); + // Get all the lists + DOM::NodeList lists = doc.getElementsByTagName(kElList); + if(lists != NULL) + { + // And all the list definitions + DOM::NodeList listdefs = 
doc.getElementsByTagName(kElListdef); + if(listdefs != NULL) + { + for(int i = 0; i < listdefs->getLength(); i++) + { + DOM::Element listdef = (const DOM::Element&)listdefs->item(i); if(listdef == NULL || !listdef.hasAttribute(kAtList)) - continue; + continue; - for(int j = 0; j < lists->getLength(); j++) + for(int j = 0; j < lists->getLength(); j++) { - DOM::Element list = (const DOM::Element&)lists->item(j); - if(list != NULL) - { - if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) + DOM::Element list = (const DOM::Element&)lists->item(j); + if(list != NULL) + { + if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) { - DOMHelpers::copyAttributes(listdef, list, kHideList); - list.removeAttribute(kAtList); - } - } - } - } - } - } + // And copy all the attributes from the list definition to the list + DOMHelpers::copyAttributes(listdef, list, kHideList); + list.removeAttribute(kAtList); + } + } + } + } + } + } } void RtfFixups::fixBlocks(const DOM::Document doc) { - // First break out all the paragraphs to the destination level - DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); - if(blocks != NULL) - { - string fix; + // Get all the blocks + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + string fix; wstring val; - for(int i = 0; i < blocks->getLength(); i++) - { - DOM::Element block = (const DOM::Element&)blocks->item(i); - DOM::Node parent = block.getParentNode(); - - if(parent == NULL) - continue; + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (const DOM::Element&)blocks->item(i); + DOM::Node parent = block.getParentNode(); - fix.resize(0); - val.resize(0); + if(parent == NULL) + continue; - val = block.getAttribute(kAtFix); - if(val.length() > 0) - block.removeAttribute(kAtFix); + fix.resize(0); + val.resize(0); + // Figure out what kind of element they want block fixed to + val = block.getAttribute(kAtFix); + if(val.length() > 0) + block.removeAttribute(kAtFix); 
- if(val.length() > 0) - { - val = block.getAttributeNS("", kAtFix); + // BUG: Sablotron bug work around + if(val.length() > 0) + { + val = block.getAttributeNS("", kAtFix); if(val.length() > 0) - block.removeAttributeNS("", kAtFix); - } + block.removeAttributeNS("", kAtFix); + } - if(val.length() > 0) - DOM::transcode16to8(val, fix); + if(val.length() > 0) + DOM::transcode16to8(val, fix); if(fix.length() == 0) - fix = kElPara; + fix = kElPara; + // Create duplicate of the 'fix' element DOM::Element el = doc.createElement(fix); - DOMHelpers::copyAttributes(block, el, NULL); + DOMHelpers::copyAttributes(block, el, NULL); - while(block.hasChildNodes()) - el.appendChild(block.removeChild(block.getFirstChild())); + // Replace block with the given 'fix' element + while(block.hasChildNodes()) + el.appendChild(block.removeChild(block.getFirstChild())); - parent.replaceChild(el, block); + parent.replaceChild(el, block); } - } + } } -/** - * Consolidates a certain tag types at the end of the document - */ void RtfFixups::consolidateEndTags(DOM::Document& doc) { DOM::Element top = doc.getDocumentElement(); @@ -721,9 +670,6 @@ void RtfFixups::consolidateEndTags(DOM::Document& doc) } } -/** - * Consolidates a certain tag types at the start of the document - */ void RtfFixups::consolidateStartTags(DOM::Document& doc) { DOM::Element top = doc.getDocumentElement(); @@ -759,10 +705,7 @@ void RtfFixups::consolidateStartTags(DOM::Document& doc) } } -/** - * Removes adjacent duplicate nodes of certain names - */ -void RtfFixups::removeDuplicates(const DOM::Document& doc) +void RtfFixups::combineDuplicates(const DOM::Document& doc) { bool found; diff --git a/src/xmlfixups.h b/src/xmlfixups.h index a250c5a..1716925 100644 --- a/src/xmlfixups.h +++ b/src/xmlfixups.h @@ -41,23 +41,95 @@ #include "sablo.h" -class RtfFixups +/* + * XMLFixups + * + * Because RTF is so 'different' (read: brain dead) we need to do all sorts + * of antics to get it into a nice XML format. 
Some of the XML Composition + * is done in XMLComposer, but whatever can't be done there as we're parsing + * gets done here after the fact. + * + * These functions are called from XMLComposer::endDocument and massage the + * resulting XML DOM into shape. + */ +class XMLFixups { public: - // Cleanup Functions + // Replace blocks with 'fix' elements like paragraphs static void fixBlocks(DOM::Document doc); + + // Pass 2 list fixups static void fixLists(const DOM::Document doc); + + // Pass 2 style fixups static void fixStyles(const DOM::Document doc); + + /* + * Breaks a paragraph up through a previous level. Calls itself + * recursively to break paragraphs totally free up to containing + * destination. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * + * This is a + * test of your concentration. + * + */ static bool breakElement(const DOM::Element& el, const string& contain); + + // Break all tags of a given type to a previous level (see above) static void breakBreak(DOM::Document& doc, const string& contain, const string& tag); + + // Used to break tables cells and rows into blocks (but more complicated) + static void breakTags(DOM::Document& doc, const string& parentName, const string& tagName); + + // Fixes and combines list elements with the same id static void breakLists(DOM::Document& document); + + // Used to find and create tables and perform initial break out static void breakTables(DOM::Document& document); - static void breakTags(DOM::Document& doc, const string& parentName, const string& tagName); + + + /* + * Changes from a marker based paragraph system to a contained + * paragraph system. Also applies paragraph attributes to the + * appropriate paragraph. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * This is a + * test of your concentration. 
+ */ static void breakBlocks(DOM::Document& document); + + // Wrap certain tags in a wrapper tag of given name static void wrapTags(DOM::Document& document, const string& tagName, const string& wrapName); + + // Remove certain tags from document static void removeTags(const DOM::Document& doc); - static void removeDuplicates(const DOM::Document& doc); + + // Combines certain adjacent duplicate tags + static void combineDuplicates(const DOM::Document& doc); + + // Consolidates a certain tag types at the beginning of the document static void consolidateStartTags(DOM::Document& doc); + + // Consolidates a certain tag types at the end of the document static void consolidateEndTags(DOM::Document& doc); }; -- cgit v1.2.3