diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/basehandler.cpp | 4 | ||||
| -rw-r--r-- | src/basehandler.h | 2 | ||||
| -rw-r--r-- | src/domhelpers.cpp | 167 | ||||
| -rw-r--r-- | src/domhelpers.h | 18 | ||||
| -rw-r--r-- | src/levelhandler.cpp | 26 | ||||
| -rw-r--r-- | src/levelhandler.h | 29 | ||||
| -rw-r--r-- | src/reference.h | 14 | ||||
| -rw-r--r-- | src/rtfformatting.h | 8 | ||||
| -rw-r--r-- | src/rtfparser.cpp | 294 | ||||
| -rw-r--r-- | src/rtfparser.h | 97 | ||||
| -rw-r--r-- | src/rtfx.cpp | 20 | ||||
| -rw-r--r-- | src/xmlcomposehelpers.h | 73 | ||||
| -rw-r--r-- | src/xmlcomposer.cpp | 475 | ||||
| -rw-r--r-- | src/xmlcomposer.h | 99 | ||||
| -rw-r--r-- | src/xmlfixups.cpp | 859 | ||||
| -rw-r--r-- | src/xmlfixups.h | 80 | 
16 files changed, 1298 insertions, 967 deletions
diff --git a/src/basehandler.cpp b/src/basehandler.cpp index 4820051..53e5a18 100644 --- a/src/basehandler.cpp +++ b/src/basehandler.cpp @@ -1,4 +1,4 @@ -/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXx/*   * Copyright (c) 2004, Nate Nielsen   * All rights reserved.   * @@ -36,6 +36,8 @@   *   */ +// DELETE +  #include "usuals.h"  #include "basehandler.h" diff --git a/src/basehandler.h b/src/basehandler.h index 55c6e24..3becdf1 100644 --- a/src/basehandler.h +++ b/src/basehandler.h @@ -36,6 +36,8 @@   *   */ +// DELETE +  // BaseHandler  //  Implements an RtfHandler for other classes (LevelHandler  //  and RtfParser) to override. diff --git a/src/domhelpers.cpp b/src/domhelpers.cpp index 7b06f55..6cf8052 100644 --- a/src/domhelpers.cpp +++ b/src/domhelpers.cpp @@ -40,142 +40,139 @@  #include "domhelpers.h"  #include "tags.h" -/** - * A quick check to see if a node is an element of a certain - * name - */  bool DOMHelpers::isElement(const DOM::Node& node, const string& name)  { - return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && -                node.getNodeName() == name; +    return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && +              node.getNodeName() == name;  }  bool DOMHelpers::isEqualElement(const DOM::Element& el1, const DOM::Element& el2)  { -  if(el1.getNodeName() == el2.getNodeName()) -     return false; +    if(el1.getNodeName() == el2.getNodeName()) +        return false; - DOM::NamedNodeMap at1 = el1.getAttributes(); -   DOM::NamedNodeMap at2 = el2.getAttributes(); +    // Compare attributes +    DOM::NamedNodeMap at1 = el1.getAttributes(); +    DOM::NamedNodeMap at2 = el2.getAttributes(); -  if(at1 == NULL && at2 == NULL) -     return true; +    if(at1 == NULL && at2 == NULL) +        return true; -  if(at1 == NULL || at2 == NULL || -      at1->getLength() != at2->getLength()) -       return false; +    if(at1 == NULL || at2 == NULL || +       at1->getLength() != at2->getLength()) +        return false; - for(int i = 0; i < at1->getLength(); i++) -  { -      DOM::Attr attr1 = (const DOM::Attr&)at1->item(0); -      if(attr1 != NULL) -         return false; +    for(int i = 0; i < at1->getLength(); i++) +    { +        DOM::Attr attr1 = (const DOM::Attr&)at1->item(0); +        if(attr1 != NULL) +            return false; -     DOM::Attr attr2 = (const DOM::Attr&)at2->getNamedItem(attr1.getNodeName()); +        DOM::Attr attr2 = (const DOM::Attr&)at2->getNamedItem(attr1.getNodeName());          if(attr2 != NULL) -          return false; +            return false; -     if(attr1.getNodeValue() == attr2.getNodeValue()) -           return false; -  } +        if(attr1.getNodeValue() == attr2.getNodeValue()) +            return false; +    } -   return true; +    return true;  } -/** - * Gets the pertinent ancestor of this node, or returns null - * if not found. - */  DOM::Element DOMHelpers::getContainingElement(const DOM::Node& node, const string& name)  {      DOM::Node n = node; -   while(true) +    while(true)      { -      n = n.getParentNode(); -     if(n == NULL) -          break; +        n = n.getParentNode(); +        if(n == NULL) +            break; +        // Match parent to given name          if(isElement(n, name)) -         return (DOM::Element&)n; -   } +            return (DOM::Element&)n; +    } - return DOM::Element(); +    return DOM::Element();  }  bool isNsAttr(const string& name)  { -   return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true; +    // Check if this attribute is a xmlns: attribute +    return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true;  }  void DOMHelpers::copyAttributes(const DOM::Element& src, DOM::Element& dest, -                             const char** hideList) +                                const char** hideList)  { -    // Now get both sets of attributes - DOM::NamedNodeMap srcMap = src.getAttributes(); +    // Get both sets of attributes +    DOM::NamedNodeMap srcMap = src.getAttributes();      DOM::NamedNodeMap destMap = dest.getAttributes(); - if(srcMap == NULL || destMap == NULL) -      return; +    if(srcMap == NULL || destMap == NULL) +        return; -   // And copy them  from one to the other +    // And copy them  from one to the other      for(int j = 0; j < srcMap->getLength(); j++) -   { -      DOM::Node attr = srcMap->item(j); -      if(attr != NULL) -       { -          // BUG: Sablotron seems to have a bug in it's -          // setAttributeNode implementation. It always -          // adds a blank namespace -          // attr = attr.cloneNode(false); -           // if(attr != NULL) -            //   destMap.setNamedItem(attr); - -          string name = attr.getNodeName(); - -         if(hideList) -           { - -             for(const char** t = hideList; *t != NULL; t++) +    { +        DOM::Node attr = srcMap->item(j); +        if(attr != NULL) +        { +            string name = attr.getNodeName(); + +            if(hideList) +            { +                for(const char** t = hideList; *t != NULL; t++)                  { -                  if(name == *t) -                     name.erase(); -              } -          } - -         if(name.length() > 0 && !isNsAttr(name)) -               dest.setAttribute(attr.getNodeName(), attr.getNodeValue()); +                    if(name == *t) +                        name.erase(); +                } +            } + +            // BUG: Sablotron seems to have a bug in it's +            // setAttributeNode implementation. It always +            // adds a blank namespace +            // +            // attr = attr.cloneNode(false); +            // if(attr != NULL) +            //      destMap.setNamedItem(attr); + +            // We never copy xmlns: attributes +            if(name.length() > 0 && !isNsAttr(name)) +                dest.setAttribute(attr.getNodeName(), attr.getNodeValue());          } -  } +    }  }  DOM::Element DOMHelpers::getPriorElement(const DOM::Node& node, const string& name)  { - DOM::Node n = node; +    DOM::Node n = node; -   while(n != NULL) -   { -      if(isElement(n, name)) -         return (DOM::Element&)n; +    while(n != NULL) +    { +        // Note that we return ourselves if it matches +        if(isElement(n, name)) +            return (DOM::Element&)n; -      n = n.getPreviousSibling(); +        n = n.getPreviousSibling();      } - DOM::Node parent = node.getParentNode(); +    DOM::Node parent = node.getParentNode(); -  if(parent == NULL) -     return DOM::Element(); - else -       return getPriorElement(parent, name); +    if(parent == NULL) +        return DOM::Element(); +    else +        return getPriorElement(parent, name);  }  void DOMHelpers::insertAfter(DOM::Node& parent, const DOM::Node& node, -                         const DOM::Node& ref) +                             const DOM::Node& ref)  {      DOM::Node sibling = ref.getNextSibling(); -  if(sibling == NULL) + +    if(sibling == NULL)          parent.appendChild(node); -  else -       parent.insertBefore(node, sibling); +    else +        parent.insertBefore(node, sibling);  } - diff --git a/src/domhelpers.h b/src/domhelpers.h index 16afd79..043ffd4 100644 --- a/src/domhelpers.h +++ b/src/domhelpers.h @@ -41,16 +41,32 @@  #include "sablo.h" +/* + * DOMHelpers + * + * A collection of functions for doing some things with an XML DOM. + * Used mainly by XMLComposer. + */  class DOMHelpers  {  public: -    // DOM Helper Functions +    // Check if given node is an element with a certain name      static bool isElement(const DOM::Node& node, const string& name); + +    // Check if two elements have the same name and attributes      static bool isEqualElement(const DOM::Element& el1, const DOM::Element& el2); + +    // Copy attributes from one element to another optionaly ignoring some      static void copyAttributes(const DOM::Element& src, DOM::Element& dest, const char** hideList); + +    // Insert a child node after a given reference node      static void insertAfter(DOM::Node& parent, const DOM::Node& node, const DOM::Node& ref); + +    // Get containing element of a given name      static DOM::Element getContainingElement(const DOM::Node& node, const string& name); + +    // Get previous element (in XML flow) of a given name      static DOM::Element getPriorElement(const DOM::Node& node, const string& name);  }; diff --git a/src/levelhandler.cpp b/src/levelhandler.cpp index 7fc2dd6..cdd3337 100644 --- a/src/levelhandler.cpp +++ b/src/levelhandler.cpp @@ -39,9 +39,12 @@  #include "usuals.h"  #include "levelhandler.h" +/* ---------------------------------------------------------------------------------- + *  CONSTRUCTION + */  LevelHandler::LevelHandler()  { - +    m_reader = NULL;  }  LevelHandler::~LevelHandler() @@ -54,12 +57,18 @@ void LevelHandler::clear()  	m_curLevel.release();  	m_topLevel.release(); -	BaseHandler::clear(); +    m_parser = NULL;  } -void LevelHandler::startDocument(RtfReader* reader) +/* ---------------------------------------------------------------------------------- + *  OVERRIDES + */ + +void LevelHandler::startDocument(RtfParser* parser)  { -	BaseHandler::startDocument(reader); +    clear(); + +    m_parser = parser;  	m_topLevel = new Level;  	m_curLevel = m_topLevel; @@ -67,12 +76,11 @@ void LevelHandler::startDocument(RtfReader* reader)  void LevelHandler::endDocument()  { -	BaseHandler::endDocument(); +  }  void LevelHandler::groupStart()  { -	BaseHandler::groupStart();  	ASSERT(m_curLevel);  	pushLevel();  } @@ -81,9 +89,12 @@ void LevelHandler::groupEnd()  {  	ASSERT(m_curLevel);  	popLevel(); -	BaseHandler::groupEnd();  } +/* ---------------------------------------------------------------------------------- + *  OPERATIONS + */ +  DOM::Element LevelHandler::getElement()  {  	ASSERT(m_curLevel); @@ -101,7 +112,6 @@ void LevelHandler::popLevel()  	// Pull a level off the stack  	LevelPtr level = m_curLevel->getPrevious(); -	// TODO: report errors here  	if(level)  		m_curLevel = level;  } diff --git a/src/levelhandler.h b/src/levelhandler.h index 3077c13..bee1a8e 100644 --- a/src/levelhandler.h +++ b/src/levelhandler.h @@ -44,36 +44,49 @@  #define __LEVELHANDLER_H__  #include "rtfreader.h" -#include "basehandler.h" -#include "rtfparsehelpers.h"  #include "reference.h" +#include "rtfparsehelpers.h" +/* + * LevelHandler + * + * A base class that manages a set of Levels (see XMLComposeHelpers.cpp) + * based on the RTF groups seen. + */  class LevelHandler -	: public BaseHandler +	: public RTFHandler  {  public:  	LevelHandler();  	virtual ~LevelHandler(); -	virtual void startDocument(RtfReader* reader); +    // Overrides +	virtual void startDocument(RtfParser* parser);  	virtual void endDocument();  	virtual void groupStart();  	virtual void groupEnd(); +    // Convenience function to get XML element from current level  	virtual DOM::Element getElement(); +    // Create a new level on top of stack  	void pushLevel(); + +    // Pop top level and discard  	void popLevel(); + +    // Back out all the way past a given level  	void rewindLevel(LevelPtr ptr); -	LevelPtr getLevel(); +    // Get the current level +	LevelPtr getLevel();  protected:  	virtual void clear(); -	LevelPtr m_topLevel; -	LevelPtr m_curLevel; +	LevelPtr m_topLevel;       // First level +	LevelPtr m_curLevel;       // The current level +    RtfParser* m_parser;       // The parser we're listening to  }; -  #endif // __LEVELHANDLER_H__ diff --git a/src/reference.h b/src/reference.h index 1e68515..1a78d4c 100644 --- a/src/reference.h +++ b/src/reference.h @@ -39,6 +39,11 @@  #ifndef __REFERENCE_H__  #define __REFERENCE_H__ +/* + * Reference + * + * A basic reference counting pointer + */  template<typename C>  class Reference  { @@ -60,9 +65,7 @@ public:  	}  	~Reference() -	{ -		release(); -	} +    	{ release(); }  	Reference(const Reference& orig)  	{ @@ -126,6 +129,11 @@ private:  	C* m_ptr;  }; +/* + * Instance + * + * A basic reference counted object. + */  class Instance  {  public: diff --git a/src/rtfformatting.h b/src/rtfformatting.h index 6fbcf57..bb49cf1 100644 --- a/src/rtfformatting.h +++ b/src/rtfformatting.h @@ -39,6 +39,13 @@  #ifndef __RTFTEXPROPERTIES_H__  #define __RTFTEXPROPERTIES_H__ +/* + * RtfFormatting + * + * For keeping track of all the various transient formatting options + * within a given Rtf group. Any supported text options (not block) + * should be added here. + */  class RtfFormatting  {  public: @@ -161,7 +168,6 @@ protected:  	int m_list;  	bool m_inTbl; -  	// TODO: Character styles  }; diff --git a/src/rtfparser.cpp b/src/rtfparser.cpp index 6d07c80..78945b6 100644 --- a/src/rtfparser.cpp +++ b/src/rtfparser.cpp @@ -36,6 +36,8 @@   *   */ +// RENAME RTFParser.cpp +  #include "usuals.h"  #include <stdlib.h> @@ -43,11 +45,11 @@  #include "rtfreader.h" -const int RtfHandler::kAsterisk = 0x00000001; -const int RtfHandler::kHasParam = 0x00000002; -const int RtfHandler::kIsEncoded = 0x00000004; +/* ---------------------------------------------------------------------------------- + *  CONSTRUCTION + */ -RtfReader::RtfReader() +RTFParser::RTFParser()  {  	m_handler = NULL;  	m_depth = 0; @@ -57,12 +59,17 @@ RtfReader::RtfReader()  	m_uniEatStack.push(0);  } -RtfReader::~RtfReader() +RTFParser::~RTFParser()  {  } -bool RtfReader::parse(string fileName) + +/* ---------------------------------------------------------------------------------- + *  PUBLIC METHODS + */ + +bool RTFParser::parse(string fileName)  {  	FILE* file = fopen(fileName.c_str(), "r");  	if(!file) @@ -75,26 +82,134 @@ bool RtfReader::parse(string fileName)  	return ret;  } -void RtfReader::emptyData(RtfContext& cx) +bool RTFParser::parse(FILE* file) +{ +    int ch = 0; + +    // The group depth +    m_depth = 0; +    m_parseErrors = ""; + +    RtfContext cx; +    cx.isData = false; +    cx.file = file; +    cx.data = L""; + +    if(m_handler) +        m_handler->startDocument(this); + +    while(1) +    { +        ch = fgetc(file); +        if(ch == EOF) +            goto done; + +        // TODO: Do we need this ? +        if(!cx.isData) +        { +            switch(ch) +            { + +            // Starting a control word +            case '\\': +                if(!parseControlWord(cx)) +                    goto done; +                break; + +            // Starting an RTF group +            case '{': +                { +                    // Send all previous data +                    flushData(cx); + +                    // Handle any unicode destinations properly +                    m_uniEatStack.push(m_uniEatStack.top()); + +                    if(m_handler) +                        m_handler->groupStart(); + +                    m_depth++; +                } +                break; + +            case '}': +                { +                    // Send all previous data +                    flushData(cx); + +                    if(m_handler) +                        m_handler->groupEnd(); + +                    // Handle any unicode destinations properly +                    if(!m_uniEatStack.empty()) +                        m_uniEatStack.pop(); + +                    m_depth--; +                } +                break; + +            default: +                cx.isData = true; +                break; +            } +        } + +        if(cx.isData) +        { +            // We translate tabs into the appropriate control word +            if(ch == '\t') +                sendControlWord(cx, "tab", 0, -1); + +            // line endings aren't used +            else if(!strchr("\r\n", ch)) +               sendData(cx, ch); + +            cx.isData = false; +        } +    } + + +done: + +    if(m_depth != 0) +        m_parseErrors.append("unmatched braces\n"); + +    if(m_handler) +        m_handler->endDocument(); + +    // If any parse errors return failure +    return m_parseErrors.empty(); +} + + +/* ---------------------------------------------------------------------------------- + *  HANDLER CALLS + */ + +void RTFParser::flushData(RtfContext& cx)  {  	if(!cx.data.empty())  	{  		if(m_handler)  			m_handler->charData(cx.data); +  		cx.data.resize(0);  	}  } -void RtfReader::sendData(RtfContext& cx, wchar_t ch) +void RTFParser::sendData(RtfContext& cx, wchar_t ch)  { +    // Skip unicode chars we've been asked to  	if(m_uniEat > 0)  		m_uniEat--; +  	else  		cx.data.append(1, ch);  } -void RtfReader::sendData(RtfContext& cx, wstring data) +void RTFParser::sendData(RtfContext& cx, wstring data)  { +    // Skip any unicode chars we've been asked to  	if(m_uniEat > 0)  	{  		int len = data.size(); @@ -110,16 +225,24 @@ void RtfReader::sendData(RtfContext& cx, wstring data)  	}  } -void RtfReader::sendControlWord(RtfContext& cx, string cw, int flags, int param) +void RTFParser::sendControlWord(RtfContext& cx, string cw, int flags, int param)  { -	emptyData(cx); +	flushData(cx); +  	if(m_handler)  		m_handler->controlWord(cw, flags, param);  } -bool RtfReader::parseHexChar(RtfContext& cx, int num) + +/* ---------------------------------------------------------------------------------- + *  PARSE HELPERS + */ + +bool RTFParser::parseHexChar(RtfContext& cx, int num)  {  	string data; + +    // Get num chars and put them in the string  	for(int i = 0; i < num; i++)  	{  		char ch = fgetc(cx.file); @@ -139,6 +262,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)  		}  	} +    // If parsing hex, then convert to appropriate unicode  	if(m_parseHex)  	{  		char* end = NULL; @@ -148,6 +272,9 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)  		else  			m_parseErrors.append("invalid hex char: " + data + "\n");  	} + +    // TODO: Why would we ever want to do this? +    // Otherwise just send as a hex control word  	else  	{  		sendControlWord(cx, data, RtfHandler::kIsEncoded, -1); @@ -156,7 +283,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)  	return true;  } -bool RtfReader::parseControlWord(RtfContext& cx) +bool RTFParser::parseControlWord(RtfContext& cx)  {  	bool isAsterisk = false;  	string controlword; @@ -171,7 +298,7 @@ bool RtfReader::parseControlWord(RtfContext& cx)  		bool empty = controlword.empty();  		// Part of the name of a control word -		// NOTE: Although the RTF specification prohibits upercase +		// NOTE: Although the RTF specification prohibits uppercase  		// control words, MS Word uses them :-/  		if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z')  			controlword.append(1, (char)ch); @@ -211,16 +338,16 @@ bool RtfReader::parseControlWord(RtfContext& cx)  			break;  		} -    // Escaped braces -    else if(empty && ch == '{') -    { -      sendData(cx, L'{'); -    } +        // Escaped braces +        else if(empty && ch == '{') +        { +            sendData(cx, L'{'); +        } -    else if(empty && ch == '}') -    { -      sendData(cx, L'}'); -    } +        else if(empty && ch == '}') +        { +            sendData(cx, L'}'); +        }  		// Non breaking space  		else if(empty && ch == '~') @@ -242,18 +369,7 @@ bool RtfReader::parseControlWord(RtfContext& cx)  			param.append(1, (char)ch);  		} -		// TODO: This looks real hokey and acts that -		// way too -#if 0 -		// An enter as the first character of a control word -		// makes a paragraph -		else if(strchr("\n\r", ch)) -		{ -			controlword = "par"; -			break; -		} -#endif -		// Space end a rtf code (but get eaten) +		// Space at end a rtf code (it gets eaten)  		else if(strchr(" ", ch))  			break; @@ -331,111 +447,3 @@ bool RtfReader::parseControlWord(RtfContext& cx)  	return true;  } -bool RtfReader::parse(FILE* file) -{ -	m_depth = 0; -	m_parseErrors = ""; - -	int ch = 0; - -	RtfContext cx; -	cx.isData = false; -	cx.file = file; -	cx.data = L""; - -	if(m_handler) -		m_handler->startDocument(this); - -	while(1) -	{ -		ch = fgetc(file); -		if(ch == EOF) -			goto done; - -		// Type is undetermined so we figure it out here -		if(!cx.isData) -		{ -			switch(ch) -			{ -			case '\\': -				if(!parseControlWord(cx)) -					goto done; -				break; - -			case '{': -				{ -					emptyData(cx); - -					m_uniEatStack.push(m_uniEatStack.top()); - -					if(m_handler) -						m_handler->groupStart(); - -					m_depth++; -				} -				break; - -			case '}': -				{ -					emptyData(cx); - -					if(m_handler) -						m_handler->groupEnd(); - -					if(!m_uniEatStack.empty()) -						m_uniEatStack.pop(); - -					m_depth--; -				} -				break; - -			default: -				cx.isData = true; -				break; -			} -		} - -		if(cx.isData) -		{ -			// We translate tabs into the appropriate control -			// word -			if(ch == '\t') -				sendControlWord(cx, "tab", 0, -1); - -// Don't need this code, the XML outputter -// Takes care of it for us -#if 0 -			if(ch == '&') -				sendData(cx, L"&"); - -			else if(ch == '\'') -				sendData(cx, L"'"); - -			else if(ch == '"') -				sendData(cx, L"""); - -			else if(ch == '<') -				sendData(cx, L"<"); - -			else if(ch == '>') -				sendData(cx, L">"); -#endif - -			// enters a -			else if(!strchr("\r\n", ch)) -				sendData(cx, ch); - -			cx.isData = false; -		} -	} - -done: - -	if(m_depth != 0) -		m_parseErrors.append("unmatched braces\n"); - -	if(m_handler) -		m_handler->endDocument(); - -	return m_parseErrors.empty(); -} diff --git a/src/rtfparser.h b/src/rtfparser.h index bfa2e59..6b9e10d 100644 --- a/src/rtfparser.h +++ b/src/rtfparser.h @@ -36,6 +36,8 @@   *   */ +// RENAME RTFParser.h +  #ifndef __RTFREADER_H__  #define __RTFREADER_H__ @@ -43,29 +45,22 @@  #include <stack>  #include <stdio.h> -class RtfReader; +class RtfHandler; -class RtfHandler -{ -public: -	virtual void startDocument(RtfReader* reader) = 0; -	virtual void endDocument() = 0; -  virtual void controlWord(const string& cw, int flags, int param) = 0; -	virtual void groupStart() = 0; -	virtual void groupEnd() = 0; -	virtual void charData(wstring data) = 0; - -	static const int kAsterisk; -	static const int kHasParam; -	static const int kIsEncoded; -}; - -class RtfReader +/* + * RTFParser + * + * A class that parses the RTF into it's tags and groups etc... It feeds its + * parsed data into into a handler interface (see below) for processing. + * + * Performs some basic conversion and sanity checking (unicode chars etc...) + * as well. + */ +class RtfParser  {  public: -	RtfReader(); -	virtual ~RtfReader(); - +	RtfParser(); +	virtual ~RtfParser();  	bool parse(string fileName);  	bool parse(FILE* file); @@ -81,12 +76,13 @@ public:  	void setUnicode(bool unicode);  protected: -	RtfHandler* m_handler; -	int m_depth; -	bool m_parseHex; -	string m_parseErrors; +	RtfHandler* m_handler;         // The current handler +	int m_depth;                   // To keep track of group depth +	bool m_parseHex;               // Whether to parse hex chars or not +	string m_parseErrors;          // A list of all the RTF parsing errors -	// Unicode handling +    // TODO: Look at exactly what this is doing +    // Unicode char handling  	bool m_parseUnicode;  	typedef std::stack<int> StackInt;  	StackInt m_uniEatStack; @@ -94,19 +90,62 @@ protected:  private: +    // TODO: Why aren't these just members? +  	struct RtfContext  	{ -		FILE* file; -		bool isData; -		wstring data; +		FILE* file;       // The current file being parsed +		wstring data;     // Any data stored up ready to be sent to handler +        bool isData;      // TODO: Do we need this?  	}; +    // Parse helpers  	bool parseControlWord(RtfContext& cx);  	bool parseHexChar(RtfContext& cx, int num); + +    // Convenience functions for calling the handler  	void sendControlWord(RtfContext& cx, string cw, int flags, int param);  	void sendData(RtfContext& cx, wchar_t ch);  	void sendData(RtfContext& cx, wstring data); -	void emptyData(RtfContext& cx); +	void flushData(RtfContext& cx); +}; + +/* + * RTFHandler + * + * An interface called by RTFParser with tags and groups etc... parsed from + * an RTF file. + */ +class RtfHandler +{ +public: + +    // Called at the beginning of the document +    virtual void startDocument(RtfReader* reader) = 0; + +    // Called at the end of the document +    virtual void endDocument() = 0; + +    // Called when an RTF control word is hit. Flags below. +    // If control word has no param then param is -1 +    virtual void controlWord(const string& cw, int flags, int param) = 0; + +    // Called when an RTF group opened +    virtual void groupStart() = 0; + +    // Called when an RTF group is closed +    virtual void groupEnd() = 0; + +    // A block of character data encountered +    virtual void charData(wstring data) = 0; + +    // Flags for controlWord +    enum +    { +        kAsterisk = 0x00000001, +        kHasParam = 0x00000002, +        kIsEncoded = 0x00000004 +    };  };  #endif // __RTFREADER_H__ diff --git a/src/rtfx.cpp b/src/rtfx.cpp index 7576d51..a620498 100644 --- a/src/rtfx.cpp +++ b/src/rtfx.cpp @@ -57,19 +57,25 @@ int main(int argc, char* argv[])  	try  	{ +        // The input file  		FILE* file = fopen(argv[1], "rb");  		if(!file)  		{ -			fprintf(stderr, "rtfx: couldn't open file: %s: %s\n", argv[1], strerror(errno)); +			fprintf(stderr, "rtfx: couldn't open rtf file: %s: %s\n", argv[1], strerror(errno));  			return 1;  		} +        // Default options  		RtfParserOptions options; -		RtfParser handler(options); -		RtfReader rtf; -		rtf.setHandler(&handler); +        // Reads RTF tags and blocks +        RtfParser rtf; + +        // Interprets tags and blocks from RTFParser +		XMLComposer composer(options); +		rtf.setHandler(&composer); +  		bool ret = rtf.parse(file);  		fclose(file); @@ -79,8 +85,9 @@ int main(int argc, char* argv[])  			return 1;  		} - -		DOM::Document doc = handler.getDocument(); +        // TODO: This is disgusting. We need to bug the sablotron guys +        // for a better way to serialize a document. +		DOM::Document doc = composer.getDocument();  		string xml = doc.serialize();  		FILE* out = fopen(argv[2], "wb"); @@ -93,7 +100,6 @@ int main(int argc, char* argv[])  		fwrite(xml.c_str(), 1, xml.length(), out);  		fclose(out);  		return 0; -  	}  	catch(DOM::DOMException& e)  	{ diff --git a/src/xmlcomposehelpers.h b/src/xmlcomposehelpers.h index f91923e..9d36ef5 100644 --- a/src/xmlcomposehelpers.h +++ b/src/xmlcomposehelpers.h @@ -36,6 +36,9 @@   *   */ +// RENAME: XMLComposeHelpers.h +// Possibly merge with XMLComposer.h +  #ifndef __RTFPARSEHELPERS_H__  #define __RTFPARSEHELPERS_H__ @@ -44,8 +47,15 @@  #include "sablo.h"  #include "rtfformatting.h" -class RtfParser; +class XMLComposer; +/* + * Destination + * + * A destination is a small class that handles the character data found + * in the RTF document. Depending on the current context in the RTF + * different destinations are used. + */  class Destination :  	public Instance  { @@ -58,11 +68,17 @@ public:  	virtual void done() {};  protected: -	RtfParser* m_parser; -	friend class RtfParser; +	XMLComposer* m_composer; +	friend class XMLComposer;  }; - +/* + * Analyser + * + * An analyser is a small class that handles the RTF control words. + * Depending on the current context in the RTF different analysers + * are used. + */  class Analyser :  	public Instance  { @@ -79,16 +95,40 @@ public:  	virtual void done() {};  protected: -	RtfParser* m_parser; -	friend class RtfParser; +	XMLComposer* m_composer; +	friend class XMLComposer;  };  class Level; +// Reference counted pointers  typedef Reference<Destination> DestinationPtr;  typedef Reference<Analyser> AnalyserPtr;  typedef Reference<Level> LevelPtr; +/* + * Level + * + * A level is a combination of a Destination, Analyser, XML Element and + * some other options. They're used in a stack to push and pop these as + * RTF groups are found. + * + * About the stack: + * Not each level has it's own options. If a certain option isn't found + * in the current level the previous one is looked up. That's what all + * the 'deep' stuff is about below: + * + * get* methods: + * When 'deep' is set look to previous levels for the given object if not + * found at the current level. When not set returns object in current level + * or null when none exists here. + * + * set* methods: + * When 'deep' is set then replace the object currently being used at it's + * own level. So if get* would return an object from a previous level, with + * deep set to true it would replace that object in the given level. When + * not set, then the object is set in the current level. + */  class Level :  	public Instance  { @@ -99,24 +139,33 @@ public:  	LevelPtr getPrevious();  	LevelPtr pushLevel(); +    // The current XML Element +    // TODO: Add deep semantics here  	DOM::Element getElement();  	void setElement(DOM::Element element, bool deep = false); + +    // The current Analyser  	AnalyserPtr getAnalyser(bool deep = true);  	void setAnalyser(AnalyserPtr analyser, bool deep = false); + +    // The current Destination  	DestinationPtr getDestination(bool deep = true);  	void setDestination(DestinationPtr destination, bool deep = false); + +    // The current formatting options  	RtfFormatting& getFormatting();  	void setTextProperties(RtfFormatting& textProperties);  protected: + +    // Constructor for stacking levels  	Level(const Level& level); -	LevelPtr m_previous; -	DOM::Element m_element; -	RtfFormatting m_text; -	DestinationPtr m_destination; -	AnalyserPtr m_analyser; +	LevelPtr m_previous;           // The previous level +	DOM::Element m_element;        // XML Element for this level +	RtfFormatting m_text;          // Formatting options for this level +	DestinationPtr m_destination;  // Destination for this level +	AnalyserPtr m_analyser;        // Analyser for this level  }; -  #endif //__RTFPARSEHELPERS_H__ diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp index 7e74f70..6072375 100644 --- a/src/xmlcomposer.cpp +++ b/src/xmlcomposer.cpp @@ -42,20 +42,21 @@  #include "domhelpers.h"  #include "tags.h" -////////////////////////////////////////////////////////////////////// -// Construction/Destruction -////////////////////////////////////////////////////////////////////// +/* ---------------------------------------------------------------------------------- + *  CONSTRUCTION + */ -RtfParser::RtfParser(const RtfParserOptions& options) +XmlComposer::XmlComposer(const RtfParserOptions& options)  {  	m_document = NULL;  	memcpy(&m_options, &options, sizeof(options)); +    // All autocounters start at 1      for(int i = 0; i < AUTOCOUNT_MAX; i++)          m_autocount[i] = 1;  } -RtfParser::~RtfParser() +XmlComposer::~XmlComposer()  {  	clear(); @@ -63,7 +64,7 @@ RtfParser::~RtfParser()  		m_impl.release();  } -void RtfParser::clear() +void XmlComposer::clear()  {  	if(m_document != NULL)  	{ @@ -78,9 +79,15 @@ void RtfParser::clear()  	LevelHandler::clear();  } -void RtfParser::startDocument(RtfReader* reader) + +/* ---------------------------------------------------------------------------------- + *  HANDLER OVERRIDES + */ + +void XmlComposer::startDocument(RtfReader* reader)  {  	LevelHandler::startDocument(reader); +    ASSERT(m_curLevel != NULL);  	// Create a new document  	m_document = m_impl.createDocument("", kElDoc, DOM::DocumentType()); @@ -88,7 +95,7 @@ void RtfParser::startDocument(RtfReader* reader)  	// TODO: Throw error if document is null  	ASSERT(m_document != NULL); -	ASSERT(m_curLevel != NULL); +    // Hook up the top level element  	m_curLevel->setElement(m_document.getDocumentElement(), true);  	// Set the attributes on the top level @@ -98,59 +105,106 @@ void RtfParser::startDocument(RtfReader* reader)  	getTextFormatting().resetText();  } -void RtfParser::endDocument() +void XmlComposer::endDocument()  {  	LevelHandler::endDocument(); -	// Cleanup the tree +	// Pass 0: Cleanup the tree  	RtfFixups::removeDuplicates(m_document);  	RtfFixups::consolidateStartTags(m_document);      RtfFixups::consolidateEndTags(m_document); + +    // Pass 1: Block breakout  	RtfFixups::breakTables(m_document);  	RtfFixups::breakTags(m_document, kElTable, kElRow);  	RtfFixups::breakTags(m_document, kElRow, kElCell);  	RtfFixups::wrapTags(m_document, kElCell, kElDest);  	RtfFixups::breakBlocks(m_document);  	RtfFixups::breakLists(m_document); + +    // Pass 2: Fixups  	RtfFixups::fixLists(m_document);  	RtfFixups::fixStyles(m_document);  	RtfFixups::fixBlocks(m_document);  	RtfFixups::removeTags(m_document);  	RtfFixups::breakBreak(m_document, kElDoc, kElPage);  	RtfFixups::breakBreak(m_document, kElDoc, kElSect); + +    // Pass 3: Final cleanup      RtfFixups::removeDuplicates(m_document);  	return;  } +void XmlComposer::charData(wstring data) +{ +    ASSERT(m_curLevel != NULL); +    DestinationPtr destination = m_curLevel->getDestination(); +    if(destination) +    { +        destination->charData(data); +    } +    else +    { +        // TODO: Change this so it sends char data to new destination +        // We should always have a destination +        destination = DestinationPtr(new Content); +        setDestination(destination); +    } +} +void XmlComposer::controlWord(const string& cw, int flags, int param) +{ +    ASSERT(m_curLevel != NULL); +    AnalyserPtr analyser = m_curLevel->getAnalyser(); +    if(analyser) +        analyser->controlWord(cw, flags, param); +} +void XmlComposer::groupStart() +{ +    LevelHandler::groupStart(); +    ASSERT(m_curLevel != NULL); +    AnalyserPtr analyser = m_curLevel->getAnalyser(); +    if(analyser) +        analyser->groupStart(); +} -// ----------------------------------------------------------------------- -//  Helper functions +void XmlComposer::groupEnd() +{ +    LevelHandler::groupEnd(); + +    ASSERT(m_curLevel != NULL); +    AnalyserPtr analyser = m_curLevel->getAnalyser(); +    if(analyser) +        analyser->groupEnd(); +} + + +/* ---------------------------------------------------------------------------------- + *  HELPER FUNCTIONS + */ -DOM::Element RtfParser::createElement(const string& name) +DOM::Element XmlComposer::createElement(const string& name)  {  	ASSERT(name.length() > 0);  	return m_document.createElement(name); - -	// TODO: Throw exception here if necessary  } -void RtfParser::replaceElement(const DOM::Element& element) +void XmlComposer::replaceElement(const DOM::Element& element)  {  	ASSERT(m_curLevel != NULL);  	m_curLevel->setElement(element, true);  } -void RtfParser::pushElement(const DOM::Element& element) +void XmlComposer::pushElement(const DOM::Element& element)  {  	ASSERT(m_curLevel != NULL);  	getElement().appendChild(element);  	m_curLevel->setElement(element);  } -DOM::Element RtfParser::popElement() +DOM::Element XmlComposer::popElement()  {  	DOM::Element element = getElement();  	ASSERT(m_curLevel != NULL); @@ -163,7 +217,7 @@ DOM::Element RtfParser::popElement()  	return element;  } -void RtfParser::setAttribute(const string& name, const wstring& value, DOM::Element el) +void XmlComposer::setAttribute(const string& name, const wstring& value, DOM::Element el)  {  	ASSERT(name.length() > 0);  	if(el == NULL) @@ -171,7 +225,7 @@ void RtfParser::setAttribute(const string& name, const wstring& value, DOM::Elem  	el.setAttribute(name, value);  } -void RtfParser::setAttribute(const string& name, int value, DOM::Element el) +void XmlComposer::setAttribute(const string& name, int value, DOM::Element el)  {      ASSERT(name.length() > 0);      if(el == NULL) @@ -179,153 +233,116 @@ void RtfParser::setAttribute(const string& name, int value, DOM::Element el)      el.setAttribute(name, formatInt(value));  } -void RtfParser::setDestination(DestinationPtr dest) +void XmlComposer::setDestination(DestinationPtr dest)  {  	ASSERT(m_curLevel);  	m_curLevel->setDestination(dest); -	dest->m_parser = this; +	dest->m_composer = this;  	dest->initialize();  } -DestinationPtr RtfParser::replaceDestination(DestinationPtr dest) +DestinationPtr XmlComposer::replaceDestination(DestinationPtr dest)  {  	ASSERT(m_curLevel);  	DestinationPtr old = m_curLevel->getDestination();  	m_curLevel->setDestination(dest, true); -	dest->m_parser = this; +	dest->m_composer = this;  	dest->initialize();  	return old;  } -void RtfParser::setAnalyser(AnalyserPtr analy) +void XmlComposer::setAnalyser(AnalyserPtr analy)  {  	ASSERT(m_curLevel);  	ASSERT(analy != NULL); -	analy->m_parser = this; +	analy->m_composer = this;  	m_curLevel->setAnalyser(analy);  	analy->initialize();  } -AnalyserPtr RtfParser::getAnalyser() +AnalyserPtr XmlComposer::getAnalyser()  {  	ASSERT(m_curLevel);  	return m_curLevel->getAnalyser();  } -DestinationPtr RtfParser::getDestination() +DestinationPtr XmlComposer::getDestination()  {  	ASSERT(m_curLevel);  	return m_curLevel->getDestination();  } -RtfFormatting& RtfParser::getTextFormatting() +RtfFormatting& XmlComposer::getTextFormatting()  {  	ASSERT(m_curLevel);  	return m_curLevel->getFormatting();  } -int RtfParser::getAutoCount(int type) +int XmlComposer::getAutoCount(int type)  {      ASSERT(type < AUTOCOUNT_MAX);      return m_autocount[type];  } -void RtfParser::incrementAutoCount(int type) +void XmlComposer::incrementAutoCount(int type)  {      ASSERT(type < AUTOCOUNT_MAX);      m_autocount[type]++;  } -// --------------------------------------------------------------------------------- -//  Pass this stuff on through to the appropriate analysers etc... - -void RtfParser::charData(wstring data) +wstring XmlComposer::formatInt(int num)  { -	ASSERT(m_curLevel != NULL); -	DestinationPtr destination = m_curLevel->getDestination(); -	if(destination) -	{ -		destination->charData(data); -	} -	else -	{ -		destination = DestinationPtr(new Content); -		setDestination(destination); -	} - -} +  char buff[16]; -void RtfParser::controlWord(const string& cw, int flags, int param) -{ -	ASSERT(m_curLevel != NULL); -	AnalyserPtr analyser = m_curLevel->getAnalyser(); -	if(analyser) -		analyser->controlWord(cw, flags, param); -} +  // Certain OSs don't support swprintf :( +  sprintf(buff, "%d", num); -void RtfParser::groupStart() -{ -	LevelHandler::groupStart(); +  wstring n; +  for(char* s = buff; *s; s++) +    n.append(1, *s); -	ASSERT(m_curLevel != NULL); -	AnalyserPtr analyser = m_curLevel->getAnalyser(); -	if(analyser) -		analyser->groupStart(); +  return n;  } -void RtfParser::groupEnd() -{ -	ASSERT(m_curLevel != NULL); -    bool done = true; -	LevelHandler::groupEnd(); - -	AnalyserPtr analyser = m_curLevel->getAnalyser(); -    if(analyser) -		analyser->groupEnd(); -} +/* ---------------------------------------------------------------------------------- + *  CONVENIENCE MACROS USED BELOW + */ -#define ON_INITIALIZE(cls) \ -	void RtfParser::cls::initialize() -#define ON_CONTROLWORD(cls) \ -	void RtfParser::cls::controlWord(const string& cw, int flags, int param) -#define ON_CHARDATA(cls) \ -	void RtfParser::cls::charData(wstring data) -#define ON_GROUPSTART(cls) \ -	void RtfParser::cls::groupStart() -#define ON_GROUPEND(cls) \ -	void RtfParser::cls::groupEnd() -#define ON_DONE(cls) \ -	void RtfParser::cls::done()  #define AN_ELEMENT(name) \ -	m_parser->pushElement(m_parser->createElement(name)) + m_composer->pushElement(m_composer->createElement(name))  #define AN_POP_ELEMENT() \ -	m_parser->popElement() +    m_composer->popElement()  #define AN_ATTRIBUTE(name, value) \ -	m_parser->setAttribute(name, value) +   m_composer->setAttribute(name, value)  #define AN_DESTINATION_ATTR(name) \ -	m_parser->setDestination(new Attribute(name)) +  m_composer->setDestination(new Attribute(name))  #define AN_DESTINATION(cls) \ -	m_parser->setDestination(new cls) +  m_composer->setDestination(new cls)  #define AN_ANALYSER(cls) \ -	m_parser->setAnalyser(AnalyserPtr(new cls)) + m_composer->setAnalyser(AnalyserPtr(new cls))  #define AN_SET_ANALYSER(cls) \ -	m_parser->setAnalyser(AnalyserPtr(cls)) +   m_composer->setAnalyser(AnalyserPtr(cls))  #define HAS_PARAM (flags & kHasParam)  #define DEFAULT_CONTROLWORD processDefault(cw, flags, param) -#define DUMMY	1 == 1 -#define NUM_ATTR(n) m_parser->formatInt(n) +#define DUMMY  1 == 1 + + +/* ---------------------------------------------------------------------------------- + *  BASE ANALYSER + */ -bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int param) +bool XmlComposer::BaseAnalyser::processDefault(const string& cw, int flags, int param)  { +    // Unicode blocks go to a special analyser  	if(cw == "upr")  	{ -		AnalyserPtr analy = m_parser->getAnalyser(); +		AnalyserPtr analy = m_composer->getAnalyser();  		ASSERT(analy != NULL);  		AN_SET_ANALYSER(new Upr(analy));  		return true; @@ -334,41 +351,41 @@ bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int p  	return false;  } -void RtfParser::ParseAnalyser::applyParaFormatting(RtfFormatting* format, -												   DOM::Element& el) +void XmlComposer::BaseAnalyser::applyParaFormatting(RtfFormatting* format, +												    DOM::Element& el)  {  	if(format == NULL) -		format = &(m_parser->getTextFormatting()); +		format = &(m_composer->getTextFormatting());  	wstring fix = kValPara; +    // Is it a list?  	int list = format->paraList();  	if(list != -1) -	{ -		el.setAttribute(kAtList, NUM_ATTR(list)); -	} +		el.setAttribute(kAtList, list);  	else -	{  		el.removeAttribute(kAtList); -	} +    // Is it a cell?  	if(format->paraInTable())  		el.setAttribute(kAtCell, L"1");  	else  		el.removeAttribute(kAtCell); +    // Paragraph styles  	int style = format->paraStyle();  	if(style != -1) -		el.setAttribute(kElStyle, NUM_ATTR(style)); +		el.setAttribute(kElStyle, style);  	else  		el.removeAttribute(kElStyle); +    // These fix elements are later picked up in XmlFixups::fixBlocks  	el.setAttribute(kAtFix, fix);  } -DOM::Element RtfParser::ParseAnalyser::getCurrentBlock() +DOM::Element XmlComposer::BaseAnalyser::getCurrentBlock()  { -	DOM::Node node = m_parser->getElement(); +	DOM::Node node = m_composer->getElement();  	if(node.hasChildNodes())  		node = node.getLastChild(); @@ -377,97 +394,115 @@ DOM::Element RtfParser::ParseAnalyser::getCurrentBlock()  } -bool RtfParser::ParseAnalyser::processTextContent(const string& cw, int flags, int param) +bool XmlComposer::BaseAnalyser::processTextContent(const string& cw, int flags, int param)  {  	DOM::Element el;  	bool process = false; -	RtfFormatting& format = m_parser->getTextFormatting(); +	RtfFormatting& format = m_composer->getTextFormatting(); +    // New paragraph  	if(cw == "par")  	{  		el = getCurrentBlock();  		if(el != NULL)  			applyParaFormatting(&format, el); -		el = m_parser->createElement(kElBlock); +		el = m_composer->createElement(kElBlock);  		applyParaFormatting(&format, el);  	} +    // Cells (used later in applyParaFormatting)  	else if(cw == "intbl")  		format.paraSetTable(true); +    // Start of a cell  	else if(cw == "cell")  	{  		el = getCurrentBlock();  		if(el != NULL)  			applyParaFormatting(&format, el); -		el = m_parser->createElement(kElCell); -		m_parser->pushElement(el); -		m_parser->popElement(); -		el = m_parser->createElement(kElBlock); +		el = m_composer->createElement(kElCell); +		m_composer->pushElement(el); +		m_composer->popElement(); +		el = m_composer->createElement(kElBlock);  		applyParaFormatting(&format, el);  	} +    // Start of a row  	else if(cw == "trowd") -		el = m_parser->createElement(kElRow); +		el = m_composer->createElement(kElRow); +    // A tab  	else if(cw == "tab") -		el = m_parser->createElement(kElTab); +		el = m_composer->createElement(kElTab); +    // A section break  	else if(cw == "sect") -		el = m_parser->createElement(kElSect); +		el = m_composer->createElement(kElSect); +    // A page break  	else if(cw == "page") -		el = m_parser->createElement(kElPage); +		el = m_composer->createElement(kElPage); +    // A paragraph style  	else if(cw == "s" && HAS_PARAM)  		format.paraSetStyle(param); +    // A line break  	else if(cw == "line") -		el = m_parser->createElement(kElLine); +		el = m_composer->createElement(kElLine); +    // A page header (not implemented)  	else if(cw == "header")  		AN_ANALYSER(Skip); + +    // A page footer (not implemented)  	else if(cw == "footer")  		AN_ANALYSER(Skip); + +    // A bookmark (not implemented)  	else if(cw == "bkmkstart")  		AN_ANALYSER(Skip); + +    // List text (not implemented)  	else if(cw == "listtext")  		AN_ANALYSER(Skip); +    // Set list style (used in applyFormatting)  	else if(cw == "ls" && HAS_PARAM)  		format.paraSetList(param);  	if(el != NULL)  	{  		// This ensures that our content destination is open and ready -        DestinationPtr dest = m_parser->getDestination(); +        DestinationPtr dest = m_composer->getDestination();  		ASSERT(dest != NULL);  		dest->charData(kValNull); -		m_parser->pushElement(el); -		m_parser->popElement(); +		m_composer->pushElement(el); +		m_composer->popElement();  	}  	return (el != NULL) || process; - -	/* TODO: cell, row, intbl, cellx, trowd*/  } -bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, +bool XmlComposer::BaseAnalyser::processTextFormatting(const string& cw, int flags,  												  int param, RtfFormatting& format)  {  	bool on = true;  	if(flags & HAS_PARAM && param == 0)  		on = false; +    // Clears all paragraph formatting  	if(cw == "pard")  	{  		format.resetPara(); -//		applyParaFormatting(); +        // applyParaFormatting();  	} + +    // Rest are pretty much self-explanatory  	else if(cw == "plain")  		format.resetText();  	else if(cw == "b") @@ -490,21 +525,22 @@ bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags  	return true;  } -bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, int param) +bool XmlComposer::BaseAnalyser::processTextFormatting(const string& cw, int flags, int param)  { -	return processTextFormatting(cw, flags, param, m_parser->getTextFormatting()); +	return processTextFormatting(cw, flags, param, m_composer->getTextFormatting());  } -bool RtfParser::ParseAnalyser::processTextAutoContent(const string& cw, int flags, int param) +bool XmlComposer::BaseAnalyser::processTextAutoContent(const string& cw, int flags, int param)  { -    DestinationPtr dest = m_parser->getDestination(); +    DestinationPtr dest = m_composer->getDestination();      ASSERT(dest != NULL);      dest->charData(kValNull);      // Auto generated content      if(cw == "chftn")      { -        int ac = m_parser->getAutoCount(AUTOCOUNT_FOOTNOTE); +        // Footnote auto numbering +        int ac = m_composer->getAutoCount(AUTOCOUNT_FOOTNOTE);          AN_ELEMENT(kElRef);          AN_ATTRIBUTE(kAtType, kValFootNote); @@ -517,38 +553,72 @@ bool RtfParser::ParseAnalyser::processTextAutoContent(const string& cw, int flag      return false;  } +/* ---------------------------------------------------------------------------------- + *  ANALYSER/DESTINATION DEFINITIONS + */ + +#define ON_INITIALIZE(cls) \ +   void XmlComposer::cls::initialize() +#define ON_CONTROLWORD(cls) \ +  void XmlComposer::cls::controlWord(const string& cw, int flags, int param) +#define ON_CHARDATA(cls) \ +  void XmlComposer::cls::charData(wstring data) +#define ON_GROUPSTART(cls) \ + void XmlComposer::cls::groupStart() +#define ON_GROUPEND(cls) \ + void XmlComposer::cls::groupEnd() +#define ON_DONE(cls) \ +   void XmlComposer::cls::done() + + +// Skip Analyser -------------------------------------------------------------------- +  ON_INITIALIZE(Skip)  	{ AN_DESTINATION(Null); } +  ON_GROUPSTART(Skip)  	{ AN_ANALYSER(Skip); } -RtfParser::Upr::Upr(AnalyserPtr prv) +// Upr Analyser --------------------------------------------------------------------- + +XmlComposer::Upr::Upr(AnalyserPtr prv)  {  	ASSERT(prv);  	prev = prv;  } +  ON_GROUPSTART(Upr) -	{ AN_ANALYSER(Skip); } +{ +    AN_ANALYSER(Skip); +} +  ON_GROUPEND(Upr)  {  	ASSERT(prev); -	m_parser->setAnalyser(prev); +	m_composer->setAnalyser(prev);  	prev = NULL;  } +// Stylesheet Analyser -------------------------------------------------------------- +  ON_INITIALIZE(Stylesheet)  {  	AN_ELEMENT(kElStylesheet);  } +  ON_GROUPSTART(Stylesheet)  { +    // Each group should be a style  	AN_ANALYSER(Style); + +    // Without any character data  	AN_DESTINATION(Null);  } +// Stylesheet Style Analyser --------------------------------------------------------  ON_INITIALIZE(Style)  { @@ -556,6 +626,7 @@ ON_INITIALIZE(Style)  	// so we can't always create  	haveStyle = false;  } +  ON_CONTROLWORD(Style)  {  	// Get the style id @@ -565,6 +636,7 @@ ON_CONTROLWORD(Style)  		return;  	} +    // Create the style tag if necessary  	if(!haveStyle)  	{  		AN_ELEMENT(kElStyle); @@ -572,9 +644,10 @@ ON_CONTROLWORD(Style)  		haveStyle = true;  	} +    // The style id  	if(cw == "s" && flags & kHasParam)  	{ -		AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); +		AN_ATTRIBUTE(kAtId, param);  	}  	// Otherwise get as much formatting out of the tag as possible @@ -584,13 +657,17 @@ ON_CONTROLWORD(Style)  	else  		DEFAULT_CONTROLWORD;  } +  ON_GROUPSTART(Style)  {  	AN_ANALYSER(Skip);  } +  ON_GROUPEND(Style)  { -	RtfFormatting& props = m_parser->getTextFormatting(); +	RtfFormatting& props = m_composer->getTextFormatting(); + +    // Dig out all the formatting attributes  	if(props.textIsBold())  		AN_ATTRIBUTE(kAtBold, L"1");  	if(props.textIsHidden()) @@ -601,42 +678,54 @@ ON_GROUPEND(Style)  		AN_ATTRIBUTE(kAtStrike, L"1");  	if(props.textIsUnderline())  		AN_ATTRIBUTE(kAtUnderline, L"1"); -	if(props.textColor() != -1 && m_parser->getOptions().doColors) -		AN_ATTRIBUTE(kAtColor, NUM_ATTR(props.textColor())); +	if(props.textColor() != -1 && m_composer->getOptions().doColors) +		AN_ATTRIBUTE(kAtColor, props.textColor());  } +// List Table Analyser --------------------------------------------------------------  ON_INITIALIZE(ListTable)  {  	AN_ELEMENT(kElListtable);  } +  ON_GROUPSTART(ListTable)  { +    // Everything in here should be a list  	AN_ANALYSER(List); + +    // Content doesn't matter  	AN_DESTINATION(Null);  } +// List (in List Table) Analyser ----------------------------------------------------  ON_INITIALIZE(List)  { -	AN_ELEMENT(kElListdef); +    // Create a default element +  	AN_ELEMENT(kElListdef);  	AN_ATTRIBUTE(kAtType, kValDisc);  	AN_ATTRIBUTE(kAtOrdered, L"0");  	levelsSeen = 0;  } +  ON_CONTROLWORD(List)  { +    // The name  	if(cw == "listname")  		AN_DESTINATION_ATTR(kAtName); + +    // The list id  	else if(cw == "listid" && HAS_PARAM) -		AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); +		AN_ATTRIBUTE(kAtId, param);  	// We let listlevel in here too  	else if(cw == "levelstartat" && HAS_PARAM) -		AN_ATTRIBUTE(kAtStart, NUM_ATTR(param)); +		AN_ATTRIBUTE(kAtStart, param); +    // The list type  	else if(cw == "levelnfc" && HAS_PARAM)  	{  		switch(param) @@ -679,27 +768,35 @@ ON_CONTROLWORD(List)  	else  		DEFAULT_CONTROLWORD;  } +  ON_GROUPSTART(List)  { +    // Skip internal groups and content +  	if(levelsSeen > 0)  		AN_ANALYSER(Skip); +  	levelsSeen++;  } - +// The List Override Table ----------------------------------------------------------  ON_INITIALIZE(ListOverrideTable)  { -	DOM::Document document = m_parser->getDocument(); +    // Get all of the current lists +	DOM::Document document = m_composer->getDocument();  	lists = document.getElementsByTagName(kElListdef);  	curList = NULL;  	lsId = -1;  } +  ON_GROUPSTART(ListOverrideTable)  { +    // Content doesn't matter  	AN_DESTINATION(Null);  } +  ON_CONTROLWORD(ListOverrideTable)  {  	// New list override clear @@ -709,10 +806,11 @@ ON_CONTROLWORD(ListOverrideTable)  	// List id for current listoverride  	else if(cw == "listid" && HAS_PARAM)  	{ -		wstring id = NUM_ATTR(param); +		wstring id = XmlComposer::formatInt(param);  		if(lists != NULL)  		{ +            // Find the list in question  			for(int i = 0; i < lists->getLength(); i++)  			{  				DOM::Node node = lists->item(i); @@ -737,7 +835,7 @@ ON_CONTROLWORD(ListOverrideTable)  	else if(cw == "levelstartat" && HAS_PARAM)  	{  		if(curList != NULL) -			curList.setAttribute(kAtStart, NUM_ATTR(param)); +			curList.setAttribute(kAtStart, param);  	}  	else @@ -755,7 +853,7 @@ ON_CONTROLWORD(ListOverrideTable)  			if(curList != NULL)  			{  				parent.appendChild(curList); -				curList.setAttribute(kAtList, NUM_ATTR(lsId)); +				curList.setAttribute(kAtList, lsId);  			}  		} @@ -763,13 +861,9 @@ ON_CONTROLWORD(ListOverrideTable)  	}  } -ON_GROUPEND(ListOverrideTable) -{ - -} - +// Info Block Analyser --------------------------------------------------------------  ON_INITIALIZE(Info)  { @@ -777,39 +871,45 @@ ON_INITIALIZE(Info)  	AN_ELEMENT(kElInfo);  	AN_DESTINATION(Null);  } +  ON_CONTROLWORD(Info)  { -	// The title  	if(cw == "title")  	{  		AN_ELEMENT(kElTitle);  		AN_DESTINATION(Raw);  	} +  	else if(cw == "author")  	{  		AN_ELEMENT(kElAuthor);  		AN_DESTINATION(Raw);  	} +  	else if(cw == "operator")  	{  		AN_ELEMENT(kElOperator);  		AN_DESTINATION(Raw);  	} +  	else if(flags & kAsterisk)  		AN_ANALYSER(Skip); +  	else  		DEFAULT_CONTROLWORD;  } - +// Root Analyser --------------------------------------------------------------------  ON_INITIALIZE(Root)  {  } +  ON_CONTROLWORD(Root)  { +    // All the main RTF sections  	if(cw == "stylesheet")  		AN_ANALYSER(Stylesheet);  	else if(cw == "listtable") @@ -842,23 +942,26 @@ ON_CONTROLWORD(Root)  } +// Content Destination -------------------------------------------------------------- +  ON_INITIALIZE(Content)  { -	parent = m_parser->getElement(); +	parent = m_composer->getElement();  	created = false;  } +  ON_CHARDATA(Content)  {  	// Create the first time we get content  	if(!created)  	{ -		DOM::Element dest = m_parser->createElement(kElDest); +		DOM::Element dest = m_composer->createElement(kElDest);  		parent.appendChild(dest); -		m_parser->replaceElement(dest); +		m_composer->replaceElement(dest); -		DOM::Element el = m_parser->createElement(kElBlock); -		m_parser->pushElement(el); -		m_parser->popElement(); +		DOM::Element el = m_composer->createElement(kElBlock); +		m_composer->pushElement(el); +		m_composer->popElement();  		created = true;  	} @@ -867,7 +970,10 @@ ON_CHARDATA(Content)  		return;  	int elements = 0; -	RtfFormatting& format = m_parser->getTextFormatting(); +	RtfFormatting& format = m_composer->getTextFormatting(); + +    // Extra elements written out here are consolidated in +    // XmlFixups::combineDuplicates  	// Now do text Properties if necessary  	if(format.textIsBold()) @@ -875,37 +981,44 @@ ON_CHARDATA(Content)  		AN_ELEMENT(kElB);  		elements++;  	} +  	if(format.textIsHidden())  	{  		AN_ELEMENT(kElHide);  		elements++;  	} +  	if(format.textIsItalic())  	{  		AN_ELEMENT(kElI);  		elements++;  	} +  	if(format.textIsStrike())  	{  		AN_ELEMENT(kElStrike);  		elements++;  	} +  	if(format.textIsUnderline())  	{  		AN_ELEMENT(kElU);  		elements++;  	} -	if(format.textColor() != -1 && m_parser->getOptions().doColors) + +	if(format.textColor() != -1 && m_composer->getOptions().doColors)  	{  		AN_ELEMENT(kElColor); -		AN_ATTRIBUTE(kAtIndex, NUM_ATTR(format.textColor())); +		AN_ATTRIBUTE(kAtIndex, format.textColor());  		elements++;  	} +      if(format.textSuScript() == RtfFormatting::SUPERSCRIPT)      {          AN_ELEMENT(kElSuper);          elements++;      } +      if(format.textSuScript() == RtfFormatting::SUBSCRIPT)      {          AN_ELEMENT(kElSub); @@ -913,8 +1026,8 @@ ON_CHARDATA(Content)      }  	// Write the data to the element -	m_parser->getElement().appendChild( -		m_parser->getDocument().createTextNode(data)); +	m_composer->getElement().appendChild( +		m_composer->getDocument().createTextNode(data));  	// Now drop out of all the above formatting  	while(elements-- > 0) @@ -922,22 +1035,25 @@ ON_CHARDATA(Content)  } +// FootNote Analyser ---------------------------------------------------------------- +  ON_INITIALIZE(FootNote)  { -    int ac = m_parser->getAutoCount(AUTOCOUNT_FOOTNOTE); +    int ac = m_composer->getAutoCount(AUTOCOUNT_FOOTNOTE);      AN_ELEMENT(kElFootNote);      AN_ATTRIBUTE(kAtId, ac);      AN_DESTINATION(Content);  } +  ON_CONTROLWORD(FootNote)  {      // Inside foot notes there's no link to the foot note      if(cw == "chftn")      { -        DestinationPtr dest = m_parser->getDestination(); +        DestinationPtr dest = m_composer->getDestination();          ASSERT(dest != NULL); -        int ac = m_parser->getAutoCount(AUTOCOUNT_FOOTNOTE); +        int ac = m_composer->getAutoCount(AUTOCOUNT_FOOTNOTE);          dest->charData(formatInt(ac));          return;      } @@ -952,27 +1068,28 @@ ON_CONTROLWORD(FootNote)      else          DEFAULT_CONTROLWORD;  } +  ON_DONE(FootNote)  { -    m_parser->incrementAutoCount(AUTOCOUNT_FOOTNOTE); +    m_composer->incrementAutoCount(AUTOCOUNT_FOOTNOTE);  } - +// Raw Destination ------------------------------------------------------------------  ON_CHARDATA(Raw)  {  	// Write the data to the element -	m_parser->getElement().appendChild( -		m_parser->getDocument().createTextNode(data)); +	m_composer->getElement().appendChild( +		m_composer->getDocument().createTextNode(data));  } - +// Attribute Destination ------------------------------------------------------------  ON_INITIALIZE(Attribute)  { -	element = m_parser->getElement(); +	element = m_composer->getElement();  	ASSERT(element != NULL);  } @@ -991,17 +1108,3 @@ ON_CHARDATA(Attribute)  	element.setAttribute(name, cur);  } -wstring RtfParser::formatInt(int num) -{ -  char buff[16]; - -  // Certain OSs don't support swprintf :( -  sprintf(buff, "%d", num); - -  wstring n; -  for(char* s = buff; *s; s++) -    n.append(1, *s); - -  return n; -} - diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h index deba4ba..4e5c739 100644 --- a/src/xmlcomposer.h +++ b/src/xmlcomposer.h @@ -36,68 +36,102 @@   *   */ +// RENAME: xmlcomposer.h +  #ifndef __RTFPARSER_H__  #define __RTFPARSER_H__  #include "levelhandler.h" -struct RtfParserOptions +struct XmlComposerOptions  { -	RtfParserOptions() +	XmlComposerOptions()  		{ memset(this, 0, sizeof(*this)); }  	bool doColors;  }; -class RtfParser : public LevelHandler +/* + * XmlComposer + * + * This is where the RTF gets initially converted to XML. RtfParser sends + * notifications to this class's RtfHandler interface. It forwards them to + * the current analysers and destinations which produce XML content. + * (see xmlcomposehelpers.h) + * + * Not all conversion is completed here. Because RTF is so very wierd we + * have to run lots of fixups are run in endDocument (see rtffixups.h) + */ +class XmlComposer : +    public LevelHandler  {  public: -	RtfParser(const RtfParserOptions& options); -	virtual ~RtfParser(); +	XmlComposer(const XmlComposerOptions& options); +	virtual ~XmlComposer(); -	virtual void startDocument(RtfReader* reader); +    // Handler Overrides +	virtual void startDocument(RtfParser* reader);  	virtual void endDocument();  	virtual void controlWord(const string& cw, int flags, int param);  	virtual void groupStart();  	virtual void groupEnd();  	virtual void charData(wstring data); -	// Element management functions +	// Create an XML element with given name  	DOM::Element createElement(const string& name); + +    // Push an XML element on the current level  	void pushElement(const DOM::Element& element); + +    // Replace current XML element with given element  	void replaceElement(const DOM::Element& element); + +    // Move up one XML element level without changing RTF level  	DOM::Element popElement(); + +    // Set attributes on the current XML Element  	void setAttribute(const string& name, const wstring& value, DOM::Element el = DOM::Element());      void setAttribute(const string& name, int value, DOM::Element el = DOM::Element()); -	// Changing the current parser functions +	// The current analyser in use +    AnalyserPtr getAnalyser();  	void setAnalyser(AnalyserPtr analy); + +    // The current destination in use +    DestinationPtr getDestination();  	void setDestination(DestinationPtr dest); + +    // Replace the current destination (sets level deep)  	DestinationPtr replaceDestination(DestinationPtr dest); -    // The types of auto counters + +    // The types of auto numbering      enum      {         AUTOCOUNT_FOOTNOTE,         AUTOCOUNT_MAX      }; -    // Functions for auto numbering +    // Functions for RTF auto numbering      int getAutoCount(int type);      void incrementAutoCount(int type); -	// Current status functions + +	// Get the current formatting options  	RtfFormatting& getTextFormatting(); -	AnalyserPtr getAnalyser(); -	DestinationPtr getDestination(); +  	DOM::Document getDocument()  		{ return m_document; }  	const RtfParserOptions& getOptions()  		{ return m_options; } + +    // TODO: Should this be somewhere else?      static wstring formatInt(int num); + +// LevelHandler override  protected:  	virtual void clear(); @@ -110,12 +144,12 @@ protected:      int m_autocount[AUTOCOUNT_MAX];        // Auto counters for the document -  // Sub classes  protected: +  	#define DESTINATION(cls) class cls : public Destination { public:  	#define END_DESTINATION }; -	#define ANALYSER(cls) class cls : public ParseAnalyser { public: +	#define ANALYSER(cls) class cls : public BaseAnalyser { public:  	#define END_ANALYSER };  	#define DATA_PORTION protected:  	#define INITIALIZE virtual void initialize(); @@ -125,6 +159,7 @@ protected:  	#define GROUPEND virtual void groupEnd();  	#define DONE virtual void done(); +    // Main destination for document character content  	DESTINATION(Content)  		INITIALIZE  		CHARDATA @@ -133,14 +168,16 @@ protected:  		DOM::Element parent;  	END_DESTINATION - +    // Discards character data  	DESTINATION(Null)  	END_DESTINATION +    // Copies raw character data to output  	DESTINATION(Raw)  		CHARDATA  	END_DESTINATION +    // Copies character data to an XML attribute  	DESTINATION(Attribute)  		Attribute(const string& nm) : name(nm) {}  		INITIALIZE @@ -150,7 +187,9 @@ protected:  		DOM::Element element;  	END_DESTINATION -	class ParseAnalyser : + +    // Base class for analysers with some helper functions +	class BaseAnalyser :  		public Analyser  	{  	public: @@ -158,23 +197,34 @@ protected:  			{ processDefault(cw, flags, param); }  	protected: -		// Some helper functions +        // Process a standard set of tags that can be found anywhere  		bool processDefault(const string& cw, int flags, int param); + +        // Process text formatting tags  		bool processTextFormatting(const string& cw, int flags, int param, RtfFormatting& format); +        bool processTextFormatting(const string& cw, int flags, int param); + +        // Creates 'fix' tags for paragraph formatting in element +        void applyParaFormatting(RtfFormatting* format, DOM::Element& el); + +        // Process tags that are either text content, or change context  		bool processTextContent(const string& cw, int flags, int param); -		bool processTextFormatting(const string& cw, int flags, int param); + +        // Process tags that generate text content (like auto-numbering, fields)          bool processTextAutoContent(const string& cw, int flags, int param); +        // Convenience function  		DOM::Element getCurrentBlock(); -		void applyParaFormatting(RtfFormatting* format, DOM::Element& el);  	}; +    // Skip tags and groups  	ANALYSER(Skip)  		INITIALIZE  		GROUPSTART  	END_ANALYSER +    // Unicode block analyser  	ANALYSER(Upr)  		Upr(AnalyserPtr prv);  		GROUPSTART @@ -183,11 +233,13 @@ protected:  		AnalyserPtr prev;  	END_ANALYSER +    // Handle Stylesheets  	ANALYSER(Stylesheet)  		INITIALIZE  		GROUPSTART  	END_ANALYSER +    // Handle a style in a stylesheet  	ANALYSER(Style)  		INITIALIZE  		CONTROLWORD @@ -197,11 +249,13 @@ protected:  		bool haveStyle;  	END_ANALYSER +    // Handle the list definitions  	ANALYSER(ListTable)  		INITIALIZE  		GROUPSTART  	END_ANALYSER +    // Handle a list in the list definitions  	ANALYSER(List)  		INITIALIZE  		CONTROLWORD @@ -210,27 +264,30 @@ protected:  		int levelsSeen;  	END_ANALYSER +    // Handle list overrides  	ANALYSER(ListOverrideTable)  		INITIALIZE  		CONTROLWORD  		GROUPSTART -		GROUPEND  	DATA_PORTION  		DOM::NodeList lists;  		int lsId;  		DOM::Element curList;  	END_ANALYSER +    // Creates the info block  	ANALYSER(Info)  		INITIALIZE  		CONTROLWORD  	END_ANALYSER +    // The main root analyser  	ANALYSER(Root)  		INITIALIZE  		CONTROLWORD  	END_ANALYSER +    // Handles footnotes      ANALYSER(FootNote)          INITIALIZE          CONTROLWORD diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp index c3fd8b6..6efe6f4 100644 --- a/src/xmlfixups.cpp +++ b/src/xmlfixups.cpp @@ -59,636 +59,585 @@ static const char* kConsolidateEnd[] =  static const char* kConsolidateStart[] =      { kElStylesheet, kElInfo, NULL }; +  void RtfFixups::breakBreak(DOM::Document& doc, const string& contain,                                const string& tag)  { - DOM::NodeList els = doc.getElementsByTagName(tag); - if(els != NULL) +    DOM::NodeList els = doc.getElementsByTagName(tag); +    if(els != NULL)      { -      for(int i = 0; i < els->getLength(); i++) -      { -          DOM::Element el = (const DOM::Element&)els->item(i); -#if 0 -         // See if parent node only has this break tag -          // in it. If so then replace parent with this - -           DOM::Node parent = el.getParentNode(); - -            if(parent != NULL) -         { -              DOM::Node grandparent = parent.getParentNode(); - -               if(grandparent != NULL && -                el.getPreviousSibling() == NULL && -                 el.getNextSibling() == NULL) -                { -                  grandparent.replaceChild(parent.removeChild(el), parent); -              } -          } -#endif - -          breakElement(el, contain); -     } -  } +        for(int i = 0; i < els->getLength(); i++) +        { +            DOM::Element el = (const DOM::Element&)els->item(i); +            breakElement(el, contain); +        } +    }  } -/** - * Breaks a paragraph up through a previous level. Calls itself - * recursively to break paragraphs totally free up to containing - * destination. - * - * For example: - * - * <dest> - *     This is <b> a <block fix="para"/> - *    test of </b> your concentration. - *  </dest> - * - * Becomes: - * - * <dest> - *     This is <b> a </b><block fix="para"/> - *   <b>test of </b> your concentration. - * </dest> - */  bool RtfFixups::breakElement(const DOM::Element& el, const string& contain)  { -   ASSERT(el != NULL); +    ASSERT(el != NULL); -   DOM::Element parent = (const DOM::Element&)el.getParentNode(); - DOM::Element grandparent; +    DOM::Element parent = (const DOM::Element&)el.getParentNode(); +    DOM::Element grandparent; - string s = el.getNodeName(); -   s = parent.getNodeName(); +    string s = el.getNodeName(); +    s = parent.getNodeName(); - // Get the parent node - if(parent != NULL) -     grandparent = (const DOM::Element&)parent.getParentNode(); +    // Get the parent node +    if(parent != NULL) +        grandparent = (const DOM::Element&)parent.getParentNode();      // Make sure we have something to work with before continuing -  if(grandparent == NULL || parent == NULL || -      DOMHelpers::isElement(parent, contain)) +    if(grandparent == NULL || parent == NULL || +       DOMHelpers::isElement(parent, contain))          return true; -  DOM::Node e; +    DOM::Node e; -  // Check to see if this is the first node in the parent. -   // If so then just move out to before -  if(el.getPreviousSibling() == NULL) +    // Check to see if this is the first node in the parent. +    // If so then just move out to before +    if(el.getPreviousSibling() == NULL)      { -      e = grandparent.insertBefore(parent.removeChild(el), parent); -  } +        e = grandparent.insertBefore(parent.removeChild(el), parent); +    }      // Check to see if this is the last node in the parent.      // If so then just move out to after the parent      else if(el.getNextSibling() == NULL) -   { -      DOM::Node next = parent.getNextSibling(); -      if(next == NULL) -           e = grandparent.appendChild(parent.removeChild(el)); -       else -           e = grandparent.insertBefore(parent.removeChild(el), next); +    { +        DOM::Node next = parent.getNextSibling(); +        if(next == NULL) +            e = grandparent.appendChild(parent.removeChild(el)); +        else +            e = grandparent.insertBefore(parent.removeChild(el), next);      }      // Otherwise it's in the middle so split the parent      // element etc... - else -   { -      // Clone it but not deep -       DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false); +    else +    { +        // Clone it but not deep +        DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false); -      if(parent2 == NULL) +        if(parent2 == NULL)              return false; -     // Flag that tells us whether we moved anything up to parent -       bool moved = false; +        // Flag that tells us whether we moved anything up to parent +        bool moved = false; -       // Now move all nodes after this one to the second parent. -     while((e = el.getNextSibling()) != NULL) -       { -          parent2.appendChild(parent.removeChild(e)); +        // Now move all nodes after this one to the second parent. +        while((e = el.getNextSibling()) != NULL) +        { +            parent2.appendChild(parent.removeChild(e));              moved = true; -      } +        } -     // Remove the element from it's parent -     e = parent.removeChild(el); +        // Remove the element from it's parent +        e = parent.removeChild(el); -       // Okay now we move the paragraph up to the parent -       DOMHelpers::insertAfter(grandparent, e, parent); -       if(moved) -          DOMHelpers::insertAfter(grandparent, parent2, e); -  } +        // Okay now we move the paragraph up to the parent +        DOMHelpers::insertAfter(grandparent, e, parent); +        if(moved) +            DOMHelpers::insertAfter(grandparent, parent2, e); +    } - // Now call it again with the paragraph in the new position -    // untill everything's cut through! +    // Now call it again with the paragraph in the new position +    // until everything's cut through!      return breakElement((DOM::Element&)e, contain);  } -/** - * Changes from a marker based paragraph system to a contained - * paragraph system. Also applies paragraph attributes to the - * appropriate paragraph. - * - * For example: - * - * <dest> - *   This <blockattr style="10"> is <b> a <block fix="para"/> - *     test of </b> your concentration. - *  </dest> - * - * Becomes: - * - * <para style="10"> This is <b> a </b></para> - * <para><b>test of </b> your concentration.</para> - */  void RtfFixups::breakBlocks(DOM::Document& document)  {      // First break out all the paragraphs to the destination level      DOM::NodeList blocks = document.getElementsByTagName(kElBlock);      if(blocks != NULL)      { -      for(int i = 0; i < blocks->getLength(); i++) -       { -          DOM::Element block = (const DOM::Element&)blocks->item(i); +        for(int i = 0; i < blocks->getLength(); i++) +        { +            DOM::Element block = (const DOM::Element&)blocks->item(i);              // If it's the single closed style para then break it -          if(block != NULL && !block.hasChildNodes()) +            if(block != NULL && !block.hasChildNodes())                  breakElement(block, kElDest); -      } +        }      }      // Now group stuff in destinations into paras or other blocks -  DOM::NodeList destinations = document.getElementsByTagName(kElDest); -   if(destinations != NULL) -   { -      for(int i = 0; i < destinations->getLength(); i++) -     { -          DOM::Element dest = (const DOM::Element&)destinations->item(i); - -           // Sanity Check +    DOM::NodeList destinations = document.getElementsByTagName(kElDest); +    if(destinations != NULL) +    { +        for(int i = 0; i < destinations->getLength(); i++) +        { +            DOM::Element dest = (const DOM::Element&)destinations->item(i); + +            // Sanity Check              if(dest == NULL || !dest.hasChildNodes()) -              continue; +                continue; -         // Go through the children of this destination -         DOM::Node child = dest.getFirstChild(); +            // Go through the children of this destination +            DOM::Node child = dest.getFirstChild(); -           DOM::Element block; +            DOM::Element block; -           while(child != NULL) -           { -              // If it's a block -             if(DOMHelpers::isElement(child, kElBlock)) -             { -                  block = (DOM::Element&)child; -                  child = child.getNextSibling(); +            while(child != NULL) +            { +                // If it's a block +                if(DOMHelpers::isElement(child, kElBlock)) +                { +                    block = (DOM::Element&)child; +                    child = child.getNextSibling();                      continue; -              } - -             // If it's already a real block element -             for(const char** t = kBlockTags; *t != NULL; t++) -              { -                  if(DOMHelpers::isElement(child, *t)) -                   { -                      block = NULL; -                      break; -                 } -              } - -             // If there's a block then add to it -               if(block != NULL) -              { -                  block.appendChild(dest.removeChild(child)); +                } + +                // If it's already a real block element +                for(const char** t = kBlockTags; *t != NULL; t++) +                { +                    if(DOMHelpers::isElement(child, *t)) +                    { +                        block = NULL; +                        break; +                    } +                } + +                // If there's a block then add to it +                if(block != NULL) +                { +                    block.appendChild(dest.removeChild(child));                      child = block; -             } +                } -             child = child.getNextSibling(); +                child = child.getNextSibling();              } -      } -  } +        } +    }  }  void RtfFixups::wrapTags(DOM::Document& doc, const string& tagName, -                        const string& wrapName) +                         const string& wrapName)  { - DOM::NodeList tags = doc.getElementsByTagName(tagName); +    DOM::NodeList tags = doc.getElementsByTagName(tagName);      if(tags != NULL) -   { -      for(int i = 0; i < tags->getLength(); i++) -     { -          DOM::Element tag = (const DOM::Element&)tags->item(i); +    { +        for(int i = 0; i < tags->getLength(); i++) +        { +            DOM::Element tag = (const DOM::Element&)tags->item(i);              DOM::Element wrap = doc.createElement(wrapName); -           while(tag.hasChildNodes()) -             wrap.appendChild(tag.removeChild(tag.getFirstChild())); +            while(tag.hasChildNodes()) +                wrap.appendChild(tag.removeChild(tag.getFirstChild())); -           tag.appendChild(wrap); -     } -  } +            tag.appendChild(wrap); +        } +    }  }  void RtfFixups::breakTags(DOM::Document& doc, const string& parentName, -                         const string& tagName) +                          const string& tagName)  { - DOM::NodeList parents = doc.getElementsByTagName(parentName); -  if(parents != NULL) +    DOM::NodeList parents = doc.getElementsByTagName(parentName); +    if(parents != NULL)      { -      for(int i = 0; i < parents->getLength(); i++) -      { -          DOM::Element parent = (const DOM::Element&)parents->item(i); +        for(int i = 0; i < parents->getLength(); i++) +        { +            DOM::Element parent = (const DOM::Element&)parents->item(i); -          if(!parent.hasChildNodes()) +            if(!parent.hasChildNodes())                  continue; -         DOM::NodeList tags = parent.getElementsByTagName(tagName); -         if(tags != NULL) -           { -              for(int i = 0; i < tags->getLength(); i++) -                 breakElement((const DOM::Element&)tags->item(i), parentName); -          } +            // First perform the breaks +            DOM::NodeList tags = parent.getElementsByTagName(tagName); +            if(tags != NULL) +            { +                for(int i = 0; i < tags->getLength(); i++) +                    breakElement((const DOM::Element&)tags->item(i), parentName); +            } -         DOM::Node tag = doc.createElement(tagName); +            DOM::Node tag = doc.createElement(tagName);              parent.insertBefore(tag, parent.getFirstChild()); -         DOM::Node child = tag; +            DOM::Node child = tag;              while(child != NULL && (child = child.getNextSibling()) != NULL) -           { -              if(DOMHelpers::isElement(child, kElBlock)) -             { -                  DOM::Node next = child.getNextSibling(); -                   if(next == NULL) -                   { -                      parent.removeChild(child); -                     continue; -                  } - -                 if(DOMHelpers::isElement(next, tagName)) -                   { -                      DOM::Node twodown = next.getNextSibling(); -                     if(!DOMHelpers::isElement(twodown, kElBlock)) -                      { -                          child = parent.insertBefore(parent.removeChild(next), child); -                      } -                      else -                       { -                          parent.removeChild(child); -                         child = next; -                      } -                  } -              } - -             if(DOMHelpers::isElement(child, tagName)) -              { -                  if(!tag.hasChildNodes()) -                       parent.removeChild(tag); -                   tag = child; -               } -              else -               { -                  tag.appendChild(parent.removeChild(child)); +            { +                if(DOMHelpers::isElement(child, kElBlock)) +                { +                    DOM::Node next = child.getNextSibling(); +                    if(next == NULL) +                    { +                        parent.removeChild(child); +                        continue; +                    } + +                    if(DOMHelpers::isElement(next, tagName)) +                    { +                        DOM::Node twodown = next.getNextSibling(); +                        if(!DOMHelpers::isElement(twodown, kElBlock)) +                        { +                            child = parent.insertBefore(parent.removeChild(next), child); +                        } +                        else +                        { +                            parent.removeChild(child); +                            child = next; +                        } +                    } +                } + +                if(DOMHelpers::isElement(child, tagName)) +                { +                    if(!tag.hasChildNodes()) +                        parent.removeChild(tag); +                    tag = child; +                } +                else +                { +                    tag.appendChild(parent.removeChild(child));                      child = tag; -               } -          } +                } +            } -         if(!tag.hasChildNodes()) -               parent.removeChild(tag); -       } -  } +            if(!tag.hasChildNodes()) +                parent.removeChild(tag); +        } +    } - DOM::NodeList tags = doc.getElementsByTagName(tagName); +    DOM::NodeList tags = doc.getElementsByTagName(tagName);      if(tags != NULL) -   { -      for(int i = 0; i < tags->getLength(); i++) -     { -          DOM::Element tag = (const DOM::Element&)tags->item(i); -         DOM::Node parent = tag.getParentNode(); - -           if(parent != NULL && !DOMHelpers::isElement(parent, parentName)) -               parent.removeChild(tag); +    { +        for(int i = 0; i < tags->getLength(); i++) +        { +            DOM::Element tag = (const DOM::Element&)tags->item(i); +            DOM::Node parent = tag.getParentNode(); +            if(parent != NULL && !DOMHelpers::isElement(parent, parentName)) +                parent.removeChild(tag);  #if 0              else if(tag.hasChildNodes()) -           { -              DOM::NodeList children = tag.getChildNodes(); -              if(children != NULL && children->getLength() == 1) -             { -                  DOM::Node child = children->item(0); -                   if(child != NULL && !child.hasChildNodes() && -                    DOMHelpers::isElement(child, kElBlock)) -                     parent.removeChild(tag); -               } -          } +            { +                DOM::NodeList children = tag.getChildNodes(); +                if(children != NULL && children->getLength() == 1) +                { +                    DOM::Node child = children->item(0); +                    if(child != NULL && !child.hasChildNodes() && +                       DOMHelpers::isElement(child, kElBlock)) +                        parent.removeChild(tag); +                } +            }  #endif - -      } -  } +        } +    }  }  void RtfFixups::breakLists(DOM::Document& doc)  { -  // Now group stuff in destinations into tables - DOM::NodeList destinations = doc.getElementsByTagName(kElDest); +    DOM::NodeList destinations = doc.getElementsByTagName(kElDest);      if(destinations != NULL) -   { -      for(int i = 0; i < destinations->getLength(); i++) -     { -          DOM::Element dest = (const DOM::Element&)destinations->item(i); +    { +        for(int i = 0; i < destinations->getLength(); i++) +        { +            DOM::Element dest = (const DOM::Element&)destinations->item(i); -           // Sanity Check +            // Sanity Check              if(dest == NULL)                 continue; -         // Go through the children of this destination -         DOM::Node child = dest.getFirstChild(); +            // Go through the children of this destination +            DOM::Node child = dest.getFirstChild(); -           DOM::Element list; -         DOM::Element e; +            DOM::Element list; +            DOM::Element e; -           wstring previd; +            wstring previd; -           while(child != NULL) -           { -              // If it's a block and has a cell attribute +            while(child != NULL) +            { +                // If it's a block ...                  if(DOMHelpers::isElement(child, kElBlock)) -             { -                  e = (DOM::Element&)child; +                { +                    e = (DOM::Element&)child; -                 // if it has a cell attribute -                  wstring listid = e.getAttribute(kAtList); -                  if(listid.length() > 0) +                    // ... and has a list attribute +                    wstring listid = e.getAttribute(kAtList); +                    if(listid.length() > 0)                      { -                      e.removeAttribute(kAtList); +                        e.removeAttribute(kAtList); -                       if(list == NULL || previd != listid) -                       { -                          list = doc.createElement(kElList); -                         list.setAttribute(kAtList, listid); +                        if(list == NULL || previd != listid) +                        { +                            list = doc.createElement(kElList); +                            list.setAttribute(kAtList, listid);                              dest.insertBefore(list, child);                              previd = listid; -                       } -                   } -                  else -                   { -                      list = NULL; -                       previd.erase(); +                        }                      } -              } +                    else +                    { +                        list = NULL; +                        previd.erase(); +                    } +                } -             // It's not a block +                // It's not a block                  if(list != NULL) -               { -                  list.appendChild(dest.removeChild(child)); -                 child = list; -              } +                { +                    list.appendChild(dest.removeChild(child)); +                    child = list; +                } -             child = child.getNextSibling(); +                child = child.getNextSibling();              } -      } -  } +        } +    }  }  void RtfFixups::fixStyles(const DOM::Document doc)  { -   DOM::NodeList styles = doc.getElementsByTagName(kElStyle); - if(styles != NULL) - { -      DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); -     if(blocks != NULL) -     { -          for(int i = 0; i < blocks->getLength(); i++) -           { -              DOM::Element block = (const DOM::Element&)blocks->item(i); +    // Get all stylesheet styles +    DOM::NodeList styles = doc.getElementsByTagName(kElStyle); +    if(styles != NULL) +    { +        // Get list of blocks in the document +        DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); +        if(blocks != NULL) +        { +            for(int i = 0; i < blocks->getLength(); i++) +            { +                DOM::Element block = (const DOM::Element&)blocks->item(i);                  if(block == NULL || !block.hasAttribute(kElStyle)) -                 continue; - -             for(int j = 0; j < styles->getLength(); j++) -               { -                  DOM::Element style = (const DOM::Element&)styles->item(j); -                 if(style != NULL) -                  { -                      if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) -                      { -                          wstring name = style.getAttribute(kAtName); +                    continue; + +                // Lookup block styles +                for(int j = 0; j < styles->getLength(); j++) +                { +                    DOM::Element style = (const DOM::Element&)styles->item(j); +                    if(style != NULL) +                    { +                        if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) +                        { +                            // And change to the name +                            wstring name = style.getAttribute(kAtName);                              if(name.length() > 0) -                              block.setAttribute(kElStyle, name); +                                block.setAttribute(kElStyle, name);                          } -                  } -              } -          } -      } - -     for(int i = 0; i < styles->getLength(); i++) -       { -          DOM::Element style = (const DOM::Element&)styles->item(i); -         if(style != NULL) -              style.removeAttribute(kAtId); -      } -   } - +                    } +                } +            } +        } +        // A little cleanup of the stylesheet styles +        for(int i = 0; i < styles->getLength(); i++) +        { +            DOM::Element style = (const DOM::Element&)styles->item(i); +            if(style != NULL) +                style.removeAttribute(kAtId); +        } +   }  }  void RtfFixups::breakTables(DOM::Document& doc)  { -   DOM::NodeList rows = doc.getElementsByTagName(kElRow); - if(rows != NULL) -   { -      for(int i = 0; i < rows->getLength(); i++) -     { -          DOM::Element row = (const DOM::Element&)rows->item(i); -         DOM::Node parent = row.getParentNode(); - -           if(parent == NULL) -             continue; - -         if(DOMHelpers::isElement(parent, kElBlock)) +    // Break rows out to destinations +    DOM::NodeList rows = doc.getElementsByTagName(kElRow); +    if(rows != NULL) +    { +        for(int i = 0; i < rows->getLength(); i++) +        { +            DOM::Element row = (const DOM::Element&)rows->item(i); +            DOM::Node parent = row.getParentNode(); + +            if(parent == NULL) +                continue; + +            if(DOMHelpers::isElement(parent, kElBlock))              { -              DOM::Node grandparent = parent.getParentNode(); +                DOM::Node grandparent = parent.getParentNode(); -               if(grandparent != NULL && !row.hasChildNodes()) +                if(grandparent != NULL && !row.hasChildNodes())                  { -                  if(row.getPreviousSibling() == NULL) -                       grandparent.insertBefore(parent.removeChild(row), parent); -                 else if(row.getNextSibling() == NULL) -                      DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent); -             } -          } - -         breakElement(row, kElDest); -        } -  } - +                    if(row.getPreviousSibling() == NULL) +                        grandparent.insertBefore(parent.removeChild(row), parent); +                    else if(row.getNextSibling() == NULL) +                        DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent); +                } +            } +            breakElement(row, kElDest); +        } +    } -  // Now group stuff in destinations into tables - DOM::NodeList destinations = doc.getElementsByTagName(kElDest); +    // Now group stuff in destinations into tables +    DOM::NodeList destinations = doc.getElementsByTagName(kElDest);      if(destinations != NULL) -   { -      for(int i = 0; i < destinations->getLength(); i++) -     { -          DOM::Element dest = (const DOM::Element&)destinations->item(i); +    { +        for(int i = 0; i < destinations->getLength(); i++) +        { +            DOM::Element dest = (const DOM::Element&)destinations->item(i); -           // Sanity Check +            // Sanity Check              if(dest == NULL)                 continue; -         // Go through the children of this destination -         DOM::Node child = dest.getFirstChild(); +            // Go through the children of this destination +            DOM::Node child = dest.getFirstChild(); -           DOM::Element table; +            DOM::Element table;              DOM::Element e; -           while(child != NULL) -           { -              // If it's a block and has a cell attribute +            while(child != NULL) +            { +                // If it's a block and has a cell attribute                  if(DOMHelpers::isElement(child, kElBlock)) -             { -                  e = (DOM::Element&)child; - -                 // if it has a cell attribute -                  if(e.getAttribute(kAtCell).length() > 0) -                   { -                      e.removeAttribute(kAtCell); - -                       if(table == NULL) -                      { -                          table = doc.createElement(kElTable); -                           dest.insertBefore(table, child); -                       } -                   } -                  else -                   { -                      table = NULL; -                  } -              } - -             // It's not a block +                { +                    e = (DOM::Element&)child; + +                    // if it has a cell attribute +                    if(e.getAttribute(kAtCell).length() > 0) +                    { +                        e.removeAttribute(kAtCell); + +                        if(table == NULL) +                        { +                            table = doc.createElement(kElTable); +                            dest.insertBefore(table, child); +                        } +                    } +                    else +                    { +                        table = NULL; +                    } +                } + +                // It's not a block                  if(table != NULL) -              { -                  table.appendChild(dest.removeChild(child)); +                { +                    table.appendChild(dest.removeChild(child));                      child = table; -             } +                } -             child = child.getNextSibling(); +                child = child.getNextSibling();              } -      } -  } +        } +    }  }  void RtfFixups::removeTags(const DOM::Document& doc)  { -  // Go through the list of nodes +    // Go through the list of nodes      for(const char** t = kRemoveTags; *t != NULL; t++) - { -      DOM::NodeList elements = doc.getElementsByTagName(*t); -     if(elements != NULL) -       { -          for(int j = 0; j < elements->getLength(); j++) -         { -              DOM::Element el = (const DOM::Element&)elements->item(j); -              DOM::Node parent = el->getParentNode(); - -               if(parent == NULL) -                 continue; - -             while(el.hasChildNodes()) -                  parent.insertBefore(el.removeChild(el.getFirstChild()), el); - -              parent.removeChild(el); +    { +        DOM::NodeList elements = doc.getElementsByTagName(*t); +        if(elements != NULL) +        { +            for(int j = 0; j < elements->getLength(); j++) +            { +                DOM::Element el = (const DOM::Element&)elements->item(j); +                DOM::Node parent = el->getParentNode(); + +                if(parent == NULL) +                    continue; + +                while(el.hasChildNodes()) +                    parent.insertBefore(el.removeChild(el.getFirstChild()), el); + +                parent.removeChild(el);              } -      } -  } +        } +    }  }  void RtfFixups::fixLists(const DOM::Document doc)  { -   DOM::NodeList lists = doc.getElementsByTagName(kElList); -   if(lists != NULL) -  { -      DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); -     if(listdefs != NULL) -       { -          for(int i = 0; i < listdefs->getLength(); i++) -         { -              DOM::Element listdef = (const DOM::Element&)listdefs->item(i); +    // Get all the lists +    DOM::NodeList lists = doc.getElementsByTagName(kElList); +    if(lists != NULL) +    { +        // And all the list definitions +        DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); +        if(listdefs != NULL) +        { +            for(int i = 0; i < listdefs->getLength(); i++) +            { +                DOM::Element listdef = (const DOM::Element&)listdefs->item(i);                  if(listdef == NULL || !listdef.hasAttribute(kAtList)) -                  continue; +                    continue; -             for(int j = 0; j < lists->getLength(); j++) +                for(int j = 0; j < lists->getLength(); j++)                  { -                  DOM::Element list = (const DOM::Element&)lists->item(j); -                   if(list != NULL) -                   { -                      if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) +                    DOM::Element list = (const DOM::Element&)lists->item(j); +                    if(list != NULL) +                    { +                        if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList))                          { -                          DOMHelpers::copyAttributes(listdef, list, kHideList); -                          list.removeAttribute(kAtList); -                     } -                  } -              } -          } -      } -  } +                            // And copy all the attributes from the list definition to the list +                            DOMHelpers::copyAttributes(listdef, list, kHideList); +                            list.removeAttribute(kAtList); +                        } +                    } +                } +            } +        } +    }  }  void RtfFixups::fixBlocks(const DOM::Document doc)  { -   // First break out all the paragraphs to the destination level - DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); - if(blocks != NULL) - { -      string fix; +    // Get all the blocks +    DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); +    if(blocks != NULL) +    { +        string fix;          wstring val; -      for(int i = 0; i < blocks->getLength(); i++) -       { -          DOM::Element block = (const DOM::Element&)blocks->item(i); -         DOM::Node parent = block.getParentNode(); - -         if(parent == NULL) -             continue; +        for(int i = 0; i < blocks->getLength(); i++) +        { +            DOM::Element block = (const DOM::Element&)blocks->item(i); +            DOM::Node parent = block.getParentNode(); -         fix.resize(0); -         val.resize(0); +            if(parent == NULL) +                continue; -           val = block.getAttribute(kAtFix); -          if(val.length() > 0) -               block.removeAttribute(kAtFix); +            fix.resize(0); +            val.resize(0); +            // Figure out what kind of element they want block fixed to +            val = block.getAttribute(kAtFix); +            if(val.length() > 0) +                block.removeAttribute(kAtFix); -           if(val.length() > 0) -           { -              val = block.getAttributeNS("", kAtFix); +            // BUG: Sablotron bug work around +            if(val.length() > 0) +            { +                val = block.getAttributeNS("", kAtFix);                  if(val.length() > 0) -                   block.removeAttributeNS("", kAtFix); -           } +                    block.removeAttributeNS("", kAtFix); +            } -         if(val.length() > 0) -               DOM::transcode16to8(val, fix); +            if(val.length() > 0) +                DOM::transcode16to8(val, fix);              if(fix.length() == 0) -              fix = kElPara; +                fix = kElPara; +            // Create duplicate of the 'fix' element              DOM::Element el = doc.createElement(fix); -           DOMHelpers::copyAttributes(block, el, NULL); +            DOMHelpers::copyAttributes(block, el, NULL); -          while(block.hasChildNodes()) -               el.appendChild(block.removeChild(block.getFirstChild())); +            // Replace block with the given 'fix' element +            while(block.hasChildNodes()) +                el.appendChild(block.removeChild(block.getFirstChild())); -         parent.replaceChild(el, block); +            parent.replaceChild(el, block);          } -  } +    }  } -/** - * Consolidates a certain tag types at the end of the document - */  void RtfFixups::consolidateEndTags(DOM::Document& doc)  {      DOM::Element top = doc.getDocumentElement(); @@ -721,9 +670,6 @@ void RtfFixups::consolidateEndTags(DOM::Document& doc)      }  } -/** - * Consolidates a certain tag types at the start of the document - */  void RtfFixups::consolidateStartTags(DOM::Document& doc)  {      DOM::Element top = doc.getDocumentElement(); @@ -759,10 +705,7 @@ void RtfFixups::consolidateStartTags(DOM::Document& doc)      }  } -/** - * Removes adjacent duplicate nodes of certain names - */ -void RtfFixups::removeDuplicates(const DOM::Document& doc) +void RtfFixups::combineDuplicates(const DOM::Document& doc)  {      bool found; diff --git a/src/xmlfixups.h b/src/xmlfixups.h index a250c5a..1716925 100644 --- a/src/xmlfixups.h +++ b/src/xmlfixups.h @@ -41,23 +41,95 @@  #include "sablo.h" -class RtfFixups +/* + * XMLFixups + * + * Because RTF is so 'different' (read: brain dead) we need to do all sorts + * of antics to get it into a nice XML format. Some of the XML Composition + * is done in XMLComposer, but whatever can't be done there as we're parsing + * gets done here after the fact. + * + * These functions are called from XMLComposer::endDocument and massage the + * resulting XML DOM into shape. + */ +class XMLFixups  {  public: -    // Cleanup Functions +    // Replace blocks with 'fix' elements like paragraphs      static void fixBlocks(DOM::Document doc); + +    // Pass 2 list fixups      static void fixLists(const DOM::Document doc); + +    // Pass 2 style fixups      static void fixStyles(const DOM::Document doc); + +    /* +     * Breaks a paragraph up through a previous level. Calls itself +     * recursively to break paragraphs totally free up to containing +     * destination. +     * +     * For example: +     * +     * <dest> +     *     This is <b> a <block fix="para"/> +     *    test of </b> your concentration. +     *  </dest> +     * +     * Becomes: +     * +     * <dest> +     *     This is <b> a </b><block fix="para"/> +     *   <b>test of </b> your concentration. +     * </dest> +     */      static bool breakElement(const DOM::Element& el, const string& contain); + +    // Break all tags of a given type to a previous level (see above)      static void breakBreak(DOM::Document& doc, const string& contain, const string& tag); + +    // Used to break tables cells and rows into blocks (but more complicated) +    static void breakTags(DOM::Document& doc, const string& parentName, const string& tagName); + +    // Fixes and combines list elements with the same id      static void breakLists(DOM::Document& document); + +    // Used to find and create tables and perform initial break out      static void breakTables(DOM::Document& document); -    static void breakTags(DOM::Document& doc, const string& parentName, const string& tagName); + + +    /* +     * Changes from a marker based paragraph system to a contained +     * paragraph system. Also applies paragraph attributes to the +     * appropriate paragraph. +     * +     * For example: +     * +     * <dest> +     *   This <blockattr style="10"> is <b> a <block fix="para"/> +     *     test of </b> your concentration. +     *  </dest> +     * +     * Becomes: +     * +     * <para style="10"> This is <b> a </b></para> +     * <para><b>test of </b> your concentration.</para> +     */      static void breakBlocks(DOM::Document& document); + +    // Wrap certain tags in a wrapper tag of given name      static void wrapTags(DOM::Document& document, const string& tagName, const string& wrapName); + +    // Remove certain tags from document      static void removeTags(const DOM::Document& doc); -    static void removeDuplicates(const DOM::Document& doc); + +    // Combines certain adjacent duplicate tags +    static void combineDuplicates(const DOM::Document& doc); + +    // Consolidates a certain tag types at the beginning of the document      static void consolidateStartTags(DOM::Document& doc); + +    // Consolidates a certain tag types at the end of the document      static void consolidateEndTags(DOM::Document& doc);  };  | 
