summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStef <stef@ws.local>2004-07-22 22:30:48 +0000
committerStef <stef@ws.local>2004-07-22 22:30:48 +0000
commitae28df20927567f2d62b575ed4aef2d127569265 (patch)
tree81dba160573131e476cad13e8421aff07ccb5d11
parentf2b46a176e02637d9e8ab65433ac4bdb2dd9d8d5 (diff)
- Comments and formatting changes.
-rw-r--r--src/basehandler.cpp4
-rw-r--r--src/basehandler.h2
-rw-r--r--src/domhelpers.cpp167
-rw-r--r--src/domhelpers.h18
-rw-r--r--src/levelhandler.cpp26
-rw-r--r--src/levelhandler.h29
-rw-r--r--src/reference.h14
-rw-r--r--src/rtfformatting.h8
-rw-r--r--src/rtfparser.cpp294
-rw-r--r--src/rtfparser.h97
-rw-r--r--src/rtfx.cpp20
-rw-r--r--src/xmlcomposehelpers.h73
-rw-r--r--src/xmlcomposer.cpp475
-rw-r--r--src/xmlcomposer.h99
-rw-r--r--src/xmlfixups.cpp859
-rw-r--r--src/xmlfixups.h80
16 files changed, 1298 insertions, 967 deletions
diff --git a/src/basehandler.cpp b/src/basehandler.cpp
index 4820051..53e5a18 100644
--- a/src/basehandler.cpp
+++ b/src/basehandler.cpp
@@ -1,4 +1,4 @@
-/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXx/*
* Copyright (c) 2004, Nate Nielsen
* All rights reserved.
*
@@ -36,6 +36,8 @@
*
*/
+// DELETE
+
#include "usuals.h"
#include "basehandler.h"
diff --git a/src/basehandler.h b/src/basehandler.h
index 55c6e24..3becdf1 100644
--- a/src/basehandler.h
+++ b/src/basehandler.h
@@ -36,6 +36,8 @@
*
*/
+// DELETE
+
// BaseHandler
// Implements an RtfHandler for other classes (LevelHandler
// and RtfParser) to override.
diff --git a/src/domhelpers.cpp b/src/domhelpers.cpp
index 7b06f55..6cf8052 100644
--- a/src/domhelpers.cpp
+++ b/src/domhelpers.cpp
@@ -40,142 +40,139 @@
#include "domhelpers.h"
#include "tags.h"
-/**
- * A quick check to see if a node is an element of a certain
- * name
- */
bool DOMHelpers::isElement(const DOM::Node& node, const string& name)
{
- return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE &&
- node.getNodeName() == name;
+ return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE &&
+ node.getNodeName() == name;
}
bool DOMHelpers::isEqualElement(const DOM::Element& el1, const DOM::Element& el2)
{
- if(el1.getNodeName() == el2.getNodeName())
- return false;
+ if(el1.getNodeName() == el2.getNodeName())
+ return false;
- DOM::NamedNodeMap at1 = el1.getAttributes();
- DOM::NamedNodeMap at2 = el2.getAttributes();
+ // Compare attributes
+ DOM::NamedNodeMap at1 = el1.getAttributes();
+ DOM::NamedNodeMap at2 = el2.getAttributes();
- if(at1 == NULL && at2 == NULL)
- return true;
+ if(at1 == NULL && at2 == NULL)
+ return true;
- if(at1 == NULL || at2 == NULL ||
- at1->getLength() != at2->getLength())
- return false;
+ if(at1 == NULL || at2 == NULL ||
+ at1->getLength() != at2->getLength())
+ return false;
- for(int i = 0; i < at1->getLength(); i++)
- {
- DOM::Attr attr1 = (const DOM::Attr&)at1->item(0);
- if(attr1 != NULL)
- return false;
+ for(int i = 0; i < at1->getLength(); i++)
+ {
+ DOM::Attr attr1 = (const DOM::Attr&)at1->item(0);
+ if(attr1 != NULL)
+ return false;
- DOM::Attr attr2 = (const DOM::Attr&)at2->getNamedItem(attr1.getNodeName());
+ DOM::Attr attr2 = (const DOM::Attr&)at2->getNamedItem(attr1.getNodeName());
if(attr2 != NULL)
- return false;
+ return false;
- if(attr1.getNodeValue() == attr2.getNodeValue())
- return false;
- }
+ if(attr1.getNodeValue() == attr2.getNodeValue())
+ return false;
+ }
- return true;
+ return true;
}
-/**
- * Gets the pertinent ancestor of this node, or returns null
- * if not found.
- */
DOM::Element DOMHelpers::getContainingElement(const DOM::Node& node, const string& name)
{
DOM::Node n = node;
- while(true)
+ while(true)
{
- n = n.getParentNode();
- if(n == NULL)
- break;
+ n = n.getParentNode();
+ if(n == NULL)
+ break;
+ // Match parent to given name
if(isElement(n, name))
- return (DOM::Element&)n;
- }
+ return (DOM::Element&)n;
+ }
- return DOM::Element();
+ return DOM::Element();
}
bool isNsAttr(const string& name)
{
- return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true;
+ // Check if this attribute is a xmlns: attribute
+ return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true;
}
void DOMHelpers::copyAttributes(const DOM::Element& src, DOM::Element& dest,
- const char** hideList)
+ const char** hideList)
{
- // Now get both sets of attributes
- DOM::NamedNodeMap srcMap = src.getAttributes();
+ // Get both sets of attributes
+ DOM::NamedNodeMap srcMap = src.getAttributes();
DOM::NamedNodeMap destMap = dest.getAttributes();
- if(srcMap == NULL || destMap == NULL)
- return;
+ if(srcMap == NULL || destMap == NULL)
+ return;
- // And copy them from one to the other
+ // And copy them from one to the other
for(int j = 0; j < srcMap->getLength(); j++)
- {
- DOM::Node attr = srcMap->item(j);
- if(attr != NULL)
- {
- // BUG: Sablotron seems to have a bug in it's
- // setAttributeNode implementation. It always
- // adds a blank namespace
- // attr = attr.cloneNode(false);
- // if(attr != NULL)
- // destMap.setNamedItem(attr);
-
- string name = attr.getNodeName();
-
- if(hideList)
- {
-
- for(const char** t = hideList; *t != NULL; t++)
+ {
+ DOM::Node attr = srcMap->item(j);
+ if(attr != NULL)
+ {
+ string name = attr.getNodeName();
+
+ if(hideList)
+ {
+ for(const char** t = hideList; *t != NULL; t++)
{
- if(name == *t)
- name.erase();
- }
- }
-
- if(name.length() > 0 && !isNsAttr(name))
- dest.setAttribute(attr.getNodeName(), attr.getNodeValue());
+ if(name == *t)
+ name.erase();
+ }
+ }
+
+ // BUG: Sablotron seems to have a bug in it's
+ // setAttributeNode implementation. It always
+ // adds a blank namespace
+ //
+ // attr = attr.cloneNode(false);
+ // if(attr != NULL)
+ // destMap.setNamedItem(attr);
+
+ // We never copy xmlns: attributes
+ if(name.length() > 0 && !isNsAttr(name))
+ dest.setAttribute(attr.getNodeName(), attr.getNodeValue());
}
- }
+ }
}
DOM::Element DOMHelpers::getPriorElement(const DOM::Node& node, const string& name)
{
- DOM::Node n = node;
+ DOM::Node n = node;
- while(n != NULL)
- {
- if(isElement(n, name))
- return (DOM::Element&)n;
+ while(n != NULL)
+ {
+ // Note that we return ourselves if it matches
+ if(isElement(n, name))
+ return (DOM::Element&)n;
- n = n.getPreviousSibling();
+ n = n.getPreviousSibling();
}
- DOM::Node parent = node.getParentNode();
+ DOM::Node parent = node.getParentNode();
- if(parent == NULL)
- return DOM::Element();
- else
- return getPriorElement(parent, name);
+ if(parent == NULL)
+ return DOM::Element();
+ else
+ return getPriorElement(parent, name);
}
void DOMHelpers::insertAfter(DOM::Node& parent, const DOM::Node& node,
- const DOM::Node& ref)
+ const DOM::Node& ref)
{
DOM::Node sibling = ref.getNextSibling();
- if(sibling == NULL)
+
+ if(sibling == NULL)
parent.appendChild(node);
- else
- parent.insertBefore(node, sibling);
+ else
+ parent.insertBefore(node, sibling);
}
-
diff --git a/src/domhelpers.h b/src/domhelpers.h
index 16afd79..043ffd4 100644
--- a/src/domhelpers.h
+++ b/src/domhelpers.h
@@ -41,16 +41,32 @@
#include "sablo.h"
+/*
+ * DOMHelpers
+ *
+ * A collection of functions for doing some things with an XML DOM.
+ * Used mainly by XMLComposer.
+ */
class DOMHelpers
{
public:
- // DOM Helper Functions
+ // Check if given node is an element with a certain name
static bool isElement(const DOM::Node& node, const string& name);
+
+ // Check if two elements have the same name and attributes
static bool isEqualElement(const DOM::Element& el1, const DOM::Element& el2);
+
+ // Copy attributes from one element to another optionaly ignoring some
static void copyAttributes(const DOM::Element& src, DOM::Element& dest, const char** hideList);
+
+ // Insert a child node after a given reference node
static void insertAfter(DOM::Node& parent, const DOM::Node& node, const DOM::Node& ref);
+
+ // Get containing element of a given name
static DOM::Element getContainingElement(const DOM::Node& node, const string& name);
+
+ // Get previous element (in XML flow) of a given name
static DOM::Element getPriorElement(const DOM::Node& node, const string& name);
};
diff --git a/src/levelhandler.cpp b/src/levelhandler.cpp
index 7fc2dd6..cdd3337 100644
--- a/src/levelhandler.cpp
+++ b/src/levelhandler.cpp
@@ -39,9 +39,12 @@
#include "usuals.h"
#include "levelhandler.h"
+/* ----------------------------------------------------------------------------------
+ * CONSTRUCTION
+ */
LevelHandler::LevelHandler()
{
-
+ m_reader = NULL;
}
LevelHandler::~LevelHandler()
@@ -54,12 +57,18 @@ void LevelHandler::clear()
m_curLevel.release();
m_topLevel.release();
- BaseHandler::clear();
+ m_parser = NULL;
}
-void LevelHandler::startDocument(RtfReader* reader)
+/* ----------------------------------------------------------------------------------
+ * OVERRIDES
+ */
+
+void LevelHandler::startDocument(RtfParser* parser)
{
- BaseHandler::startDocument(reader);
+ clear();
+
+ m_parser = parser;
m_topLevel = new Level;
m_curLevel = m_topLevel;
@@ -67,12 +76,11 @@ void LevelHandler::startDocument(RtfReader* reader)
void LevelHandler::endDocument()
{
- BaseHandler::endDocument();
+
}
void LevelHandler::groupStart()
{
- BaseHandler::groupStart();
ASSERT(m_curLevel);
pushLevel();
}
@@ -81,9 +89,12 @@ void LevelHandler::groupEnd()
{
ASSERT(m_curLevel);
popLevel();
- BaseHandler::groupEnd();
}
+/* ----------------------------------------------------------------------------------
+ * OPERATIONS
+ */
+
DOM::Element LevelHandler::getElement()
{
ASSERT(m_curLevel);
@@ -101,7 +112,6 @@ void LevelHandler::popLevel()
// Pull a level off the stack
LevelPtr level = m_curLevel->getPrevious();
- // TODO: report errors here
if(level)
m_curLevel = level;
}
diff --git a/src/levelhandler.h b/src/levelhandler.h
index 3077c13..bee1a8e 100644
--- a/src/levelhandler.h
+++ b/src/levelhandler.h
@@ -44,36 +44,49 @@
#define __LEVELHANDLER_H__
#include "rtfreader.h"
-#include "basehandler.h"
-#include "rtfparsehelpers.h"
#include "reference.h"
+#include "rtfparsehelpers.h"
+/*
+ * LevelHandler
+ *
+ * A base class that manages a set of Levels (see XMLComposeHelpers.cpp)
+ * based on the RTF groups seen.
+ */
class LevelHandler
- : public BaseHandler
+ : public RTFHandler
{
public:
LevelHandler();
virtual ~LevelHandler();
- virtual void startDocument(RtfReader* reader);
+ // Overrides
+ virtual void startDocument(RtfParser* parser);
virtual void endDocument();
virtual void groupStart();
virtual void groupEnd();
+ // Convenience function to get XML element from current level
virtual DOM::Element getElement();
+ // Create a new level on top of stack
void pushLevel();
+
+ // Pop top level and discard
void popLevel();
+
+ // Back out all the way past a given level
void rewindLevel(LevelPtr ptr);
- LevelPtr getLevel();
+ // Get the current level
+ LevelPtr getLevel();
protected:
virtual void clear();
- LevelPtr m_topLevel;
- LevelPtr m_curLevel;
+ LevelPtr m_topLevel; // First level
+ LevelPtr m_curLevel; // The current level
+ RtfParser* m_parser; // The parser we're listening to
};
-
#endif // __LEVELHANDLER_H__
diff --git a/src/reference.h b/src/reference.h
index 1e68515..1a78d4c 100644
--- a/src/reference.h
+++ b/src/reference.h
@@ -39,6 +39,11 @@
#ifndef __REFERENCE_H__
#define __REFERENCE_H__
+/*
+ * Reference
+ *
+ * A basic reference counting pointer
+ */
template<typename C>
class Reference
{
@@ -60,9 +65,7 @@ public:
}
~Reference()
- {
- release();
- }
+ { release(); }
Reference(const Reference& orig)
{
@@ -126,6 +129,11 @@ private:
C* m_ptr;
};
+/*
+ * Instance
+ *
+ * A basic reference counted object.
+ */
class Instance
{
public:
diff --git a/src/rtfformatting.h b/src/rtfformatting.h
index 6fbcf57..bb49cf1 100644
--- a/src/rtfformatting.h
+++ b/src/rtfformatting.h
@@ -39,6 +39,13 @@
#ifndef __RTFTEXPROPERTIES_H__
#define __RTFTEXPROPERTIES_H__
+/*
+ * RtfFormatting
+ *
+ * For keeping track of all the various transient formatting options
+ * within a given Rtf group. Any supported text options (not block)
+ * should be added here.
+ */
class RtfFormatting
{
public:
@@ -161,7 +168,6 @@ protected:
int m_list;
bool m_inTbl;
-
// TODO: Character styles
};
diff --git a/src/rtfparser.cpp b/src/rtfparser.cpp
index 6d07c80..78945b6 100644
--- a/src/rtfparser.cpp
+++ b/src/rtfparser.cpp
@@ -36,6 +36,8 @@
*
*/
+// RENAME RTFParser.cpp
+
#include "usuals.h"
#include <stdlib.h>
@@ -43,11 +45,11 @@
#include "rtfreader.h"
-const int RtfHandler::kAsterisk = 0x00000001;
-const int RtfHandler::kHasParam = 0x00000002;
-const int RtfHandler::kIsEncoded = 0x00000004;
+/* ----------------------------------------------------------------------------------
+ * CONSTRUCTION
+ */
-RtfReader::RtfReader()
+RTFParser::RTFParser()
{
m_handler = NULL;
m_depth = 0;
@@ -57,12 +59,17 @@ RtfReader::RtfReader()
m_uniEatStack.push(0);
}
-RtfReader::~RtfReader()
+RTFParser::~RTFParser()
{
}
-bool RtfReader::parse(string fileName)
+
+/* ----------------------------------------------------------------------------------
+ * PUBLIC METHODS
+ */
+
+bool RTFParser::parse(string fileName)
{
FILE* file = fopen(fileName.c_str(), "r");
if(!file)
@@ -75,26 +82,134 @@ bool RtfReader::parse(string fileName)
return ret;
}
-void RtfReader::emptyData(RtfContext& cx)
+bool RTFParser::parse(FILE* file)
+{
+ int ch = 0;
+
+ // The group depth
+ m_depth = 0;
+ m_parseErrors = "";
+
+ RtfContext cx;
+ cx.isData = false;
+ cx.file = file;
+ cx.data = L"";
+
+ if(m_handler)
+ m_handler->startDocument(this);
+
+ while(1)
+ {
+ ch = fgetc(file);
+ if(ch == EOF)
+ goto done;
+
+ // TODO: Do we need this ?
+ if(!cx.isData)
+ {
+ switch(ch)
+ {
+
+ // Starting a control word
+ case '\\':
+ if(!parseControlWord(cx))
+ goto done;
+ break;
+
+ // Starting an RTF group
+ case '{':
+ {
+ // Send all previous data
+ flushData(cx);
+
+ // Handle any unicode destinations properly
+ m_uniEatStack.push(m_uniEatStack.top());
+
+ if(m_handler)
+ m_handler->groupStart();
+
+ m_depth++;
+ }
+ break;
+
+ case '}':
+ {
+ // Send all previous data
+ flushData(cx);
+
+ if(m_handler)
+ m_handler->groupEnd();
+
+ // Handle any unicode destinations properly
+ if(!m_uniEatStack.empty())
+ m_uniEatStack.pop();
+
+ m_depth--;
+ }
+ break;
+
+ default:
+ cx.isData = true;
+ break;
+ }
+ }
+
+ if(cx.isData)
+ {
+ // We translate tabs into the appropriate control word
+ if(ch == '\t')
+ sendControlWord(cx, "tab", 0, -1);
+
+ // line endings aren't used
+ else if(!strchr("\r\n", ch))
+ sendData(cx, ch);
+
+ cx.isData = false;
+ }
+ }
+
+
+done:
+
+ if(m_depth != 0)
+ m_parseErrors.append("unmatched braces\n");
+
+ if(m_handler)
+ m_handler->endDocument();
+
+ // If any parse errors return failure
+ return m_parseErrors.empty();
+}
+
+
+/* ----------------------------------------------------------------------------------
+ * HANDLER CALLS
+ */
+
+void RTFParser::flushData(RtfContext& cx)
{
if(!cx.data.empty())
{
if(m_handler)
m_handler->charData(cx.data);
+
cx.data.resize(0);
}
}
-void RtfReader::sendData(RtfContext& cx, wchar_t ch)
+void RTFParser::sendData(RtfContext& cx, wchar_t ch)
{
+ // Skip unicode chars we've been asked to
if(m_uniEat > 0)
m_uniEat--;
+
else
cx.data.append(1, ch);
}
-void RtfReader::sendData(RtfContext& cx, wstring data)
+void RTFParser::sendData(RtfContext& cx, wstring data)
{
+ // Skip any unicode chars we've been asked to
if(m_uniEat > 0)
{
int len = data.size();
@@ -110,16 +225,24 @@ void RtfReader::sendData(RtfContext& cx, wstring data)
}
}
-void RtfReader::sendControlWord(RtfContext& cx, string cw, int flags, int param)
+void RTFParser::sendControlWord(RtfContext& cx, string cw, int flags, int param)
{
- emptyData(cx);
+ flushData(cx);
+
if(m_handler)
m_handler->controlWord(cw, flags, param);
}
-bool RtfReader::parseHexChar(RtfContext& cx, int num)
+
+/* ----------------------------------------------------------------------------------
+ * PARSE HELPERS
+ */
+
+bool RTFParser::parseHexChar(RtfContext& cx, int num)
{
string data;
+
+ // Get num chars and put them in the string
for(int i = 0; i < num; i++)
{
char ch = fgetc(cx.file);
@@ -139,6 +262,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)
}
}
+ // If parsing hex, then convert to appropriate unicode
if(m_parseHex)
{
char* end = NULL;
@@ -148,6 +272,9 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)
else
m_parseErrors.append("invalid hex char: " + data + "\n");
}
+
+ // TODO: Why would we ever want to do this?
+ // Otherwise just send as a hex control word
else
{
sendControlWord(cx, data, RtfHandler::kIsEncoded, -1);
@@ -156,7 +283,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)
return true;
}
-bool RtfReader::parseControlWord(RtfContext& cx)
+bool RTFParser::parseControlWord(RtfContext& cx)
{
bool isAsterisk = false;
string controlword;
@@ -171,7 +298,7 @@ bool RtfReader::parseControlWord(RtfContext& cx)
bool empty = controlword.empty();
// Part of the name of a control word
- // NOTE: Although the RTF specification prohibits upercase
+ // NOTE: Although the RTF specification prohibits uppercase
// control words, MS Word uses them :-/
if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z')
controlword.append(1, (char)ch);
@@ -211,16 +338,16 @@ bool RtfReader::parseControlWord(RtfContext& cx)
break;
}
- // Escaped braces
- else if(empty && ch == '{')
- {
- sendData(cx, L'{');
- }
+ // Escaped braces
+ else if(empty && ch == '{')
+ {
+ sendData(cx, L'{');
+ }
- else if(empty && ch == '}')
- {
- sendData(cx, L'}');
- }
+ else if(empty && ch == '}')
+ {
+ sendData(cx, L'}');
+ }
// Non breaking space
else if(empty && ch == '~')
@@ -242,18 +369,7 @@ bool RtfReader::parseControlWord(RtfContext& cx)
param.append(1, (char)ch);
}
- // TODO: This looks real hokey and acts that
- // way too
-#if 0
- // An enter as the first character of a control word
- // makes a paragraph
- else if(strchr("\n\r", ch))
- {
- controlword = "par";
- break;
- }
-#endif
- // Space end a rtf code (but get eaten)
+ // Space at end a rtf code (it gets eaten)
else if(strchr(" ", ch))
break;
@@ -331,111 +447,3 @@ bool RtfReader::parseControlWord(RtfContext& cx)
return true;
}
-bool RtfReader::parse(FILE* file)
-{
- m_depth = 0;
- m_parseErrors = "";
-
- int ch = 0;
-
- RtfContext cx;
- cx.isData = false;
- cx.file = file;
- cx.data = L"";
-
- if(m_handler)
- m_handler->startDocument(this);
-
- while(1)
- {
- ch = fgetc(file);
- if(ch == EOF)
- goto done;
-
- // Type is undetermined so we figure it out here
- if(!cx.isData)
- {
- switch(ch)
- {
- case '\\':
- if(!parseControlWord(cx))
- goto done;
- break;
-
- case '{':
- {
- emptyData(cx);
-
- m_uniEatStack.push(m_uniEatStack.top());
-
- if(m_handler)
- m_handler->groupStart();
-
- m_depth++;
- }
- break;
-
- case '}':
- {
- emptyData(cx);
-
- if(m_handler)
- m_handler->groupEnd();
-
- if(!m_uniEatStack.empty())
- m_uniEatStack.pop();
-
- m_depth--;
- }
- break;
-
- default:
- cx.isData = true;
- break;
- }
- }
-
- if(cx.isData)
- {
- // We translate tabs into the appropriate control
- // word
- if(ch == '\t')
- sendControlWord(cx, "tab", 0, -1);
-
-// Don't need this code, the XML outputter
-// Takes care of it for us
-#if 0
- if(ch == '&')
- sendData(cx, L"&amp;");
-
- else if(ch == '\'')
- sendData(cx, L"&apos;");
-
- else if(ch == '"')
- sendData(cx, L"&quot;");
-
- else if(ch == '<')
- sendData(cx, L"&lt;");
-
- else if(ch == '>')
- sendData(cx, L"&gt;");
-#endif
-
- // enters a
- else if(!strchr("\r\n", ch))
- sendData(cx, ch);
-
- cx.isData = false;
- }
- }
-
-done:
-
- if(m_depth != 0)
- m_parseErrors.append("unmatched braces\n");
-
- if(m_handler)
- m_handler->endDocument();
-
- return m_parseErrors.empty();
-}
diff --git a/src/rtfparser.h b/src/rtfparser.h
index bfa2e59..6b9e10d 100644
--- a/src/rtfparser.h
+++ b/src/rtfparser.h
@@ -36,6 +36,8 @@
*
*/
+// RENAME RTFParser.h
+
#ifndef __RTFREADER_H__
#define __RTFREADER_H__
@@ -43,29 +45,22 @@
#include <stack>
#include <stdio.h>
-class RtfReader;
+class RtfHandler;
-class RtfHandler
-{
-public:
- virtual void startDocument(RtfReader* reader) = 0;
- virtual void endDocument() = 0;
- virtual void controlWord(const string& cw, int flags, int param) = 0;
- virtual void groupStart() = 0;
- virtual void groupEnd() = 0;
- virtual void charData(wstring data) = 0;
-
- static const int kAsterisk;
- static const int kHasParam;
- static const int kIsEncoded;
-};
-
-class RtfReader
+/*
+ * RTFParser
+ *
+ * A class that parses the RTF into it's tags and groups etc... It feeds its
+ * parsed data into into a handler interface (see below) for processing.
+ *
+ * Performs some basic conversion and sanity checking (unicode chars etc...)
+ * as well.
+ */
+class RtfParser
{
public:
- RtfReader();
- virtual ~RtfReader();
-
+ RtfParser();
+ virtual ~RtfParser();
bool parse(string fileName);
bool parse(FILE* file);
@@ -81,12 +76,13 @@ public:
void setUnicode(bool unicode);
protected:
- RtfHandler* m_handler;
- int m_depth;
- bool m_parseHex;
- string m_parseErrors;
+ RtfHandler* m_handler; // The current handler
+ int m_depth; // To keep track of group depth
+ bool m_parseHex; // Whether to parse hex chars or not
+ string m_parseErrors; // A list of all the RTF parsing errors
- // Unicode handling
+ // TODO: Look at exactly what this is doing
+ // Unicode char handling
bool m_parseUnicode;
typedef std::stack<int> StackInt;
StackInt m_uniEatStack;
@@ -94,19 +90,62 @@ protected:
private:
+ // TODO: Why aren't these just members?
+
struct RtfContext
{
- FILE* file;
- bool isData;
- wstring data;
+ FILE* file; // The current file being parsed
+ wstring data; // Any data stored up ready to be sent to handler
+ bool isData; // TODO: Do we need this?
};
+ // Parse helpers
bool parseControlWord(RtfContext& cx);
bool parseHexChar(RtfContext& cx, int num);
+
+ // Convenience functions for calling the handler
void sendControlWord(RtfContext& cx, string cw, int flags, int param);
void sendData(RtfContext& cx, wchar_t ch);
void sendData(RtfContext& cx, wstring data);
- void emptyData(RtfContext& cx);
+ void flushData(RtfContext& cx);
+};
+
+/*
+ * RTFHandler
+ *
+ * An interface called by RTFParser with tags and groups etc... parsed from
+ * an RTF file.
+ */
+class RtfHandler
+{
+public:
+
+ // Called at the beginning of the document
+ virtual void startDocument(RtfReader* reader) = 0;
+
+ // Called at the end of the document
+ virtual void endDocument() = 0;
+
+ // Called when an RTF control word is hit. Flags below.
+ // If control word has no param then param is -1
+ virtual void controlWord(const string& cw, int flags, int param) = 0;
+
+ // Called when an RTF group opened
+ virtual void groupStart() = 0;
+
+ // Called when an RTF group is closed
+ virtual void groupEnd() = 0;
+
+ // A block of character data encountered
+ virtual void charData(wstring data) = 0;
+
+ // Flags for controlWord
+ enum
+ {
+ kAsterisk = 0x00000001,
+ kHasParam = 0x00000002,
+ kIsEncoded = 0x00000004
+ };
};
#endif // __RTFREADER_H__
diff --git a/src/rtfx.cpp b/src/rtfx.cpp
index 7576d51..a620498 100644
--- a/src/rtfx.cpp
+++ b/src/rtfx.cpp
@@ -57,19 +57,25 @@ int main(int argc, char* argv[])
try
{
+ // The input file
FILE* file = fopen(argv[1], "rb");
if(!file)
{
- fprintf(stderr, "rtfx: couldn't open file: %s: %s\n", argv[1], strerror(errno));
+ fprintf(stderr, "rtfx: couldn't open rtf file: %s: %s\n", argv[1], strerror(errno));
return 1;
}
+ // Default options
RtfParserOptions options;
- RtfParser handler(options);
- RtfReader rtf;
- rtf.setHandler(&handler);
+ // Reads RTF tags and blocks
+ RtfParser rtf;
+
+ // Interprets tags and blocks from RTFParser
+ XMLComposer composer(options);
+ rtf.setHandler(&composer);
+
bool ret = rtf.parse(file);
fclose(file);
@@ -79,8 +85,9 @@ int main(int argc, char* argv[])
return 1;
}
-
- DOM::Document doc = handler.getDocument();
+ // TODO: This is disgusting. We need to bug the sablotron guys
+ // for a better way to serialize a document.
+ DOM::Document doc = composer.getDocument();
string xml = doc.serialize();
FILE* out = fopen(argv[2], "wb");
@@ -93,7 +100,6 @@ int main(int argc, char* argv[])
fwrite(xml.c_str(), 1, xml.length(), out);
fclose(out);
return 0;
-
}
catch(DOM::DOMException& e)
{
diff --git a/src/xmlcomposehelpers.h b/src/xmlcomposehelpers.h
index f91923e..9d36ef5 100644
--- a/src/xmlcomposehelpers.h
+++ b/src/xmlcomposehelpers.h
@@ -36,6 +36,9 @@
*
*/
+// RENAME: XMLComposeHelpers.h
+// Possibly merge with XMLComposer.h
+
#ifndef __RTFPARSEHELPERS_H__
#define __RTFPARSEHELPERS_H__
@@ -44,8 +47,15 @@
#include "sablo.h"
#include "rtfformatting.h"
-class RtfParser;
+class XMLComposer;
+/*
+ * Destination
+ *
+ * A destination is a small class that handles the character data found
+ * in the RTF document. Depending on the current context in the RTF
+ * different destinations are used.
+ */
class Destination :
public Instance
{
@@ -58,11 +68,17 @@ public:
virtual void done() {};
protected:
- RtfParser* m_parser;
- friend class RtfParser;
+ XMLComposer* m_composer;
+ friend class XMLComposer;
};
-
+/*
+ * Analyser
+ *
+ * An analyser is a small class that handles the RTF control words.
+ * Depending on the current context in the RTF different analysers
+ * are used.
+ */
class Analyser :
public Instance
{
@@ -79,16 +95,40 @@ public:
virtual void done() {};
protected:
- RtfParser* m_parser;
- friend class RtfParser;
+ XMLComposer* m_composer;
+ friend class XMLComposer;
};
class Level;
+// Reference counted pointers
typedef Reference<Destination> DestinationPtr;
typedef Reference<Analyser> AnalyserPtr;
typedef Reference<Level> LevelPtr;
+/*
+ * Level
+ *
+ * A level is a combination of a Destination, Analyser, XML Element and
+ * some other options. They're used in a stack to push and pop these as
+ * RTF groups are found.
+ *
+ * About the stack:
+ * Not each level has it's own options. If a certain option isn't found
+ * in the current level the previous one is looked up. That's what all
+ * the 'deep' stuff is about below:
+ *
+ * get* methods:
+ * When 'deep' is set look to previous levels for the given object if not
+ * found at the current level. When not set returns object in current level
+ * or null when none exists here.
+ *
+ * set* methods:
+ * When 'deep' is set then replace the object currently being used at it's
+ * own level. So if get* would return an object from a previous level, with
+ * deep set to true it would replace that object in the given level. When
+ * not set, then the object is set in the current level.
+ */
class Level :
public Instance
{
@@ -99,24 +139,33 @@ public:
LevelPtr getPrevious();
LevelPtr pushLevel();
+ // The current XML Element
+ // TODO: Add deep semantics here
DOM::Element getElement();
void setElement(DOM::Element element, bool deep = false);
+
+ // The current Analyser
AnalyserPtr getAnalyser(bool deep = true);
void setAnalyser(AnalyserPtr analyser, bool deep = false);
+
+ // The current Destination
DestinationPtr getDestination(bool deep = true);
void setDestination(DestinationPtr destination, bool deep = false);
+
+ // The current formatting options
RtfFormatting& getFormatting();
void setTextProperties(RtfFormatting& textProperties);
protected:
+
+ // Constructor for stacking levels
Level(const Level& level);
- LevelPtr m_previous;
- DOM::Element m_element;
- RtfFormatting m_text;
- DestinationPtr m_destination;
- AnalyserPtr m_analyser;
+ LevelPtr m_previous; // The previous level
+ DOM::Element m_element; // XML Element for this level
+ RtfFormatting m_text; // Formatting options for this level
+ DestinationPtr m_destination; // Destination for this level
+ AnalyserPtr m_analyser; // Analyser for this level
};
-
#endif //__RTFPARSEHELPERS_H__
diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp
index 7e74f70..6072375 100644
--- a/src/xmlcomposer.cpp
+++ b/src/xmlcomposer.cpp
@@ -42,20 +42,21 @@
#include "domhelpers.h"
#include "tags.h"
-//////////////////////////////////////////////////////////////////////
-// Construction/Destruction
-//////////////////////////////////////////////////////////////////////
+/* ----------------------------------------------------------------------------------
+ * CONSTRUCTION
+ */
-RtfParser::RtfParser(const RtfParserOptions& options)
+XmlComposer::XmlComposer(const RtfParserOptions& options)
{
m_document = NULL;
memcpy(&m_options, &options, sizeof(options));
+ // All autocounters start at 1
for(int i = 0; i < AUTOCOUNT_MAX; i++)
m_autocount[i] = 1;
}
-RtfParser::~RtfParser()
+XmlComposer::~XmlComposer()
{
clear();
@@ -63,7 +64,7 @@ RtfParser::~RtfParser()
m_impl.release();
}
-void RtfParser::clear()
+void XmlComposer::clear()
{
if(m_document != NULL)
{
@@ -78,9 +79,15 @@ void RtfParser::clear()
LevelHandler::clear();
}
-void RtfParser::startDocument(RtfReader* reader)
+
+/* ----------------------------------------------------------------------------------
+ * HANDLER OVERRIDES
+ */
+
+void XmlComposer::startDocument(RtfReader* reader)
{
LevelHandler::startDocument(reader);
+ ASSERT(m_curLevel != NULL);
// Create a new document
m_document = m_impl.createDocument("", kElDoc, DOM::DocumentType());
@@ -88,7 +95,7 @@ void RtfParser::startDocument(RtfReader* reader)
// TODO: Throw error if document is null
ASSERT(m_document != NULL);
- ASSERT(m_curLevel != NULL);
+ // Hook up the top level element
m_curLevel->setElement(m_document.getDocumentElement(), true);
// Set the attributes on the top level
@@ -98,59 +105,106 @@ void RtfParser::startDocument(RtfReader* reader)
getTextFormatting().resetText();
}
-void RtfParser::endDocument()
+void XmlComposer::endDocument()
{
LevelHandler::endDocument();
- // Cleanup the tree
+ // Pass 0: Cleanup the tree
RtfFixups::removeDuplicates(m_document);
RtfFixups::consolidateStartTags(m_document);
RtfFixups::consolidateEndTags(m_document);
+
+ // Pass 1: Block breakout
RtfFixups::breakTables(m_document);
RtfFixups::breakTags(m_document, kElTable, kElRow);
RtfFixups::breakTags(m_document, kElRow, kElCell);
RtfFixups::wrapTags(m_document, kElCell, kElDest);
RtfFixups::breakBlocks(m_document);
RtfFixups::breakLists(m_document);
+
+ // Pass 2: Fixups
RtfFixups::fixLists(m_document);
RtfFixups::fixStyles(m_document);
RtfFixups::fixBlocks(m_document);
RtfFixups::removeTags(m_document);
RtfFixups::breakBreak(m_document, kElDoc, kElPage);
RtfFixups::breakBreak(m_document, kElDoc, kElSect);
+
+ // Pass 3: Final cleanup
RtfFixups::removeDuplicates(m_document);
return;
}
+void XmlComposer::charData(wstring data)
+{
+ ASSERT(m_curLevel != NULL);
+ DestinationPtr destination = m_curLevel->getDestination();
+ if(destination)
+ {
+ destination->charData(data);
+ }
+ else
+ {
+ // TODO: Change this so it sends char data to new destination
+ // We should always have a destination
+ destination = DestinationPtr(new Content);
+ setDestination(destination);
+ }
+}
+void XmlComposer::controlWord(const string& cw, int flags, int param)
+{
+ ASSERT(m_curLevel != NULL);
+ AnalyserPtr analyser = m_curLevel->getAnalyser();
+ if(analyser)
+ analyser->controlWord(cw, flags, param);
+}
+void XmlComposer::groupStart()
+{
+ LevelHandler::groupStart();
+ ASSERT(m_curLevel != NULL);
+ AnalyserPtr analyser = m_curLevel->getAnalyser();
+ if(analyser)
+ analyser->groupStart();
+}
-// -----------------------------------------------------------------------
-// Helper functions
+void XmlComposer::groupEnd()
+{
+ LevelHandler::groupEnd();
+
+ ASSERT(m_curLevel != NULL);
+ AnalyserPtr analyser = m_curLevel->getAnalyser();
+ if(analyser)
+ analyser->groupEnd();
+}
+
+
+/* ----------------------------------------------------------------------------------
+ * HELPER FUNCTIONS
+ */
-DOM::Element RtfParser::createElement(const string& name)
+DOM::Element XmlComposer::createElement(const string& name)
{
ASSERT(name.length() > 0);
return m_document.createElement(name);
-
- // TODO: Throw exception here if necessary
}
-void RtfParser::replaceElement(const DOM::Element& element)
+void XmlComposer::replaceElement(const DOM::Element& element)
{
ASSERT(m_curLevel != NULL);
m_curLevel->setElement(element, true);
}
-void RtfParser::pushElement(const DOM::Element& element)
+void XmlComposer::pushElement(const DOM::Element& element)
{
ASSERT(m_curLevel != NULL);
getElement().appendChild(element);
m_curLevel->setElement(element);
}
-DOM::Element RtfParser::popElement()
+DOM::Element XmlComposer::popElement()
{
DOM::Element element = getElement();
ASSERT(m_curLevel != NULL);
@@ -163,7 +217,7 @@ DOM::Element RtfParser::popElement()
return element;
}
-void RtfParser::setAttribute(const string& name, const wstring& value, DOM::Element el)
+void XmlComposer::setAttribute(const string& name, const wstring& value, DOM::Element el)
{
ASSERT(name.length() > 0);
if(el == NULL)
@@ -171,7 +225,7 @@ void RtfParser::setAttribute(const string& name, const wstring& value, DOM::Elem
el.setAttribute(name, value);
}
-void RtfParser::setAttribute(const string& name, int value, DOM::Element el)
+void XmlComposer::setAttribute(const string& name, int value, DOM::Element el)
{
ASSERT(name.length() > 0);
if(el == NULL)
@@ -179,153 +233,116 @@ void RtfParser::setAttribute(const string& name, int value, DOM::Element el)
el.setAttribute(name, formatInt(value));
}
-void RtfParser::setDestination(DestinationPtr dest)
+void XmlComposer::setDestination(DestinationPtr dest)
{
ASSERT(m_curLevel);
m_curLevel->setDestination(dest);
- dest->m_parser = this;
+ dest->m_composer = this;
dest->initialize();
}
-DestinationPtr RtfParser::replaceDestination(DestinationPtr dest)
+DestinationPtr XmlComposer::replaceDestination(DestinationPtr dest)
{
ASSERT(m_curLevel);
DestinationPtr old = m_curLevel->getDestination();
m_curLevel->setDestination(dest, true);
- dest->m_parser = this;
+ dest->m_composer = this;
dest->initialize();
return old;
}
-void RtfParser::setAnalyser(AnalyserPtr analy)
+void XmlComposer::setAnalyser(AnalyserPtr analy)
{
ASSERT(m_curLevel);
ASSERT(analy != NULL);
- analy->m_parser = this;
+ analy->m_composer = this;
m_curLevel->setAnalyser(analy);
analy->initialize();
}
-AnalyserPtr RtfParser::getAnalyser()
+AnalyserPtr XmlComposer::getAnalyser()
{
ASSERT(m_curLevel);
return m_curLevel->getAnalyser();
}
-DestinationPtr RtfParser::getDestination()
+DestinationPtr XmlComposer::getDestination()
{
ASSERT(m_curLevel);
return m_curLevel->getDestination();
}
-RtfFormatting& RtfParser::getTextFormatting()
+RtfFormatting& XmlComposer::getTextFormatting()
{
ASSERT(m_curLevel);
return m_curLevel->getFormatting();
}
-int RtfParser::getAutoCount(int type)
+int XmlComposer::getAutoCount(int type)
{
ASSERT(type < AUTOCOUNT_MAX);
return m_autocount[type];
}
-void RtfParser::incrementAutoCount(int type)
+void XmlComposer::incrementAutoCount(int type)
{
ASSERT(type < AUTOCOUNT_MAX);
m_autocount[type]++;
}
-// ---------------------------------------------------------------------------------
-// Pass this stuff on through to the appropriate analysers etc...
-
-void RtfParser::charData(wstring data)
+wstring XmlComposer::formatInt(int num)
{
- ASSERT(m_curLevel != NULL);
- DestinationPtr destination = m_curLevel->getDestination();
- if(destination)
- {
- destination->charData(data);
- }
- else
- {
- destination = DestinationPtr(new Content);
- setDestination(destination);
- }
-
-}
+ char buff[16];
-void RtfParser::controlWord(const string& cw, int flags, int param)
-{
- ASSERT(m_curLevel != NULL);
- AnalyserPtr analyser = m_curLevel->getAnalyser();
- if(analyser)
- analyser->controlWord(cw, flags, param);
-}
+ // Certain OSs don't support swprintf :(
+ sprintf(buff, "%d", num);
-void RtfParser::groupStart()
-{
- LevelHandler::groupStart();
+ wstring n;
+ for(char* s = buff; *s; s++)
+ n.append(1, *s);
- ASSERT(m_curLevel != NULL);
- AnalyserPtr analyser = m_curLevel->getAnalyser();
- if(analyser)
- analyser->groupStart();
+ return n;
}
-void RtfParser::groupEnd()
-{
- ASSERT(m_curLevel != NULL);
- bool done = true;
- LevelHandler::groupEnd();
-
- AnalyserPtr analyser = m_curLevel->getAnalyser();
- if(analyser)
- analyser->groupEnd();
-}
+/* ----------------------------------------------------------------------------------
+ * CONVENIENCE MACROS USED BELOW
+ */
-#define ON_INITIALIZE(cls) \
- void RtfParser::cls::initialize()
-#define ON_CONTROLWORD(cls) \
- void RtfParser::cls::controlWord(const string& cw, int flags, int param)
-#define ON_CHARDATA(cls) \
- void RtfParser::cls::charData(wstring data)
-#define ON_GROUPSTART(cls) \
- void RtfParser::cls::groupStart()
-#define ON_GROUPEND(cls) \
- void RtfParser::cls::groupEnd()
-#define ON_DONE(cls) \
- void RtfParser::cls::done()
#define AN_ELEMENT(name) \
- m_parser->pushElement(m_parser->createElement(name))
+ m_composer->pushElement(m_composer->createElement(name))
#define AN_POP_ELEMENT() \
- m_parser->popElement()
+ m_composer->popElement()
#define AN_ATTRIBUTE(name, value) \
- m_parser->setAttribute(name, value)
+ m_composer->setAttribute(name, value)
#define AN_DESTINATION_ATTR(name) \
- m_parser->setDestination(new Attribute(name))
+ m_composer->setDestination(new Attribute(name))
#define AN_DESTINATION(cls) \
- m_parser->setDestination(new cls)
+ m_composer->setDestination(new cls)
#define AN_ANALYSER(cls) \
- m_parser->setAnalyser(AnalyserPtr(new cls))
+ m_composer->setAnalyser(AnalyserPtr(new cls))
#define AN_SET_ANALYSER(cls) \
- m_parser->setAnalyser(AnalyserPtr(cls))
+ m_composer->setAnalyser(AnalyserPtr(cls))
#define HAS_PARAM (flags & kHasParam)
#define DEFAULT_CONTROLWORD processDefault(cw, flags, param)
-#define DUMMY 1 == 1
-#define NUM_ATTR(n) m_parser->formatInt(n)
+#define DUMMY 1 == 1
+
+
+/* ----------------------------------------------------------------------------------
+ * BASE ANALYSER
+ */
-bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int param)
+bool XmlComposer::BaseAnalyser::processDefault(const string& cw, int flags, int param)
{
+ // Unicode blocks go to a special analyser
if(cw == "upr")
{
- AnalyserPtr analy = m_parser->getAnalyser();
+ AnalyserPtr analy = m_composer->getAnalyser();
ASSERT(analy != NULL);
AN_SET_ANALYSER(new Upr(analy));
return true;
@@ -334,41 +351,41 @@ bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int p
return false;
}
-void RtfParser::ParseAnalyser::applyParaFormatting(RtfFormatting* format,
- DOM::Element& el)
+void XmlComposer::BaseAnalyser::applyParaFormatting(RtfFormatting* format,
+ DOM::Element& el)
{
if(format == NULL)
- format = &(m_parser->getTextFormatting());
+ format = &(m_composer->getTextFormatting());
wstring fix = kValPara;
+ // Is it a list?
int list = format->paraList();
if(list != -1)
- {
- el.setAttribute(kAtList, NUM_ATTR(list));
- }
+ el.setAttribute(kAtList, list);
else
- {
el.removeAttribute(kAtList);
- }
+ // Is it a cell?
if(format->paraInTable())
el.setAttribute(kAtCell, L"1");
else
el.removeAttribute(kAtCell);
+ // Paragraph styles
int style = format->paraStyle();
if(style != -1)
- el.setAttribute(kElStyle, NUM_ATTR(style));
+ el.setAttribute(kElStyle, style);
else
el.removeAttribute(kElStyle);
+ // These fix elements are later picked up in XmlFixups::fixBlocks
el.setAttribute(kAtFix, fix);
}
-DOM::Element RtfParser::ParseAnalyser::getCurrentBlock()
+DOM::Element XmlComposer::BaseAnalyser::getCurrentBlock()
{
- DOM::Node node = m_parser->getElement();
+ DOM::Node node = m_composer->getElement();
if(node.hasChildNodes())
node = node.getLastChild();
@@ -377,97 +394,115 @@ DOM::Element RtfParser::ParseAnalyser::getCurrentBlock()
}
-bool RtfParser::ParseAnalyser::processTextContent(const string& cw, int flags, int param)
+bool XmlComposer::BaseAnalyser::processTextContent(const string& cw, int flags, int param)
{
DOM::Element el;
bool process = false;
- RtfFormatting& format = m_parser->getTextFormatting();
+ RtfFormatting& format = m_composer->getTextFormatting();
+ // New paragraph
if(cw == "par")
{
el = getCurrentBlock();
if(el != NULL)
applyParaFormatting(&format, el);
- el = m_parser->createElement(kElBlock);
+ el = m_composer->createElement(kElBlock);
applyParaFormatting(&format, el);
}
+ // Cells (used later in applyParaFormatting)
else if(cw == "intbl")
format.paraSetTable(true);
+ // Start of a cell
else if(cw == "cell")
{
el = getCurrentBlock();
if(el != NULL)
applyParaFormatting(&format, el);
- el = m_parser->createElement(kElCell);
- m_parser->pushElement(el);
- m_parser->popElement();
- el = m_parser->createElement(kElBlock);
+ el = m_composer->createElement(kElCell);
+ m_composer->pushElement(el);
+ m_composer->popElement();
+ el = m_composer->createElement(kElBlock);
applyParaFormatting(&format, el);
}
+ // Start of a row
else if(cw == "trowd")
- el = m_parser->createElement(kElRow);
+ el = m_composer->createElement(kElRow);
+ // A tab
else if(cw == "tab")
- el = m_parser->createElement(kElTab);
+ el = m_composer->createElement(kElTab);
+ // A section break
else if(cw == "sect")
- el = m_parser->createElement(kElSect);
+ el = m_composer->createElement(kElSect);
+ // A page break
else if(cw == "page")
- el = m_parser->createElement(kElPage);
+ el = m_composer->createElement(kElPage);
+ // A paragraph style
else if(cw == "s" && HAS_PARAM)
format.paraSetStyle(param);
+ // A line break
else if(cw == "line")
- el = m_parser->createElement(kElLine);
+ el = m_composer->createElement(kElLine);
+ // A page header (not implemented)
else if(cw == "header")
AN_ANALYSER(Skip);
+
+ // A page footer (not implemented)
else if(cw == "footer")
AN_ANALYSER(Skip);
+
+ // A bookmark (not implemented)
else if(cw == "bkmkstart")
AN_ANALYSER(Skip);
+
+ // List text (not implemented)
else if(cw == "listtext")
AN_ANALYSER(Skip);
+ // Set list style (used in applyParaFormatting)
else if(cw == "ls" && HAS_PARAM)
format.paraSetList(param);
if(el != NULL)
{
// This ensures that our content destination is open and ready
- DestinationPtr dest = m_parser->getDestination();
+ DestinationPtr dest = m_composer->getDestination();
ASSERT(dest != NULL);
dest->charData(kValNull);
- m_parser->pushElement(el);
- m_parser->popElement();
+ m_composer->pushElement(el);
+ m_composer->popElement();
}
return (el != NULL) || process;
-
- /* TODO: cell, row, intbl, cellx, trowd*/
}
-bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags,
+bool XmlComposer::BaseAnalyser::processTextFormatting(const string& cw, int flags,
int param, RtfFormatting& format)
{
bool on = true;
if(flags & HAS_PARAM && param == 0)
on = false;
+ // Clears all paragraph formatting
if(cw == "pard")
{
format.resetPara();
-// applyParaFormatting();
+ // applyParaFormatting();
}
+
+ // Rest are pretty much self-explanatory
else if(cw == "plain")
format.resetText();
else if(cw == "b")
@@ -490,21 +525,22 @@ bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags
return true;
}
-bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, int param)
+bool XmlComposer::BaseAnalyser::processTextFormatting(const string& cw, int flags, int param)
{
- return processTextFormatting(cw, flags, param, m_parser->getTextFormatting());
+ return processTextFormatting(cw, flags, param, m_composer->getTextFormatting());
}
-bool RtfParser::ParseAnalyser::processTextAutoContent(const string& cw, int flags, int param)
+bool XmlComposer::BaseAnalyser::processTextAutoContent(const string& cw, int flags, int param)
{
- DestinationPtr dest = m_parser->getDestination();
+ DestinationPtr dest = m_composer->getDestination();
ASSERT(dest != NULL);
dest->charData(kValNull);
// Auto generated content
if(cw == "chftn")
{
- int ac = m_parser->getAutoCount(AUTOCOUNT_FOOTNOTE);
+ // Footnote auto numbering
+ int ac = m_composer->getAutoCount(AUTOCOUNT_FOOTNOTE);
AN_ELEMENT(kElRef);
AN_ATTRIBUTE(kAtType, kValFootNote);
@@ -517,38 +553,72 @@ bool RtfParser::ParseAnalyser::processTextAutoContent(const string& cw, int flag
return false;
}
+/* ----------------------------------------------------------------------------------
+ * ANALYSER/DESTINATION DEFINITIONS
+ */
+
+#define ON_INITIALIZE(cls) \
+ void XmlComposer::cls::initialize()
+#define ON_CONTROLWORD(cls) \
+ void XmlComposer::cls::controlWord(const string& cw, int flags, int param)
+#define ON_CHARDATA(cls) \
+ void XmlComposer::cls::charData(wstring data)
+#define ON_GROUPSTART(cls) \
+ void XmlComposer::cls::groupStart()
+#define ON_GROUPEND(cls) \
+ void XmlComposer::cls::groupEnd()
+#define ON_DONE(cls) \
+ void XmlComposer::cls::done()
+
+
+// Skip Analyser --------------------------------------------------------------------
+
ON_INITIALIZE(Skip)
{ AN_DESTINATION(Null); }
+
ON_GROUPSTART(Skip)
{ AN_ANALYSER(Skip); }
-RtfParser::Upr::Upr(AnalyserPtr prv)
+// Upr Analyser ---------------------------------------------------------------------
+
+XmlComposer::Upr::Upr(AnalyserPtr prv)
{
ASSERT(prv);
prev = prv;
}
+
ON_GROUPSTART(Upr)
- { AN_ANALYSER(Skip); }
+{
+ AN_ANALYSER(Skip);
+}
+
ON_GROUPEND(Upr)
{
ASSERT(prev);
- m_parser->setAnalyser(prev);
+ m_composer->setAnalyser(prev);
prev = NULL;
}
+// Stylesheet Analyser --------------------------------------------------------------
+
ON_INITIALIZE(Stylesheet)
{
AN_ELEMENT(kElStylesheet);
}
+
ON_GROUPSTART(Stylesheet)
{
+ // Each group should be a style
AN_ANALYSER(Style);
+
+ // Without any character data
AN_DESTINATION(Null);
}
+// Stylesheet Style Analyser --------------------------------------------------------
ON_INITIALIZE(Style)
{
@@ -556,6 +626,7 @@ ON_INITIALIZE(Style)
// so we can't always create
haveStyle = false;
}
+
ON_CONTROLWORD(Style)
{
// Get the style id
@@ -565,6 +636,7 @@ ON_CONTROLWORD(Style)
return;
}
+ // Create the style tag if necessary
if(!haveStyle)
{
AN_ELEMENT(kElStyle);
@@ -572,9 +644,10 @@ ON_CONTROLWORD(Style)
haveStyle = true;
}
+ // The style id
if(cw == "s" && flags & kHasParam)
{
- AN_ATTRIBUTE(kAtId, NUM_ATTR(param));
+ AN_ATTRIBUTE(kAtId, param);
}
// Otherwise get as much formatting out of the tag as possible
@@ -584,13 +657,17 @@ ON_CONTROLWORD(Style)
else
DEFAULT_CONTROLWORD;
}
+
ON_GROUPSTART(Style)
{
AN_ANALYSER(Skip);
}
+
ON_GROUPEND(Style)
{
- RtfFormatting& props = m_parser->getTextFormatting();
+ RtfFormatting& props = m_composer->getTextFormatting();
+
+ // Dig out all the formatting attributes
if(props.textIsBold())
AN_ATTRIBUTE(kAtBold, L"1");
if(props.textIsHidden())
@@ -601,42 +678,54 @@ ON_GROUPEND(Style)
AN_ATTRIBUTE(kAtStrike, L"1");
if(props.textIsUnderline())
AN_ATTRIBUTE(kAtUnderline, L"1");
- if(props.textColor() != -1 && m_parser->getOptions().doColors)
- AN_ATTRIBUTE(kAtColor, NUM_ATTR(props.textColor()));
+ if(props.textColor() != -1 && m_composer->getOptions().doColors)
+ AN_ATTRIBUTE(kAtColor, props.textColor());
}
+// List Table Analyser --------------------------------------------------------------
ON_INITIALIZE(ListTable)
{
AN_ELEMENT(kElListtable);
}
+
ON_GROUPSTART(ListTable)
{
+ // Everything in here should be a list
AN_ANALYSER(List);
+
+ // Content doesn't matter
AN_DESTINATION(Null);
}
+// List (in List Table) Analyser ----------------------------------------------------
ON_INITIALIZE(List)
{
- AN_ELEMENT(kElListdef);
+ // Create a default element
+ AN_ELEMENT(kElListdef);
AN_ATTRIBUTE(kAtType, kValDisc);
AN_ATTRIBUTE(kAtOrdered, L"0");
levelsSeen = 0;
}
+
ON_CONTROLWORD(List)
{
+ // The name
if(cw == "listname")
AN_DESTINATION_ATTR(kAtName);
+
+ // The list id
else if(cw == "listid" && HAS_PARAM)
- AN_ATTRIBUTE(kAtId, NUM_ATTR(param));
+ AN_ATTRIBUTE(kAtId, param);
// We let listlevel in here too
else if(cw == "levelstartat" && HAS_PARAM)
- AN_ATTRIBUTE(kAtStart, NUM_ATTR(param));
+ AN_ATTRIBUTE(kAtStart, param);
+ // The list type
else if(cw == "levelnfc" && HAS_PARAM)
{
switch(param)
@@ -679,27 +768,35 @@ ON_CONTROLWORD(List)
else
DEFAULT_CONTROLWORD;
}
+
ON_GROUPSTART(List)
{
+ // Skip internal groups and content
+
if(levelsSeen > 0)
AN_ANALYSER(Skip);
+
levelsSeen++;
}
-
+// The List Override Table ----------------------------------------------------------
ON_INITIALIZE(ListOverrideTable)
{
- DOM::Document document = m_parser->getDocument();
+ // Get all of the current lists
+ DOM::Document document = m_composer->getDocument();
lists = document.getElementsByTagName(kElListdef);
curList = NULL;
lsId = -1;
}
+
ON_GROUPSTART(ListOverrideTable)
{
+ // Content doesn't matter
AN_DESTINATION(Null);
}
+
ON_CONTROLWORD(ListOverrideTable)
{
// New list override clear
@@ -709,10 +806,11 @@ ON_CONTROLWORD(ListOverrideTable)
// List id for current listoverride
else if(cw == "listid" && HAS_PARAM)
{
- wstring id = NUM_ATTR(param);
+ wstring id = XmlComposer::formatInt(param);
if(lists != NULL)
{
+ // Find the list in question
for(int i = 0; i < lists->getLength(); i++)
{
DOM::Node node = lists->item(i);
@@ -737,7 +835,7 @@ ON_CONTROLWORD(ListOverrideTable)
else if(cw == "levelstartat" && HAS_PARAM)
{
if(curList != NULL)
- curList.setAttribute(kAtStart, NUM_ATTR(param));
+ curList.setAttribute(kAtStart, param);
}
else
@@ -755,7 +853,7 @@ ON_CONTROLWORD(ListOverrideTable)
if(curList != NULL)
{
parent.appendChild(curList);
- curList.setAttribute(kAtList, NUM_ATTR(lsId));
+ curList.setAttribute(kAtList, lsId);
}
}
@@ -763,13 +861,9 @@ ON_CONTROLWORD(ListOverrideTable)
}
}
-ON_GROUPEND(ListOverrideTable)
-{
-
-}
-
+// Info Block Analyser --------------------------------------------------------------
ON_INITIALIZE(Info)
{
@@ -777,39 +871,45 @@ ON_INITIALIZE(Info)
AN_ELEMENT(kElInfo);
AN_DESTINATION(Null);
}
+
ON_CONTROLWORD(Info)
{
- // The title
if(cw == "title")
{
AN_ELEMENT(kElTitle);
AN_DESTINATION(Raw);
}
+
else if(cw == "author")
{
AN_ELEMENT(kElAuthor);
AN_DESTINATION(Raw);
}
+
else if(cw == "operator")
{
AN_ELEMENT(kElOperator);
AN_DESTINATION(Raw);
}
+
else if(flags & kAsterisk)
AN_ANALYSER(Skip);
+
else
DEFAULT_CONTROLWORD;
}
-
+// Root Analyser --------------------------------------------------------------------
ON_INITIALIZE(Root)
{
}
+
ON_CONTROLWORD(Root)
{
+ // All the main RTF sections
if(cw == "stylesheet")
AN_ANALYSER(Stylesheet);
else if(cw == "listtable")
@@ -842,23 +942,26 @@ ON_CONTROLWORD(Root)
}
+// Content Destination --------------------------------------------------------------
+
ON_INITIALIZE(Content)
{
- parent = m_parser->getElement();
+ parent = m_composer->getElement();
created = false;
}
+
ON_CHARDATA(Content)
{
// Create the first time we get content
if(!created)
{
- DOM::Element dest = m_parser->createElement(kElDest);
+ DOM::Element dest = m_composer->createElement(kElDest);
parent.appendChild(dest);
- m_parser->replaceElement(dest);
+ m_composer->replaceElement(dest);
- DOM::Element el = m_parser->createElement(kElBlock);
- m_parser->pushElement(el);
- m_parser->popElement();
+ DOM::Element el = m_composer->createElement(kElBlock);
+ m_composer->pushElement(el);
+ m_composer->popElement();
created = true;
}
@@ -867,7 +970,10 @@ ON_CHARDATA(Content)
return;
int elements = 0;
- RtfFormatting& format = m_parser->getTextFormatting();
+ RtfFormatting& format = m_composer->getTextFormatting();
+
+ // Extra elements written out here are consolidated in
+ // XmlFixups::combineDuplicates
// Now do text Properties if necessary
if(format.textIsBold())
@@ -875,37 +981,44 @@ ON_CHARDATA(Content)
AN_ELEMENT(kElB);
elements++;
}
+
if(format.textIsHidden())
{
AN_ELEMENT(kElHide);
elements++;
}
+
if(format.textIsItalic())
{
AN_ELEMENT(kElI);
elements++;
}
+
if(format.textIsStrike())
{
AN_ELEMENT(kElStrike);
elements++;
}
+
if(format.textIsUnderline())
{
AN_ELEMENT(kElU);
elements++;
}
- if(format.textColor() != -1 && m_parser->getOptions().doColors)
+
+ if(format.textColor() != -1 && m_composer->getOptions().doColors)
{
AN_ELEMENT(kElColor);
- AN_ATTRIBUTE(kAtIndex, NUM_ATTR(format.textColor()));
+ AN_ATTRIBUTE(kAtIndex, format.textColor());
elements++;
}
+
if(format.textSuScript() == RtfFormatting::SUPERSCRIPT)
{
AN_ELEMENT(kElSuper);
elements++;
}
+
if(format.textSuScript() == RtfFormatting::SUBSCRIPT)
{
AN_ELEMENT(kElSub);
@@ -913,8 +1026,8 @@ ON_CHARDATA(Content)
}
// Write the data to the element
- m_parser->getElement().appendChild(
- m_parser->getDocument().createTextNode(data));
+ m_composer->getElement().appendChild(
+ m_composer->getDocument().createTextNode(data));
// Now drop out of all the above formatting
while(elements-- > 0)
@@ -922,22 +1035,25 @@ ON_CHARDATA(Content)
}
+// FootNote Analyser ----------------------------------------------------------------
+
ON_INITIALIZE(FootNote)
{
- int ac = m_parser->getAutoCount(AUTOCOUNT_FOOTNOTE);
+ int ac = m_composer->getAutoCount(AUTOCOUNT_FOOTNOTE);
AN_ELEMENT(kElFootNote);
AN_ATTRIBUTE(kAtId, ac);
AN_DESTINATION(Content);
}
+
ON_CONTROLWORD(FootNote)
{
// Inside foot notes there's no link to the foot note
if(cw == "chftn")
{
- DestinationPtr dest = m_parser->getDestination();
+ DestinationPtr dest = m_composer->getDestination();
ASSERT(dest != NULL);
- int ac = m_parser->getAutoCount(AUTOCOUNT_FOOTNOTE);
+ int ac = m_composer->getAutoCount(AUTOCOUNT_FOOTNOTE);
dest->charData(formatInt(ac));
return;
}
@@ -952,27 +1068,28 @@ ON_CONTROLWORD(FootNote)
else
DEFAULT_CONTROLWORD;
}
+
ON_DONE(FootNote)
{
- m_parser->incrementAutoCount(AUTOCOUNT_FOOTNOTE);
+ m_composer->incrementAutoCount(AUTOCOUNT_FOOTNOTE);
}
-
+// Raw Destination ------------------------------------------------------------------
ON_CHARDATA(Raw)
{
// Write the data to the element
- m_parser->getElement().appendChild(
- m_parser->getDocument().createTextNode(data));
+ m_composer->getElement().appendChild(
+ m_composer->getDocument().createTextNode(data));
}
-
+// Attribute Destination ------------------------------------------------------------
ON_INITIALIZE(Attribute)
{
- element = m_parser->getElement();
+ element = m_composer->getElement();
ASSERT(element != NULL);
}
@@ -991,17 +1108,3 @@ ON_CHARDATA(Attribute)
element.setAttribute(name, cur);
}
-wstring RtfParser::formatInt(int num)
-{
- char buff[16];
-
- // Certain OSs don't support swprintf :(
- sprintf(buff, "%d", num);
-
- wstring n;
- for(char* s = buff; *s; s++)
- n.append(1, *s);
-
- return n;
-}
-
diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h
index deba4ba..4e5c739 100644
--- a/src/xmlcomposer.h
+++ b/src/xmlcomposer.h
@@ -36,68 +36,102 @@
*
*/
+// RENAME: xmlcomposer.h
+
#ifndef __RTFPARSER_H__
#define __RTFPARSER_H__
#include "levelhandler.h"
-struct RtfParserOptions
+struct XmlComposerOptions
{
- RtfParserOptions()
+ XmlComposerOptions()
{ memset(this, 0, sizeof(*this)); }
bool doColors;
};
-class RtfParser : public LevelHandler
+/*
+ * XmlComposer
+ *
+ * This is where the RTF gets initially converted to XML. RtfParser sends
+ * notifications to this class's RtfHandler interface. It forwards them to
+ * the current analysers and destinations which produce XML content.
+ * (see xmlcomposehelpers.h)
+ *
+ * Not all conversion is completed here. Because RTF is so very weird we
+ * have to run lots of fixups in endDocument (see rtffixups.h)
+ */
+class XmlComposer :
+ public LevelHandler
{
public:
- RtfParser(const RtfParserOptions& options);
- virtual ~RtfParser();
+ XmlComposer(const XmlComposerOptions& options);
+ virtual ~XmlComposer();
- virtual void startDocument(RtfReader* reader);
+ // Handler Overrides
+ virtual void startDocument(RtfParser* reader);
virtual void endDocument();
virtual void controlWord(const string& cw, int flags, int param);
virtual void groupStart();
virtual void groupEnd();
virtual void charData(wstring data);
- // Element management functions
+ // Create an XML element with given name
DOM::Element createElement(const string& name);
+
+ // Push an XML element on the current level
void pushElement(const DOM::Element& element);
+
+ // Replace current XML element with given element
void replaceElement(const DOM::Element& element);
+
+ // Move up one XML element level without changing RTF level
DOM::Element popElement();
+
+ // Set attributes on the current XML Element
void setAttribute(const string& name, const wstring& value, DOM::Element el = DOM::Element());
void setAttribute(const string& name, int value, DOM::Element el = DOM::Element());
- // Changing the current parser functions
+ // The current analyser in use
+ AnalyserPtr getAnalyser();
void setAnalyser(AnalyserPtr analy);
+
+ // The current destination in use
+ DestinationPtr getDestination();
void setDestination(DestinationPtr dest);
+
+ // Replace the current destination (sets level deep)
DestinationPtr replaceDestination(DestinationPtr dest);
- // The types of auto counters
+
+ // The types of auto numbering
enum
{
AUTOCOUNT_FOOTNOTE,
AUTOCOUNT_MAX
};
- // Functions for auto numbering
+ // Functions for RTF auto numbering
int getAutoCount(int type);
void incrementAutoCount(int type);
- // Current status functions
+
+ // Get the current formatting options
RtfFormatting& getTextFormatting();
- AnalyserPtr getAnalyser();
- DestinationPtr getDestination();
+
DOM::Document getDocument()
{ return m_document; }
const RtfParserOptions& getOptions()
{ return m_options; }
+
+ // TODO: Should this be somewhere else?
static wstring formatInt(int num);
+
+// LevelHandler override
protected:
virtual void clear();
@@ -110,12 +144,12 @@ protected:
int m_autocount[AUTOCOUNT_MAX]; // Auto counters for the document
-
// Sub classes
protected:
+
#define DESTINATION(cls) class cls : public Destination { public:
#define END_DESTINATION };
- #define ANALYSER(cls) class cls : public ParseAnalyser { public:
+ #define ANALYSER(cls) class cls : public BaseAnalyser { public:
#define END_ANALYSER };
#define DATA_PORTION protected:
#define INITIALIZE virtual void initialize();
@@ -125,6 +159,7 @@ protected:
#define GROUPEND virtual void groupEnd();
#define DONE virtual void done();
+ // Main destination for document character content
DESTINATION(Content)
INITIALIZE
CHARDATA
@@ -133,14 +168,16 @@ protected:
DOM::Element parent;
END_DESTINATION
-
+ // Discards character data
DESTINATION(Null)
END_DESTINATION
+ // Copies raw character data to output
DESTINATION(Raw)
CHARDATA
END_DESTINATION
+ // Copies character data to an XML attribute
DESTINATION(Attribute)
Attribute(const string& nm) : name(nm) {}
INITIALIZE
@@ -150,7 +187,9 @@ protected:
DOM::Element element;
END_DESTINATION
- class ParseAnalyser :
+
+ // Base class for analysers with some helper functions
+ class BaseAnalyser :
public Analyser
{
public:
@@ -158,23 +197,34 @@ protected:
{ processDefault(cw, flags, param); }
protected:
- // Some helper functions
+ // Process a standard set of tags that can be found anywhere
bool processDefault(const string& cw, int flags, int param);
+
+ // Process text formatting tags
bool processTextFormatting(const string& cw, int flags, int param, RtfFormatting& format);
+ bool processTextFormatting(const string& cw, int flags, int param);
+
+ // Creates 'fix' tags for paragraph formatting in element
+ void applyParaFormatting(RtfFormatting* format, DOM::Element& el);
+
+ // Process tags that are either text content, or change context
bool processTextContent(const string& cw, int flags, int param);
- bool processTextFormatting(const string& cw, int flags, int param);
+
+ // Process tags that generate text content (like auto-numbering, fields)
bool processTextAutoContent(const string& cw, int flags, int param);
+ // Convenience function
DOM::Element getCurrentBlock();
- void applyParaFormatting(RtfFormatting* format, DOM::Element& el);
};
+ // Skip tags and groups
ANALYSER(Skip)
INITIALIZE
GROUPSTART
END_ANALYSER
+ // Unicode block analyser
ANALYSER(Upr)
Upr(AnalyserPtr prv);
GROUPSTART
@@ -183,11 +233,13 @@ protected:
AnalyserPtr prev;
END_ANALYSER
+ // Handle Stylesheets
ANALYSER(Stylesheet)
INITIALIZE
GROUPSTART
END_ANALYSER
+ // Handle a style in a stylesheet
ANALYSER(Style)
INITIALIZE
CONTROLWORD
@@ -197,11 +249,13 @@ protected:
bool haveStyle;
END_ANALYSER
+ // Handle the list definitions
ANALYSER(ListTable)
INITIALIZE
GROUPSTART
END_ANALYSER
+ // Handle a list in the list definitions
ANALYSER(List)
INITIALIZE
CONTROLWORD
@@ -210,27 +264,30 @@ protected:
int levelsSeen;
END_ANALYSER
+ // Handle list overrides
ANALYSER(ListOverrideTable)
INITIALIZE
CONTROLWORD
GROUPSTART
- GROUPEND
DATA_PORTION
DOM::NodeList lists;
int lsId;
DOM::Element curList;
END_ANALYSER
+ // Creates the info block
ANALYSER(Info)
INITIALIZE
CONTROLWORD
END_ANALYSER
+ // The main root analyser
ANALYSER(Root)
INITIALIZE
CONTROLWORD
END_ANALYSER
+ // Handles footnotes
ANALYSER(FootNote)
INITIALIZE
CONTROLWORD
diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp
index c3fd8b6..6efe6f4 100644
--- a/src/xmlfixups.cpp
+++ b/src/xmlfixups.cpp
@@ -59,636 +59,585 @@ static const char* kConsolidateEnd[] =
static const char* kConsolidateStart[] =
{ kElStylesheet, kElInfo, NULL };
+
void RtfFixups::breakBreak(DOM::Document& doc, const string& contain,
const string& tag)
{
- DOM::NodeList els = doc.getElementsByTagName(tag);
- if(els != NULL)
+ DOM::NodeList els = doc.getElementsByTagName(tag);
+ if(els != NULL)
{
- for(int i = 0; i < els->getLength(); i++)
- {
- DOM::Element el = (const DOM::Element&)els->item(i);
-#if 0
- // See if parent node only has this break tag
- // in it. If so then replace parent with this
-
- DOM::Node parent = el.getParentNode();
-
- if(parent != NULL)
- {
- DOM::Node grandparent = parent.getParentNode();
-
- if(grandparent != NULL &&
- el.getPreviousSibling() == NULL &&
- el.getNextSibling() == NULL)
- {
- grandparent.replaceChild(parent.removeChild(el), parent);
- }
- }
-#endif
-
- breakElement(el, contain);
- }
- }
+ for(int i = 0; i < els->getLength(); i++)
+ {
+ DOM::Element el = (const DOM::Element&)els->item(i);
+ breakElement(el, contain);
+ }
+ }
}
-/**
- * Breaks a paragraph up through a previous level. Calls itself
- * recursively to break paragraphs totally free up to containing
- * destination.
- *
- * For example:
- *
- * <dest>
- * This is <b> a <block fix="para"/>
- * test of </b> your concentration.
- * </dest>
- *
- * Becomes:
- *
- * <dest>
- * This is <b> a </b><block fix="para"/>
- * <b>test of </b> your concentration.
- * </dest>
- */
bool RtfFixups::breakElement(const DOM::Element& el, const string& contain)
{
- ASSERT(el != NULL);
+ ASSERT(el != NULL);
- DOM::Element parent = (const DOM::Element&)el.getParentNode();
- DOM::Element grandparent;
+ DOM::Element parent = (const DOM::Element&)el.getParentNode();
+ DOM::Element grandparent;
- string s = el.getNodeName();
- s = parent.getNodeName();
+ string s = el.getNodeName();
+ s = parent.getNodeName();
- // Get the parent node
- if(parent != NULL)
- grandparent = (const DOM::Element&)parent.getParentNode();
+ // Get the parent node
+ if(parent != NULL)
+ grandparent = (const DOM::Element&)parent.getParentNode();
// Make sure we have something to work with before continuing
- if(grandparent == NULL || parent == NULL ||
- DOMHelpers::isElement(parent, contain))
+ if(grandparent == NULL || parent == NULL ||
+ DOMHelpers::isElement(parent, contain))
return true;
- DOM::Node e;
+ DOM::Node e;
- // Check to see if this is the first node in the parent.
- // If so then just move out to before
- if(el.getPreviousSibling() == NULL)
+ // Check to see if this is the first node in the parent.
+ // If so then just move out to before
+ if(el.getPreviousSibling() == NULL)
{
- e = grandparent.insertBefore(parent.removeChild(el), parent);
- }
+ e = grandparent.insertBefore(parent.removeChild(el), parent);
+ }
// Check to see if this is the last node in the parent.
// If so then just move out to after the parent
else if(el.getNextSibling() == NULL)
- {
- DOM::Node next = parent.getNextSibling();
- if(next == NULL)
- e = grandparent.appendChild(parent.removeChild(el));
- else
- e = grandparent.insertBefore(parent.removeChild(el), next);
+ {
+ DOM::Node next = parent.getNextSibling();
+ if(next == NULL)
+ e = grandparent.appendChild(parent.removeChild(el));
+ else
+ e = grandparent.insertBefore(parent.removeChild(el), next);
}
// Otherwise it's in the middle so split the parent
// element etc...
- else
- {
- // Clone it but not deep
- DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false);
+ else
+ {
+ // Clone it but not deep
+ DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false);
- if(parent2 == NULL)
+ if(parent2 == NULL)
return false;
- // Flag that tells us whether we moved anything up to parent
- bool moved = false;
+ // Flag that tells us whether we moved anything up to parent
+ bool moved = false;
- // Now move all nodes after this one to the second parent.
- while((e = el.getNextSibling()) != NULL)
- {
- parent2.appendChild(parent.removeChild(e));
+ // Now move all nodes after this one to the second parent.
+ while((e = el.getNextSibling()) != NULL)
+ {
+ parent2.appendChild(parent.removeChild(e));
moved = true;
- }
+ }
- // Remove the element from it's parent
- e = parent.removeChild(el);
+ // Remove the element from its parent
+ e = parent.removeChild(el);
- // Okay now we move the paragraph up to the parent
- DOMHelpers::insertAfter(grandparent, e, parent);
- if(moved)
- DOMHelpers::insertAfter(grandparent, parent2, e);
- }
+ // Okay now we move the paragraph up to the parent
+ DOMHelpers::insertAfter(grandparent, e, parent);
+ if(moved)
+ DOMHelpers::insertAfter(grandparent, parent2, e);
+ }
- // Now call it again with the paragraph in the new position
- // untill everything's cut through!
+ // Now call it again with the paragraph in the new position
+ // until everything's cut through!
return breakElement((DOM::Element&)e, contain);
}
-/**
- * Changes from a marker based paragraph system to a contained
- * paragraph system. Also applies paragraph attributes to the
- * appropriate paragraph.
- *
- * For example:
- *
- * <dest>
- * This <blockattr style="10"> is <b> a <block fix="para"/>
- * test of </b> your concentration.
- * </dest>
- *
- * Becomes:
- *
- * <para style="10"> This is <b> a </b></para>
- * <para><b>test of </b> your concentration.</para>
- */
void RtfFixups::breakBlocks(DOM::Document& document)
{
// First break out all the paragraphs to the destination level
DOM::NodeList blocks = document.getElementsByTagName(kElBlock);
if(blocks != NULL)
{
- for(int i = 0; i < blocks->getLength(); i++)
- {
- DOM::Element block = (const DOM::Element&)blocks->item(i);
+ for(int i = 0; i < blocks->getLength(); i++)
+ {
+ DOM::Element block = (const DOM::Element&)blocks->item(i);
// If it's the single closed style para then break it
- if(block != NULL && !block.hasChildNodes())
+ if(block != NULL && !block.hasChildNodes())
breakElement(block, kElDest);
- }
+ }
}
// Now group stuff in destinations into paras or other blocks
- DOM::NodeList destinations = document.getElementsByTagName(kElDest);
- if(destinations != NULL)
- {
- for(int i = 0; i < destinations->getLength(); i++)
- {
- DOM::Element dest = (const DOM::Element&)destinations->item(i);
-
- // Sanity Check
+ DOM::NodeList destinations = document.getElementsByTagName(kElDest);
+ if(destinations != NULL)
+ {
+ for(int i = 0; i < destinations->getLength(); i++)
+ {
+ DOM::Element dest = (const DOM::Element&)destinations->item(i);
+
+ // Sanity Check
if(dest == NULL || !dest.hasChildNodes())
- continue;
+ continue;
- // Go through the children of this destination
- DOM::Node child = dest.getFirstChild();
+ // Go through the children of this destination
+ DOM::Node child = dest.getFirstChild();
- DOM::Element block;
+ DOM::Element block;
- while(child != NULL)
- {
- // If it's a block
- if(DOMHelpers::isElement(child, kElBlock))
- {
- block = (DOM::Element&)child;
- child = child.getNextSibling();
+ while(child != NULL)
+ {
+ // If it's a block
+ if(DOMHelpers::isElement(child, kElBlock))
+ {
+ block = (DOM::Element&)child;
+ child = child.getNextSibling();
continue;
- }
-
- // If it's already a real block element
- for(const char** t = kBlockTags; *t != NULL; t++)
- {
- if(DOMHelpers::isElement(child, *t))
- {
- block = NULL;
- break;
- }
- }
-
- // If there's a block then add to it
- if(block != NULL)
- {
- block.appendChild(dest.removeChild(child));
+ }
+
+ // If it's already a real block element
+ for(const char** t = kBlockTags; *t != NULL; t++)
+ {
+ if(DOMHelpers::isElement(child, *t))
+ {
+ block = NULL;
+ break;
+ }
+ }
+
+ // If there's a block then add to it
+ if(block != NULL)
+ {
+ block.appendChild(dest.removeChild(child));
child = block;
- }
+ }
- child = child.getNextSibling();
+ child = child.getNextSibling();
}
- }
- }
+ }
+ }
}
void RtfFixups::wrapTags(DOM::Document& doc, const string& tagName,
- const string& wrapName)
+ const string& wrapName)
{
- DOM::NodeList tags = doc.getElementsByTagName(tagName);
+ DOM::NodeList tags = doc.getElementsByTagName(tagName);
if(tags != NULL)
- {
- for(int i = 0; i < tags->getLength(); i++)
- {
- DOM::Element tag = (const DOM::Element&)tags->item(i);
+ {
+ for(int i = 0; i < tags->getLength(); i++)
+ {
+ DOM::Element tag = (const DOM::Element&)tags->item(i);
DOM::Element wrap = doc.createElement(wrapName);
- while(tag.hasChildNodes())
- wrap.appendChild(tag.removeChild(tag.getFirstChild()));
+ while(tag.hasChildNodes())
+ wrap.appendChild(tag.removeChild(tag.getFirstChild()));
- tag.appendChild(wrap);
- }
- }
+ tag.appendChild(wrap);
+ }
+ }
}
void RtfFixups::breakTags(DOM::Document& doc, const string& parentName,
- const string& tagName)
+ const string& tagName)
{
- DOM::NodeList parents = doc.getElementsByTagName(parentName);
- if(parents != NULL)
+ DOM::NodeList parents = doc.getElementsByTagName(parentName);
+ if(parents != NULL)
{
- for(int i = 0; i < parents->getLength(); i++)
- {
- DOM::Element parent = (const DOM::Element&)parents->item(i);
+ for(int i = 0; i < parents->getLength(); i++)
+ {
+ DOM::Element parent = (const DOM::Element&)parents->item(i);
- if(!parent.hasChildNodes())
+ if(!parent.hasChildNodes())
continue;
- DOM::NodeList tags = parent.getElementsByTagName(tagName);
- if(tags != NULL)
- {
- for(int i = 0; i < tags->getLength(); i++)
- breakElement((const DOM::Element&)tags->item(i), parentName);
- }
+ // First perform the breaks
+ DOM::NodeList tags = parent.getElementsByTagName(tagName);
+ if(tags != NULL)
+ {
+ for(int i = 0; i < tags->getLength(); i++)
+ breakElement((const DOM::Element&)tags->item(i), parentName);
+ }
- DOM::Node tag = doc.createElement(tagName);
+ DOM::Node tag = doc.createElement(tagName);
parent.insertBefore(tag, parent.getFirstChild());
- DOM::Node child = tag;
+ DOM::Node child = tag;
while(child != NULL && (child = child.getNextSibling()) != NULL)
- {
- if(DOMHelpers::isElement(child, kElBlock))
- {
- DOM::Node next = child.getNextSibling();
- if(next == NULL)
- {
- parent.removeChild(child);
- continue;
- }
-
- if(DOMHelpers::isElement(next, tagName))
- {
- DOM::Node twodown = next.getNextSibling();
- if(!DOMHelpers::isElement(twodown, kElBlock))
- {
- child = parent.insertBefore(parent.removeChild(next), child);
- }
- else
- {
- parent.removeChild(child);
- child = next;
- }
- }
- }
-
- if(DOMHelpers::isElement(child, tagName))
- {
- if(!tag.hasChildNodes())
- parent.removeChild(tag);
- tag = child;
- }
- else
- {
- tag.appendChild(parent.removeChild(child));
+ {
+ if(DOMHelpers::isElement(child, kElBlock))
+ {
+ DOM::Node next = child.getNextSibling();
+ if(next == NULL)
+ {
+ parent.removeChild(child);
+ continue;
+ }
+
+ if(DOMHelpers::isElement(next, tagName))
+ {
+ DOM::Node twodown = next.getNextSibling();
+ if(!DOMHelpers::isElement(twodown, kElBlock))
+ {
+ child = parent.insertBefore(parent.removeChild(next), child);
+ }
+ else
+ {
+ parent.removeChild(child);
+ child = next;
+ }
+ }
+ }
+
+ if(DOMHelpers::isElement(child, tagName))
+ {
+ if(!tag.hasChildNodes())
+ parent.removeChild(tag);
+ tag = child;
+ }
+ else
+ {
+ tag.appendChild(parent.removeChild(child));
child = tag;
- }
- }
+ }
+ }
- if(!tag.hasChildNodes())
- parent.removeChild(tag);
- }
- }
+ if(!tag.hasChildNodes())
+ parent.removeChild(tag);
+ }
+ }
- DOM::NodeList tags = doc.getElementsByTagName(tagName);
+ DOM::NodeList tags = doc.getElementsByTagName(tagName);
if(tags != NULL)
- {
- for(int i = 0; i < tags->getLength(); i++)
- {
- DOM::Element tag = (const DOM::Element&)tags->item(i);
- DOM::Node parent = tag.getParentNode();
-
- if(parent != NULL && !DOMHelpers::isElement(parent, parentName))
- parent.removeChild(tag);
+ {
+ for(int i = 0; i < tags->getLength(); i++)
+ {
+ DOM::Element tag = (const DOM::Element&)tags->item(i);
+ DOM::Node parent = tag.getParentNode();
+ if(parent != NULL && !DOMHelpers::isElement(parent, parentName))
+ parent.removeChild(tag);
#if 0
else if(tag.hasChildNodes())
- {
- DOM::NodeList children = tag.getChildNodes();
- if(children != NULL && children->getLength() == 1)
- {
- DOM::Node child = children->item(0);
- if(child != NULL && !child.hasChildNodes() &&
- DOMHelpers::isElement(child, kElBlock))
- parent.removeChild(tag);
- }
- }
+ {
+ DOM::NodeList children = tag.getChildNodes();
+ if(children != NULL && children->getLength() == 1)
+ {
+ DOM::Node child = children->item(0);
+ if(child != NULL && !child.hasChildNodes() &&
+ DOMHelpers::isElement(child, kElBlock))
+ parent.removeChild(tag);
+ }
+ }
#endif
-
- }
- }
+ }
+ }
}
void RtfFixups::breakLists(DOM::Document& doc)
{
- // Now group stuff in destinations into tables
- DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
+ DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
if(destinations != NULL)
- {
- for(int i = 0; i < destinations->getLength(); i++)
- {
- DOM::Element dest = (const DOM::Element&)destinations->item(i);
+ {
+ for(int i = 0; i < destinations->getLength(); i++)
+ {
+ DOM::Element dest = (const DOM::Element&)destinations->item(i);
- // Sanity Check
+ // Sanity Check
if(dest == NULL)
continue;
- // Go through the children of this destination
- DOM::Node child = dest.getFirstChild();
+ // Go through the children of this destination
+ DOM::Node child = dest.getFirstChild();
- DOM::Element list;
- DOM::Element e;
+ DOM::Element list;
+ DOM::Element e;
- wstring previd;
+ wstring previd;
- while(child != NULL)
- {
- // If it's a block and has a cell attribute
+ while(child != NULL)
+ {
+ // If it's a block ...
if(DOMHelpers::isElement(child, kElBlock))
- {
- e = (DOM::Element&)child;
+ {
+ e = (DOM::Element&)child;
- // if it has a cell attribute
- wstring listid = e.getAttribute(kAtList);
- if(listid.length() > 0)
+ // ... and has a list attribute
+ wstring listid = e.getAttribute(kAtList);
+ if(listid.length() > 0)
{
- e.removeAttribute(kAtList);
+ e.removeAttribute(kAtList);
- if(list == NULL || previd != listid)
- {
- list = doc.createElement(kElList);
- list.setAttribute(kAtList, listid);
+ if(list == NULL || previd != listid)
+ {
+ list = doc.createElement(kElList);
+ list.setAttribute(kAtList, listid);
dest.insertBefore(list, child);
previd = listid;
- }
- }
- else
- {
- list = NULL;
- previd.erase();
+ }
}
- }
+ else
+ {
+ list = NULL;
+ previd.erase();
+ }
+ }
- // It's not a block
+ // It's not a block
if(list != NULL)
- {
- list.appendChild(dest.removeChild(child));
- child = list;
- }
+ {
+ list.appendChild(dest.removeChild(child));
+ child = list;
+ }
- child = child.getNextSibling();
+ child = child.getNextSibling();
}
- }
- }
+ }
+ }
}
void RtfFixups::fixStyles(const DOM::Document doc)
{
- DOM::NodeList styles = doc.getElementsByTagName(kElStyle);
- if(styles != NULL)
- {
- DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
- if(blocks != NULL)
- {
- for(int i = 0; i < blocks->getLength(); i++)
- {
- DOM::Element block = (const DOM::Element&)blocks->item(i);
+ // Get all stylesheet styles
+ DOM::NodeList styles = doc.getElementsByTagName(kElStyle);
+ if(styles != NULL)
+ {
+ // Get list of blocks in the document
+ DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
+ if(blocks != NULL)
+ {
+ for(int i = 0; i < blocks->getLength(); i++)
+ {
+ DOM::Element block = (const DOM::Element&)blocks->item(i);
if(block == NULL || !block.hasAttribute(kElStyle))
- continue;
-
- for(int j = 0; j < styles->getLength(); j++)
- {
- DOM::Element style = (const DOM::Element&)styles->item(j);
- if(style != NULL)
- {
- if(style.getAttribute(kAtId) == block.getAttribute(kElStyle))
- {
- wstring name = style.getAttribute(kAtName);
+ continue;
+
+ // Lookup block styles
+ for(int j = 0; j < styles->getLength(); j++)
+ {
+ DOM::Element style = (const DOM::Element&)styles->item(j);
+ if(style != NULL)
+ {
+ if(style.getAttribute(kAtId) == block.getAttribute(kElStyle))
+ {
+ // And change to the name
+ wstring name = style.getAttribute(kAtName);
if(name.length() > 0)
- block.setAttribute(kElStyle, name);
+ block.setAttribute(kElStyle, name);
}
- }
- }
- }
- }
-
- for(int i = 0; i < styles->getLength(); i++)
- {
- DOM::Element style = (const DOM::Element&)styles->item(i);
- if(style != NULL)
- style.removeAttribute(kAtId);
- }
- }
-
+ }
+ }
+ }
+ }
+ // A little cleanup of the stylesheet styles
+ for(int i = 0; i < styles->getLength(); i++)
+ {
+ DOM::Element style = (const DOM::Element&)styles->item(i);
+ if(style != NULL)
+ style.removeAttribute(kAtId);
+ }
+ }
}
void RtfFixups::breakTables(DOM::Document& doc)
{
- DOM::NodeList rows = doc.getElementsByTagName(kElRow);
- if(rows != NULL)
- {
- for(int i = 0; i < rows->getLength(); i++)
- {
- DOM::Element row = (const DOM::Element&)rows->item(i);
- DOM::Node parent = row.getParentNode();
-
- if(parent == NULL)
- continue;
-
- if(DOMHelpers::isElement(parent, kElBlock))
+ // Break rows out to destinations
+ DOM::NodeList rows = doc.getElementsByTagName(kElRow);
+ if(rows != NULL)
+ {
+ for(int i = 0; i < rows->getLength(); i++)
+ {
+ DOM::Element row = (const DOM::Element&)rows->item(i);
+ DOM::Node parent = row.getParentNode();
+
+ if(parent == NULL)
+ continue;
+
+ if(DOMHelpers::isElement(parent, kElBlock))
{
- DOM::Node grandparent = parent.getParentNode();
+ DOM::Node grandparent = parent.getParentNode();
- if(grandparent != NULL && !row.hasChildNodes())
+ if(grandparent != NULL && !row.hasChildNodes())
{
- if(row.getPreviousSibling() == NULL)
- grandparent.insertBefore(parent.removeChild(row), parent);
- else if(row.getNextSibling() == NULL)
- DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent);
- }
- }
-
- breakElement(row, kElDest);
- }
- }
-
+ if(row.getPreviousSibling() == NULL)
+ grandparent.insertBefore(parent.removeChild(row), parent);
+ else if(row.getNextSibling() == NULL)
+ DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent);
+ }
+ }
+ breakElement(row, kElDest);
+ }
+ }
- // Now group stuff in destinations into tables
- DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
+ // Now group stuff in destinations into tables
+ DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
if(destinations != NULL)
- {
- for(int i = 0; i < destinations->getLength(); i++)
- {
- DOM::Element dest = (const DOM::Element&)destinations->item(i);
+ {
+ for(int i = 0; i < destinations->getLength(); i++)
+ {
+ DOM::Element dest = (const DOM::Element&)destinations->item(i);
- // Sanity Check
+ // Sanity Check
if(dest == NULL)
continue;
- // Go through the children of this destination
- DOM::Node child = dest.getFirstChild();
+ // Go through the children of this destination
+ DOM::Node child = dest.getFirstChild();
- DOM::Element table;
+ DOM::Element table;
DOM::Element e;
- while(child != NULL)
- {
- // If it's a block and has a cell attribute
+ while(child != NULL)
+ {
+ // If it's a block and has a cell attribute
if(DOMHelpers::isElement(child, kElBlock))
- {
- e = (DOM::Element&)child;
-
- // if it has a cell attribute
- if(e.getAttribute(kAtCell).length() > 0)
- {
- e.removeAttribute(kAtCell);
-
- if(table == NULL)
- {
- table = doc.createElement(kElTable);
- dest.insertBefore(table, child);
- }
- }
- else
- {
- table = NULL;
- }
- }
-
- // It's not a block
+ {
+ e = (DOM::Element&)child;
+
+ // if it has a cell attribute
+ if(e.getAttribute(kAtCell).length() > 0)
+ {
+ e.removeAttribute(kAtCell);
+
+ if(table == NULL)
+ {
+ table = doc.createElement(kElTable);
+ dest.insertBefore(table, child);
+ }
+ }
+ else
+ {
+ table = NULL;
+ }
+ }
+
+ // It's not a block
if(table != NULL)
- {
- table.appendChild(dest.removeChild(child));
+ {
+ table.appendChild(dest.removeChild(child));
child = table;
- }
+ }
- child = child.getNextSibling();
+ child = child.getNextSibling();
}
- }
- }
+ }
+ }
}
void RtfFixups::removeTags(const DOM::Document& doc)
{
- // Go through the list of nodes
+ // Go through the list of nodes
for(const char** t = kRemoveTags; *t != NULL; t++)
- {
- DOM::NodeList elements = doc.getElementsByTagName(*t);
- if(elements != NULL)
- {
- for(int j = 0; j < elements->getLength(); j++)
- {
- DOM::Element el = (const DOM::Element&)elements->item(j);
- DOM::Node parent = el->getParentNode();
-
- if(parent == NULL)
- continue;
-
- while(el.hasChildNodes())
- parent.insertBefore(el.removeChild(el.getFirstChild()), el);
-
- parent.removeChild(el);
+ {
+ DOM::NodeList elements = doc.getElementsByTagName(*t);
+ if(elements != NULL)
+ {
+ for(int j = 0; j < elements->getLength(); j++)
+ {
+ DOM::Element el = (const DOM::Element&)elements->item(j);
+ DOM::Node parent = el->getParentNode();
+
+ if(parent == NULL)
+ continue;
+
+ while(el.hasChildNodes())
+ parent.insertBefore(el.removeChild(el.getFirstChild()), el);
+
+ parent.removeChild(el);
}
- }
- }
+ }
+ }
}
void RtfFixups::fixLists(const DOM::Document doc)
{
- DOM::NodeList lists = doc.getElementsByTagName(kElList);
- if(lists != NULL)
- {
- DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef);
- if(listdefs != NULL)
- {
- for(int i = 0; i < listdefs->getLength(); i++)
- {
- DOM::Element listdef = (const DOM::Element&)listdefs->item(i);
+ // Get all the lists
+ DOM::NodeList lists = doc.getElementsByTagName(kElList);
+ if(lists != NULL)
+ {
+ // And all the list definitions
+ DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef);
+ if(listdefs != NULL)
+ {
+ for(int i = 0; i < listdefs->getLength(); i++)
+ {
+ DOM::Element listdef = (const DOM::Element&)listdefs->item(i);
if(listdef == NULL || !listdef.hasAttribute(kAtList))
- continue;
+ continue;
- for(int j = 0; j < lists->getLength(); j++)
+ for(int j = 0; j < lists->getLength(); j++)
{
- DOM::Element list = (const DOM::Element&)lists->item(j);
- if(list != NULL)
- {
- if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList))
+ DOM::Element list = (const DOM::Element&)lists->item(j);
+ if(list != NULL)
+ {
+ if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList))
{
- DOMHelpers::copyAttributes(listdef, list, kHideList);
- list.removeAttribute(kAtList);
- }
- }
- }
- }
- }
- }
+ // And copy all the attributes from the list definition to the list
+ DOMHelpers::copyAttributes(listdef, list, kHideList);
+ list.removeAttribute(kAtList);
+ }
+ }
+ }
+ }
+ }
+ }
}
void RtfFixups::fixBlocks(const DOM::Document doc)
{
- // First break out all the paragraphs to the destination level
- DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
- if(blocks != NULL)
- {
- string fix;
+ // Get all the blocks
+ DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
+ if(blocks != NULL)
+ {
+ string fix;
wstring val;
- for(int i = 0; i < blocks->getLength(); i++)
- {
- DOM::Element block = (const DOM::Element&)blocks->item(i);
- DOM::Node parent = block.getParentNode();
-
- if(parent == NULL)
- continue;
+ for(int i = 0; i < blocks->getLength(); i++)
+ {
+ DOM::Element block = (const DOM::Element&)blocks->item(i);
+ DOM::Node parent = block.getParentNode();
- fix.resize(0);
- val.resize(0);
+ if(parent == NULL)
+ continue;
- val = block.getAttribute(kAtFix);
- if(val.length() > 0)
- block.removeAttribute(kAtFix);
+ fix.resize(0);
+ val.resize(0);
+ // Figure out what kind of element they want block fixed to
+ val = block.getAttribute(kAtFix);
+ if(val.length() > 0)
+ block.removeAttribute(kAtFix);
- if(val.length() > 0)
- {
- val = block.getAttributeNS("", kAtFix);
+ // BUG: Sablotron bug workaround
+ if(val.length() > 0)
+ {
+ val = block.getAttributeNS("", kAtFix);
if(val.length() > 0)
- block.removeAttributeNS("", kAtFix);
- }
+ block.removeAttributeNS("", kAtFix);
+ }
- if(val.length() > 0)
- DOM::transcode16to8(val, fix);
+ if(val.length() > 0)
+ DOM::transcode16to8(val, fix);
if(fix.length() == 0)
- fix = kElPara;
+ fix = kElPara;
+ // Create duplicate of the 'fix' element
DOM::Element el = doc.createElement(fix);
- DOMHelpers::copyAttributes(block, el, NULL);
+ DOMHelpers::copyAttributes(block, el, NULL);
- while(block.hasChildNodes())
- el.appendChild(block.removeChild(block.getFirstChild()));
+ // Replace block with the given 'fix' element
+ while(block.hasChildNodes())
+ el.appendChild(block.removeChild(block.getFirstChild()));
- parent.replaceChild(el, block);
+ parent.replaceChild(el, block);
}
- }
+ }
}
-/**
- * Consolidates a certain tag types at the end of the document
- */
void RtfFixups::consolidateEndTags(DOM::Document& doc)
{
DOM::Element top = doc.getDocumentElement();
@@ -721,9 +670,6 @@ void RtfFixups::consolidateEndTags(DOM::Document& doc)
}
}
-/**
- * Consolidates a certain tag types at the start of the document
- */
void RtfFixups::consolidateStartTags(DOM::Document& doc)
{
DOM::Element top = doc.getDocumentElement();
@@ -759,10 +705,7 @@ void RtfFixups::consolidateStartTags(DOM::Document& doc)
}
}
-/**
- * Removes adjacent duplicate nodes of certain names
- */
-void RtfFixups::removeDuplicates(const DOM::Document& doc)
+void RtfFixups::combineDuplicates(const DOM::Document& doc)
{
bool found;
diff --git a/src/xmlfixups.h b/src/xmlfixups.h
index a250c5a..1716925 100644
--- a/src/xmlfixups.h
+++ b/src/xmlfixups.h
@@ -41,23 +41,95 @@
#include "sablo.h"
-class RtfFixups
+/*
+ * XMLFixups
+ *
+ * Because RTF is so 'different' (read: brain dead) we need to do all sorts
+ * of antics to get it into a nice XML format. Some of the XML Composition
+ * is done in XMLComposer, but whatever can't be done there as we're parsing
+ * gets done here after the fact.
+ *
+ * These functions are called from XMLComposer::endDocument and massage the
+ * resulting XML DOM into shape.
+ */
+class XMLFixups
{
public:
- // Cleanup Functions
+ // Replace blocks with 'fix' elements like paragraphs
static void fixBlocks(DOM::Document doc);
+
+ // Pass 2 list fixups
static void fixLists(const DOM::Document doc);
+
+ // Pass 2 style fixups
static void fixStyles(const DOM::Document doc);
+
+ /*
+ * Breaks a paragraph up through a previous level. Calls itself
+ * recursively to break paragraphs completely free, up to the
+ * containing destination.
+ *
+ * For example:
+ *
+ * <dest>
+ * This is <b> a <block fix="para"/>
+ * test of </b> your concentration.
+ * </dest>
+ *
+ * Becomes:
+ *
+ * <dest>
+ * This is <b> a </b><block fix="para"/>
+ * <b>test of </b> your concentration.
+ * </dest>
+ */
static bool breakElement(const DOM::Element& el, const string& contain);
+
+ // Break all tags of a given type to a previous level (see above)
static void breakBreak(DOM::Document& doc, const string& contain, const string& tag);
+
+ // Used to break table cells and rows into blocks (but more complicated)
+ static void breakTags(DOM::Document& doc, const string& parentName, const string& tagName);
+
+ // Fixes and combines list elements with the same id
static void breakLists(DOM::Document& document);
+
+ // Used to find and create tables and perform initial break out
static void breakTables(DOM::Document& document);
- static void breakTags(DOM::Document& doc, const string& parentName, const string& tagName);
+
+
+ /*
+ * Changes from a marker based paragraph system to a contained
+ * paragraph system. Also applies paragraph attributes to the
+ * appropriate paragraph.
+ *
+ * For example:
+ *
+ * <dest>
+ * This <blockattr style="10"> is <b> a <block fix="para"/>
+ * test of </b> your concentration.
+ * </dest>
+ *
+ * Becomes:
+ *
+ * <para style="10"> This is <b> a </b></para>
+ * <para><b>test of </b> your concentration.</para>
+ */
static void breakBlocks(DOM::Document& document);
+
+ // Wrap certain tags in a wrapper tag of given name
static void wrapTags(DOM::Document& document, const string& tagName, const string& wrapName);
+
+ // Remove certain tags from document
static void removeTags(const DOM::Document& doc);
- static void removeDuplicates(const DOM::Document& doc);
+
+ // Combines certain adjacent duplicate tags
+ static void combineDuplicates(const DOM::Document& doc);
+
+ // Consolidates certain tag types at the beginning of the document
static void consolidateStartTags(DOM::Document& doc);
+
+ // Consolidates certain tag types at the end of the document
static void consolidateEndTags(DOM::Document& doc);
};