summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configure.in21
-rw-r--r--src/Makefile.am2
-rw-r--r--src/domhelpers.cpp181
-rw-r--r--src/domhelpers.h57
-rw-r--r--src/tags.h112
-rw-r--r--src/xmlcomposer.cpp112
-rw-r--r--src/xmlcomposer.h3
-rw-r--r--src/xmlfixups.cpp739
-rw-r--r--src/xmlfixups.h62
9 files changed, 1201 insertions, 88 deletions
diff --git a/configure.in b/configure.in
index 997e73a..2e5f435 100644
--- a/configure.in
+++ b/configure.in
@@ -55,13 +55,30 @@ AC_PROG_INSTALL
AC_PROG_LN_S
AC_PROG_MAKE_SET
+# Debug mode
+AC_ARG_ENABLE(debug,
+ AC_HELP_STRING([--enable-debug],
+ [Compile binaries in debug mode]))
+
+if test "$enable_debug" = "yes"; then
+ CFLAGS="$CFLAGS -g -O0"
+ AC_DEFINE_UNQUOTED(_DEBUG, 1, [In debug mode])
+ echo "enabling debug compile mode"
+fi
+
# Check for libraries
AC_CHECK_LIB(c, iconv, [ LIB_ICONV=""; ],
[ AC_CHECK_LIB(iconv, iconv, [ LIB_ICONV="-liconv"; LIBS="-liconv $LIBS" ],
[echo "ERROR: Must have iconv routines."; exit 1])
])
-AC_CHECK_LIB(expat, XML_Parse, ,
- [echo "ERROR: Must install sablotron libraries."; exit 1])
+
+# Sablotron can depend on these libraries so include if present
+AC_CHECK_LIB(termcap, tgetnum)
+AC_CHECK_LIB(readline, readline)
+AC_CHECK_LIB(expat, XML_Parse)
+AC_CHECK_LIB(js, JS_NewContext)
+
+# Check for Sablotron
AC_CHECK_LIB(sablot, SDOM_createElement, ,
[echo "ERROR: Must install sablotron libraries."; exit 1])
diff --git a/src/Makefile.am b/src/Makefile.am
index f4253a3..4261c60 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -4,7 +4,7 @@ bin_PROGRAMS = rtfx
rtfx_SOURCES = rtfx.cpp basehandler.cpp basehandler.h levelhandler.cpp levelhandler.h \
reference.h rtfanalyser.cpp rtfanalyser.h rtfparsehelpers.cpp rtfformatting.h \
rtfparsehelpers.h rtfreader.cpp rtfreader.h sablo.h sablotr.cpp usuals.h \
- rtffixups.h rtffixups.cpp
+ rtffixups.h rtffixups.cpp domhelpers.h domhelpers.cpp tags.h
rtfx_LDADD = -lsablot -lexpat $(LIB_ICONV)
rtfx_CFLAGS = -O0 -I${top_srcdir} -I/usr/local/include
rtfx_LDFLAGS = -L/usr/local/lib
diff --git a/src/domhelpers.cpp b/src/domhelpers.cpp
new file mode 100644
index 0000000..7b06f55
--- /dev/null
+++ b/src/domhelpers.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2004, Nate Nielsen
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * * Redistributions in binary form must reproduce the
+ * above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ * * The names of contributors to this software may not be
+ * used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ *
+ * CONTRIBUTORS
+ * Nate Nielsen <nielsen@memberwebs.com>
+ *
+ */
+
+#include "usuals.h"
+#include "domhelpers.h"
+#include "tags.h"
+
+/**
+ * Tests whether a node is a non-null element node whose node
+ * name matches the one given.
+ */
+bool DOMHelpers::isElement(const DOM::Node& node, const string& name)
+{
+    // Only a valid element node can match; everything else is
+    // rejected before the name is even looked at
+    if(node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE)
+        return node.getNodeName() == name;
+
+    return false;
+}
+
+/**
+ * Compares two elements for equality. Two elements are equal when
+ * they have the same node name and the same number of attributes,
+ * and every attribute of the first element is present on the
+ * second with an identical value. Child nodes are not compared.
+ *
+ * Returns true when the elements are equal, false otherwise.
+ */
+bool DOMHelpers::isEqualElement(const DOM::Element& el1, const DOM::Element& el2)
+{
+    // Differently named elements can never be equal
+    if(el1.getNodeName() != el2.getNodeName())
+        return false;
+
+    DOM::NamedNodeMap at1 = el1.getAttributes();
+    DOM::NamedNodeMap at2 = el2.getAttributes();
+
+    // Neither element has an attribute map, nothing more to compare
+    if(at1 == NULL && at2 == NULL)
+        return true;
+
+    // Only one map, or a different number of attributes
+    if(at1 == NULL || at2 == NULL ||
+       at1->getLength() != at2->getLength())
+        return false;
+
+    // The counts match, so it is enough to check that every
+    // attribute of the first element has an equal partner
+    for(int i = 0; i < at1->getLength(); i++)
+    {
+        DOM::Attr attr1 = (const DOM::Attr&)at1->item(i);
+        if(attr1 == NULL)
+            return false;
+
+        DOM::Attr attr2 = (const DOM::Attr&)at2->getNamedItem(attr1.getNodeName());
+        if(attr2 == NULL)
+            return false;
+
+        if(attr1.getNodeValue() != attr2.getNodeValue())
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Walks up the parent chain of the given node and returns the
+ * first ancestor that is an element with the given name. The
+ * node itself is never considered. A default constructed
+ * element is returned when no such ancestor exists.
+ */
+DOM::Element DOMHelpers::getContainingElement(const DOM::Node& node, const string& name)
+{
+    for(DOM::Node cur = node.getParentNode(); cur != NULL; cur = cur.getParentNode())
+    {
+        if(isElement(cur, name))
+            return (DOM::Element&)cur;
+    }
+
+    // Ran off the top of the tree without finding it
+    return DOM::Element();
+}
+
+/**
+ * Returns true when the attribute name begins with the
+ * namespace prefix kNSPrefix.
+ */
+bool isNsAttr(const string& name)
+{
+    const size_t prefixLen = strlen(kNSPrefix);
+    return strncmp(name.c_str(), kNSPrefix, prefixLen) == 0;
+}
+
+/**
+ * Copies the attributes of src onto dest using setAttribute.
+ *
+ * Attributes are skipped when their name is empty, when it starts
+ * with the namespace prefix, or when it appears in hideList.
+ * hideList is a NULL terminated array of names and may itself be
+ * NULL, in which case nothing is hidden.
+ */
+void DOMHelpers::copyAttributes(const DOM::Element& src, DOM::Element& dest,
+                                const char** hideList)
+{
+    // Both elements must expose an attribute map
+    DOM::NamedNodeMap srcMap = src.getAttributes();
+    DOM::NamedNodeMap destMap = dest.getAttributes();
+
+    if(srcMap == NULL || destMap == NULL)
+        return;
+
+    for(int idx = 0; idx < srcMap->getLength(); idx++)
+    {
+        DOM::Node attr = srcMap->item(idx);
+        if(attr == NULL)
+            continue;
+
+        // NOTE: The attribute node is deliberately not cloned and
+        // added with setNamedItem. Sablotron seems to have a bug
+        // in its setAttributeNode implementation: it always adds
+        // a blank namespace. So the name and value are copied
+        // by hand instead.
+
+        string name = attr.getNodeName();
+
+        // See whether the caller asked for this one to be left out
+        bool hidden = false;
+        if(hideList != NULL)
+        {
+            for(const char** t = hideList; *t != NULL && !hidden; t++)
+                hidden = (name == *t);
+        }
+
+        if(!hidden && name.length() > 0 && !isNsAttr(name))
+            dest.setAttribute(name, attr.getNodeValue());
+    }
+}
+
+/**
+ * Searches backwards for an element with the given name. The
+ * node itself is checked first, then each of its previous
+ * siblings in turn. If none of those match, the same search is
+ * repeated starting at the parent node. A default constructed
+ * element is returned when the top of the tree is reached
+ * without a match.
+ */
+DOM::Element DOMHelpers::getPriorElement(const DOM::Node& node, const string& name)
+{
+    // This node and everything before it on the same level
+    for(DOM::Node cur = node; cur != NULL; cur = cur.getPreviousSibling())
+    {
+        if(isElement(cur, name))
+            return (DOM::Element&)cur;
+    }
+
+    // Nothing on this level, so try one level up
+    DOM::Node parent = node.getParentNode();
+    if(parent != NULL)
+        return getPriorElement(parent, name);
+
+    return DOM::Element();
+}
+
+/**
+ * Inserts node into parent directly after the reference node
+ * ref. When ref has no next sibling the node is appended to
+ * parent instead.
+ */
+void DOMHelpers::insertAfter(DOM::Node& parent, const DOM::Node& node,
+                             const DOM::Node& ref)
+{
+    DOM::Node following = ref.getNextSibling();
+
+    if(following != NULL)
+    {
+        parent.insertBefore(node, following);
+        return;
+    }
+
+    // ref is the last child
+    parent.appendChild(node);
+}
+
diff --git a/src/domhelpers.h b/src/domhelpers.h
new file mode 100644
index 0000000..16afd79
--- /dev/null
+++ b/src/domhelpers.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2004, Nate Nielsen
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * * Redistributions in binary form must reproduce the
+ * above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ * * The names of contributors to this software may not be
+ * used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ *
+ * CONTRIBUTORS
+ * Nate Nielsen <nielsen@memberwebs.com>
+ *
+ */
+
+#ifndef __DOMHELPERS_H__
+#define __DOMHELPERS_H__
+
+#include "sablo.h"
+
+/**
+ * A collection of static helper functions for working with
+ * DOM nodes and elements. The class only declares static
+ * members.
+ */
+class DOMHelpers
+{
+public:
+
+ // DOM Helper Functions
+
+ // Checks whether node is an element with the given name
+ static bool isElement(const DOM::Node& node, const string& name);
+
+ // Intended to report whether two elements have the same name
+ // and the same attributes
+ static bool isEqualElement(const DOM::Element& el1, const DOM::Element& el2);
+
+ // Copies attributes from src to dest. hideList is a NULL
+ // terminated array of attribute names to leave out, or NULL
+ static void copyAttributes(const DOM::Element& src, DOM::Element& dest, const char** hideList);
+
+ // Inserts node into parent directly after ref
+ static void insertAfter(DOM::Node& parent, const DOM::Node& node, const DOM::Node& ref);
+
+ // Finds the nearest ancestor element of node with the given name
+ static DOM::Element getContainingElement(const DOM::Node& node, const string& name);
+
+ // Finds an element with the given name at or before node,
+ // looking at previous siblings and then at the parents
+ static DOM::Element getPriorElement(const DOM::Node& node, const string& name);
+};
+
+#endif // __DOMHELPERS_H__
diff --git a/src/tags.h b/src/tags.h
new file mode 100644
index 0000000..d0382cc
--- /dev/null
+++ b/src/tags.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2004, Nate Nielsen
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * * Redistributions in binary form must reproduce the
+ * above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ * * The names of contributors to this software may not be
+ * used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ *
+ * CONTRIBUTORS
+ * Nate Nielsen <nielsen@memberwebs.com>
+ *
+ */
+
+#ifndef __TAGS_H__
+#define __TAGS_H__
+
+// Names of the i_ prefixed elements and attributes.
+// NOTE(review): presumably internal markup that only exists while
+// the document is being built and fixed up; confirm against the
+// RtfFixups passes that consume them.
+static const char* kElDest = "i_dest";
+static const char* kElBlock = "i_block";
+static const char* kAtFix = "i_fix";
+static const char* kAtCell = "i_cell";
+static const char* kElListtable = "i_listtable";
+static const char* kElListdef = "i_listdef";
+
+// Element names
+static const char* kElPara = "para";
+static const char* kElDoc = "document";
+static const char* kElTab = "tab";
+static const char* kElSect = "sect";
+static const char* kElPage = "page";
+static const char* kElStyle = "style";
+static const char* kElLine = "line";
+static const char* kElList = "list";
+static const char* kElStylesheet = "stylesheet";
+static const char* kElInfo = "info";
+static const char* kElTitle = "title";
+static const char* kElAuthor = "author";
+static const char* kElOperator = "operator";
+static const char* kElB = "b";
+static const char* kElHide = "hide";
+static const char* kElI = "i";
+static const char* kElStrike = "strike";
+static const char* kElU = "u";
+static const char* kElColor = "color";
+static const char* kElCell = "cell";
+static const char* kElRow = "row";
+static const char* kElTable = "table";
+
+// Attribute names
+// NOTE(review): kAtIndex has the same value as kAtId ("id");
+// confirm that this is intended.
+static const char* kAtList = "list";
+static const char* kAtName = "name";
+static const char* kAtBold = "bold";
+static const char* kAtHidden = "hide";
+static const char* kAtItalic = "italic";
+static const char* kAtStrike = "strike";
+static const char* kAtUnderline = "underline";
+static const char* kAtColor = "color";
+static const char* kAtType = "type";
+static const char* kAtOrdered = "ordered";
+static const char* kAtStart = "start";
+static const char* kAtId = "id";
+static const char* kAtIndex = "id";
+
+// Wide string attribute values.
+// NOTE(review): the names suggest list numbering types; confirm
+// against the code that uses them.
+static const wchar_t* kValDisc = L"disc";
+static const wchar_t* kValLowerAlpha = L"lower-alpha";
+static const wchar_t* kValUpperAlpha = L"upper-alpha";
+static const wchar_t* kValLowerRoman = L"lower-roman";
+static const wchar_t* kValUpperRoman = L"upper-roman";
+static const wchar_t* kValArabic = L"arabic";
+static const wchar_t* kValNull = L"";
+
+// Wide string values naming kinds of blocks
+static const wchar_t* kValList = L"list";
+static const wchar_t* kValPara = L"para";
+static const wchar_t* kValTable = L"table";
+
+// NULL terminated list of element names walked by
+// RtfFixups::removeDuplicates.
+// NOTE(review): kElColor is listed twice; confirm whether another
+// tag (kElStrike perhaps) was meant for one of the two slots.
+static const char* kNoDuplicates[] =
+ { kElB, kElU, kElI, kElColor, kElHide, kElColor, NULL };
+
+// NULL terminated list of element names walked by
+// RtfFixups::removeTags
+static const char* kRemoveTags[] =
+ { kElDest, kElListdef, kElListtable, NULL };
+
+// NULL terminated list of element names that RtfFixups::breakBlocks
+// treats as already being real block elements
+static const char* kBlockTags[] =
+ { kElTable, kElPara, NULL };
+
+// NULL terminated list of attribute names that RtfFixups::fixLists
+// passes to DOMHelpers::copyAttributes as the names to leave out
+static const char* kHideList[] =
+ { kAtId, kAtList, NULL };
+
+// Attributes whose name starts with this prefix are not copied
+// by DOMHelpers::copyAttributes
+static const char* kNSPrefix = "xmlns";
+
+#endif // __TAGS_H__
diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp
index 967fd3c..9be1eeb 100644
--- a/src/xmlcomposer.cpp
+++ b/src/xmlcomposer.cpp
@@ -39,76 +39,8 @@
#include "usuals.h"
#include "rtfanalyser.h"
#include "rtffixups.h"
-
-const char* kElDest = "i_dest";
-const char* kElBlock = "i_block";
-const char* kAtFix = "i_fix";
-const char* kAtCell = "i_cell";
-const char* kElListtable = "i_listtable";
-const char* kElListdef = "i_listdef";
-
-const char* kElPara = "para";
-const char* kElDoc = "document";
-const char* kElTab = "tab";
-const char* kElSect = "sect";
-const char* kElPage = "page";
-const char* kElStyle = "style";
-const char* kElLine = "line";
-const char* kElList = "list";
-const char* kElStylesheet = "stylesheet";
-const char* kElInfo = "info";
-const char* kElTitle = "title";
-const char* kElAuthor = "author";
-const char* kElOperator = "operator";
-const char* kElB = "b";
-const char* kElHide = "hide";
-const char* kElI = "i";
-const char* kElStrike = "strike";
-const char* kElU = "u";
-const char* kElColor = "color";
-const char* kElCell = "cell";
-const char* kElRow = "row";
-const char* kElTable = "table";
-
-const char* kAtList = "list";
-const char* kAtName = "name";
-const char* kAtBold = "bold";
-const char* kAtHidden = "hide";
-const char* kAtItalic = "italic";
-const char* kAtStrike = "strike";
-const char* kAtUnderline = "underline";
-const char* kAtColor = "color";
-const char* kAtType = "type";
-const char* kAtOrdered = "ordered";
-const char* kAtStart = "start";
-const char* kAtId = "id";
-const char* kAtIndex = "id";
-
-const wchar_t* kValDisc = L"disc";
-const wchar_t* kValLowerAlpha = L"lower-alpha";
-const wchar_t* kValUpperAlpha = L"upper-alpha";
-const wchar_t* kValLowerRoman = L"lower-roman";
-const wchar_t* kValUpperRoman = L"upper-roman";
-const wchar_t* kValArabic = L"arabic";
-const wchar_t* kValNull = L"";
-
-const wchar_t* kValList = L"list";
-const wchar_t* kValPara = L"para";
-const wchar_t* kValTable = L"table";
-
-const char* kNoDuplicates[] =
- { kElB, kElU, kElI, kElColor, kElHide, kElColor, NULL };
-
-const char* kRemoveTags[] =
- { kElDest, kElListdef, kElListtable, NULL };
-
-const char* kBlockTags[] =
- { kElTable, kElPara, NULL };
-
-const char* kHideList[] =
- { kAtId, kAtList, NULL };
-
-const char* kNSPrefix = "xmlns";
+#include "domhelpers.h"
+#include "tags.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
@@ -168,19 +100,19 @@ void RtfParser::endDocument()
LevelHandler::endDocument();
// Cleanup the tree
- removeDuplicates(m_document);
- breakTables(m_document);
- breakTags(m_document, kElTable, kElRow);
- breakTags(m_document, kElRow, kElCell);
- wrapTags(m_document, kElCell, kElDest);
- breakBlocks(m_document);
- breakLists(m_document);
- fixLists(m_document);
- fixStyles(m_document);
- fixBlocks(m_document);
- removeTags(m_document);
- breakBreak(m_document, kElDoc, kElPage);
- breakBreak(m_document, kElDoc, kElSect);
+ RtfFixups::removeDuplicates(m_document);
+ RtfFixups::breakTables(m_document);
+ RtfFixups::breakTags(m_document, kElTable, kElRow);
+ RtfFixups::breakTags(m_document, kElRow, kElCell);
+ RtfFixups::wrapTags(m_document, kElCell, kElDest);
+ RtfFixups::breakBlocks(m_document);
+ RtfFixups::breakLists(m_document);
+ RtfFixups::fixLists(m_document);
+ RtfFixups::fixStyles(m_document);
+ RtfFixups::fixBlocks(m_document);
+ RtfFixups::removeTags(m_document);
+ RtfFixups::breakBreak(m_document, kElDoc, kElPage);
+ RtfFixups::breakBreak(m_document, kElDoc, kElSect);
return;
}
@@ -414,7 +346,7 @@ DOM::Element RtfParser::ParseAnalyser::getCurrentBlock()
if(node.hasChildNodes())
node = node.getLastChild();
- return m_parser->getPriorElement(node, kElBlock);
+ return DOMHelpers::getPriorElement(node, kElBlock);
}
@@ -997,5 +929,17 @@ ON_CHARDATA(Attribute)
element.setAttribute(name, cur);
}
+/**
+ * Formats an integer as a decimal wide string.
+ */
+wstring RtfParser::formatInt(int num)
+{
+    // Certain OSs don't support swprintf :(
+    // So format into a narrow buffer and widen it by hand
+    char digits[16];
+    sprintf(digits, "%d", num);
+
+    wstring result;
+    for(int i = 0; digits[i] != '\0'; i++)
+        result.append(1, digits[i]);
+
+    return result;
+}
diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h
index 5403240..fd9fd7a 100644
--- a/src/xmlcomposer.h
+++ b/src/xmlcomposer.h
@@ -62,7 +62,6 @@ public:
virtual void groupEnd();
virtual void charData(wstring data);
-
// Element management functions
DOM::Element createElement(const string& name);
void pushElement(const DOM::Element& element);
@@ -85,6 +84,8 @@ public:
const RtfParserOptions& getOptions()
{ return m_options; }
+ static wstring formatInt(int num);
+
protected:
virtual void clear();
diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp
new file mode 100644
index 0000000..7201703
--- /dev/null
+++ b/src/xmlfixups.cpp
@@ -0,0 +1,739 @@
+/*
+ * Copyright (c) 2004, Nate Nielsen
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * * Redistributions in binary form must reproduce the
+ * above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ * * The names of contributors to this software may not be
+ * used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ *
+ * CONTRIBUTORS
+ * Nate Nielsen <nielsen@memberwebs.com>
+ *
+ */
+
+#include "usuals.h"
+#include "rtffixups.h"
+#include "domhelpers.h"
+#include "tags.h"
+
+/**
+ * Breaks every element named tag out of its ancestors by calling
+ * breakElement on it, with contain as the name of the element
+ * at which breaking stops.
+ */
+void RtfFixups::breakBreak(DOM::Document& doc, const string& contain,
+                           const string& tag)
+{
+    DOM::NodeList els = doc.getElementsByTagName(tag);
+    if(els != NULL)
+    {
+        int idx = 0;
+        while(idx < els->getLength())
+        {
+            DOM::Element el = (const DOM::Element&)els->item(idx);
+#if 0
+            // See if parent node only has this break tag
+            // in it. If so then replace parent with this
+
+            DOM::Node parent = el.getParentNode();
+
+            if(parent != NULL)
+            {
+                DOM::Node grandparent = parent.getParentNode();
+
+                if(grandparent != NULL &&
+                   el.getPreviousSibling() == NULL &&
+                   el.getNextSibling() == NULL)
+                {
+                    grandparent.replaceChild(parent.removeChild(el), parent);
+                }
+            }
+#endif
+
+            breakElement(el, contain);
+            idx++;
+        }
+    }
+}
+
+/**
+ * Breaks an element up through a previous level. Calls itself
+ * recursively to break the element totally free up to the
+ * containing element named by contain.
+ *
+ * For example:
+ *
+ * <dest>
+ * This is <b> a <block fix="para"/>
+ * test of </b> your concentration.
+ * </dest>
+ *
+ * Becomes:
+ *
+ * <dest>
+ * This is <b> a </b><block fix="para"/>
+ * <b>test of </b> your concentration.
+ * </dest>
+ *
+ * Returns true once the element's parent is the containing
+ * element, or when there is no parent or grandparent left to
+ * break through. Returns false when the parent could not be
+ * cloned for splitting.
+ */
+bool RtfFixups::breakElement(const DOM::Element& el, const string& contain)
+{
+    ASSERT(el != NULL);
+
+    DOM::Element parent = (const DOM::Element&)el.getParentNode();
+    DOM::Element grandparent;
+
+    // Only ask for the grandparent once we know there's a parent
+    if(parent != NULL)
+        grandparent = (const DOM::Element&)parent.getParentNode();
+
+    // Make sure we have something to work with before continuing,
+    // and stop once we've reached the containing element
+    if(grandparent == NULL || parent == NULL ||
+       DOMHelpers::isElement(parent, contain))
+        return true;
+
+    // The element in its new position, one level further up
+    DOM::Node e;
+
+    // Check to see if this is the first node in the parent.
+    // If so then just move out to before
+    if(el.getPreviousSibling() == NULL)
+    {
+        e = grandparent.insertBefore(parent.removeChild(el), parent);
+    }
+
+    // Check to see if this is the last node in the parent.
+    // If so then just move out to after the parent
+    else if(el.getNextSibling() == NULL)
+    {
+        DOM::Node next = parent.getNextSibling();
+        if(next == NULL)
+            e = grandparent.appendChild(parent.removeChild(el));
+        else
+            e = grandparent.insertBefore(parent.removeChild(el), next);
+    }
+
+    // Otherwise it's in the middle so split the parent
+    // element etc...
+    else
+    {
+        // Clone it but not deep
+        DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false);
+
+        if(parent2 == NULL)
+            return false;
+
+        // Flag that tells us whether we moved anything to the clone
+        bool moved = false;
+
+        // Now move all nodes after this one to the second parent.
+        while((e = el.getNextSibling()) != NULL)
+        {
+            parent2.appendChild(parent.removeChild(e));
+            moved = true;
+        }
+
+        // Remove the element from its parent
+        e = parent.removeChild(el);
+
+        // Okay now we move the element up next to the parent, and
+        // the clone (if it received anything) after the element
+        DOMHelpers::insertAfter(grandparent, e, parent);
+        if(moved)
+            DOMHelpers::insertAfter(grandparent, parent2, e);
+    }
+
+    // Now call it again with the element in the new position
+    // until everything's cut through!
+    return breakElement((DOM::Element&)e, contain);
+}
+
+/**
+ * Changes from a marker based paragraph system to a contained
+ * paragraph system. Also applies paragraph attributes to the
+ * appropriate paragraph.
+ *
+ * For example:
+ *
+ * <dest>
+ * This <blockattr style="10"> is <b> a <block fix="para"/>
+ * test of </b> your concentration.
+ * </dest>
+ *
+ * Becomes:
+ *
+ * <para style="10"> This is <b> a </b></para>
+ * <para><b>test of </b> your concentration.</para>
+ *
+ * This works in two passes. The first breaks every childless
+ * block element out to the level of its destination element.
+ * The second walks the children of each destination and moves
+ * the nodes following a block element into that block.
+ *
+ * NOTE(review): this function only moves nodes into the block
+ * elements. Nothing here renames the blocks or touches their
+ * attributes, so the example above presumably shows the result
+ * after later fixups have also run.
+ */
+void RtfFixups::breakBlocks(DOM::Document& document)
+{
+ // First break out all the paragraphs to the destination level
+ DOM::NodeList blocks = document.getElementsByTagName(kElBlock);
+ if(blocks != NULL)
+ {
+ for(int i = 0; i < blocks->getLength(); i++)
+ {
+ DOM::Element block = (const DOM::Element&)blocks->item(i);
+
+ // If it's the single closed style para then break it
+ if(block != NULL && !block.hasChildNodes())
+ breakElement(block, kElDest);
+ }
+ }
+
+
+ // Now group stuff in destinations into paras or other blocks
+ DOM::NodeList destinations = document.getElementsByTagName(kElDest);
+ if(destinations != NULL)
+ {
+ for(int i = 0; i < destinations->getLength(); i++)
+ {
+ DOM::Element dest = (const DOM::Element&)destinations->item(i);
+
+ // Sanity Check
+ if(dest == NULL || !dest.hasChildNodes())
+ continue;
+
+ // Go through the children of this destination
+ DOM::Node child = dest.getFirstChild();
+
+ // The block that is currently collecting nodes, if any
+ DOM::Element block;
+
+ while(child != NULL)
+ {
+ // If it's a block then it becomes the one that
+ // collects the nodes which follow it
+ if(DOMHelpers::isElement(child, kElBlock))
+ {
+ block = (DOM::Element&)child;
+ child = child.getNextSibling();
+ continue;
+ }
+
+ // If it's already a real block element then stop
+ // collecting, so it stays where it is
+ for(const char** t = kBlockTags; *t != NULL; t++)
+ {
+ if(DOMHelpers::isElement(child, *t))
+ {
+ block = NULL;
+ break;
+ }
+ }
+
+ // If there's a block then add to it. The walk then
+ // continues from the block, as the moved child is no
+ // longer a child of the destination
+ if(block != NULL)
+ {
+ block.appendChild(dest.removeChild(child));
+ child = block;
+ }
+
+ child = child.getNextSibling();
+ }
+ }
+ }
+}
+
+/**
+ * For every element named tagName, moves all of its children
+ * into a newly created wrapName element and then makes that new
+ * element its only child.
+ */
+void RtfFixups::wrapTags(DOM::Document& doc, const string& tagName,
+                         const string& wrapName)
+{
+    DOM::NodeList tags = doc.getElementsByTagName(tagName);
+    if(tags != NULL)
+    {
+        for(int idx = 0; idx < tags->getLength(); idx++)
+        {
+            DOM::Element tag = (const DOM::Element&)tags->item(idx);
+            DOM::Element wrap = doc.createElement(wrapName);
+
+            // Move the children across one at a time, keeping
+            // them in their original order
+            while(tag.hasChildNodes())
+            {
+                DOM::Node first = tag.getFirstChild();
+                wrap.appendChild(tag.removeChild(first));
+            }
+
+            // The element is now empty, so the wrapper goes in
+            tag.appendChild(wrap);
+        }
+    }
+}
+
+/**
+ * Rearranges the children of every parentName element so that
+ * they are grouped inside tagName elements.
+ *
+ * For each parentName element, the tagName elements found below
+ * it are first broken out to the level of the parent with
+ * breakElement. The children of the parent are then walked: each
+ * tagName child starts a new group, and the other children are
+ * moved into the tagName element of the current group. Groups
+ * that end up empty are removed.
+ *
+ * Finally any tagName element in the document whose parent is
+ * not a parentName element is removed.
+ */
+void RtfFixups::breakTags(DOM::Document& doc, const string& parentName,
+ const string& tagName)
+{
+ DOM::NodeList parents = doc.getElementsByTagName(parentName);
+ if(parents != NULL)
+ {
+ for(int i = 0; i < parents->getLength(); i++)
+ {
+ DOM::Element parent = (const DOM::Element&)parents->item(i);
+
+ if(!parent.hasChildNodes())
+ continue;
+
+ // Break the tags out so they sit directly in the parent.
+ // NOTE(review): the loop index below shadows the outer i.
+ // That is harmless, as it is scoped to the inner loop, but
+ // easy to misread.
+ DOM::NodeList tags = parent.getElementsByTagName(tagName);
+ if(tags != NULL)
+ {
+ for(int i = 0; i < tags->getLength(); i++)
+ breakElement((const DOM::Element&)tags->item(i), parentName);
+ }
+
+ // Start with a fresh tag at the front of the parent, which
+ // collects whatever comes before the first real tag
+ DOM::Node tag = doc.createElement(tagName);
+ parent.insertBefore(tag, parent.getFirstChild());
+
+ DOM::Node child = tag;
+
+ // Walk the siblings that follow. child is set back to the
+ // current tag whenever a node is moved into it, so the next
+ // sibling is always taken from the level of the parent
+ while(child != NULL && (child = child.getNextSibling()) != NULL)
+ {
+ // Block elements get special treatment depending on
+ // what follows them
+ if(DOMHelpers::isElement(child, kElBlock))
+ {
+ DOM::Node next = child.getNextSibling();
+
+ // A block at the very end of the parent is dropped
+ if(next == NULL)
+ {
+ parent.removeChild(child);
+ continue;
+ }
+
+ // A block directly before a tag
+ if(DOMHelpers::isElement(next, tagName))
+ {
+ DOM::Node twodown = next.getNextSibling();
+
+ // If the tag isn't followed by another block then
+ // swap them, so the tag comes before this block and
+ // is the node handled below
+ if(!DOMHelpers::isElement(twodown, kElBlock))
+ {
+ child = parent.insertBefore(parent.removeChild(next), child);
+ }
+
+ // Otherwise drop this block and carry on with the tag
+ else
+ {
+ parent.removeChild(child);
+ child = next;
+ }
+ }
+ }
+
+ // A tag starts a new group. The previous tag is removed
+ // if nothing was collected in it
+ if(DOMHelpers::isElement(child, tagName))
+ {
+ if(!tag.hasChildNodes())
+ parent.removeChild(tag);
+ tag = child;
+ }
+
+ // Anything else is moved into the current tag
+ else
+ {
+ tag.appendChild(parent.removeChild(child));
+ child = tag;
+ }
+ }
+
+ // Don't leave an empty tag behind at the end
+ if(!tag.hasChildNodes())
+ parent.removeChild(tag);
+ }
+ }
+
+ // Now remove the tags that aren't directly inside a parent of
+ // the right name
+ DOM::NodeList tags = doc.getElementsByTagName(tagName);
+ if(tags != NULL)
+ {
+ for(int i = 0; i < tags->getLength(); i++)
+ {
+ DOM::Element tag = (const DOM::Element&)tags->item(i);
+ DOM::Node parent = tag.getParentNode();
+
+ if(parent != NULL && !DOMHelpers::isElement(parent, parentName))
+ parent.removeChild(tag);
+
+#if 0
+ else if(tag.hasChildNodes())
+ {
+ DOM::NodeList children = tag.getChildNodes();
+ if(children != NULL && children->getLength() == 1)
+ {
+ DOM::Node child = children->item(0);
+ if(child != NULL && !child.hasChildNodes() &&
+ DOMHelpers::isElement(child, kElBlock))
+ parent.removeChild(tag);
+ }
+ }
+#endif
+
+ }
+ }
+}
+
+/**
+ * Groups the children of each destination element into list
+ * elements.
+ *
+ * A block element that carries a list attribute has that
+ * attribute removed and is moved into a list element, together
+ * with the nodes that follow it. A new list element is created,
+ * and given the list attribute, whenever no list is open or the
+ * list id differs from that of the previous block. A block
+ * element without a list attribute closes the current list.
+ */
+void RtfFixups::breakLists(DOM::Document& doc)
+{
+ // Now group stuff in destinations into lists
+ DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
+ if(destinations != NULL)
+ {
+ for(int i = 0; i < destinations->getLength(); i++)
+ {
+ DOM::Element dest = (const DOM::Element&)destinations->item(i);
+
+ // Sanity Check
+ if(dest == NULL)
+ continue;
+
+ // Go through the children of this destination
+ DOM::Node child = dest.getFirstChild();
+
+ // The list currently collecting nodes, if any
+ DOM::Element list;
+ DOM::Element e;
+
+ // The list id of the list currently collecting nodes
+ wstring previd;
+
+ while(child != NULL)
+ {
+ // If it's a block, see whether it belongs to a list
+ if(DOMHelpers::isElement(child, kElBlock))
+ {
+ e = (DOM::Element&)child;
+
+ // if it has a list attribute
+ wstring listid = e.getAttribute(kAtList);
+ if(listid.length() > 0)
+ {
+ e.removeAttribute(kAtList);
+
+ // Start a new list if none is open, or if this
+ // block belongs to a different list than the last
+ if(list == NULL || previd != listid)
+ {
+ list = doc.createElement(kElList);
+ list.setAttribute(kAtList, listid);
+ dest.insertBefore(list, child);
+ previd = listid;
+ }
+ }
+
+ // A block without a list attribute ends the list
+ else
+ {
+ list = NULL;
+ previd.erase();
+ }
+ }
+
+ // While a list is open every node, block or not, is moved
+ // into it. The walk then continues from the list, as the
+ // moved child is no longer a child of the destination
+ if(list != NULL)
+ {
+ list.appendChild(dest.removeChild(child));
+ child = list;
+ }
+
+ child = child.getNextSibling();
+ }
+ }
+ }
+}
+
+/**
+ * Replaces style ids on blocks with style names. For every block
+ * element with a style attribute, each style element whose id
+ * equals that attribute and which has a non-empty name has its
+ * name written into the block's style attribute. Afterwards the
+ * id attribute is removed from every style element.
+ *
+ * Nothing is done when the document has no style elements.
+ */
+void RtfFixups::fixStyles(const DOM::Document doc)
+{
+    DOM::NodeList styles = doc.getElementsByTagName(kElStyle);
+    if(styles != NULL)
+    {
+        DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
+        if(blocks != NULL)
+        {
+            for(int b = 0; b < blocks->getLength(); b++)
+            {
+                DOM::Element block = (const DOM::Element&)blocks->item(b);
+
+                // Only blocks that refer to a style are of interest
+                if(block != NULL && block.hasAttribute(kElStyle))
+                {
+                    for(int s = 0; s < styles->getLength(); s++)
+                    {
+                        DOM::Element style = (const DOM::Element&)styles->item(s);
+                        if(style == NULL)
+                            continue;
+
+                        // The block's attribute is read afresh each
+                        // time, as it may just have been rewritten
+                        const bool matches =
+                            (style.getAttribute(kAtId) == block.getAttribute(kElStyle));
+                        if(!matches)
+                            continue;
+
+                        wstring name = style.getAttribute(kAtName);
+                        if(name.length() > 0)
+                            block.setAttribute(kElStyle, name);
+                    }
+                }
+            }
+        }
+
+        // The ids have served their purpose
+        for(int s = 0; s < styles->getLength(); s++)
+        {
+            DOM::Element style = (const DOM::Element&)styles->item(s);
+            if(style != NULL)
+                style.removeAttribute(kAtId);
+        }
+    }
+}
+
+
+/**
+ * Groups table content into table elements, in two passes.
+ *
+ * The first pass breaks every row element out to the level of
+ * its destination element. A childless row that is the first or
+ * last child of a block element is moved out of that block
+ * beforehand.
+ *
+ * The second pass walks the children of each destination. A
+ * block element carrying the cell attribute has that attribute
+ * removed and is moved into a table element, together with the
+ * nodes that follow it. A new table element is created when none
+ * is open. A block element without the cell attribute closes the
+ * current table.
+ */
+void RtfFixups::breakTables(DOM::Document& doc)
+{
+ DOM::NodeList rows = doc.getElementsByTagName(kElRow);
+ if(rows != NULL)
+ {
+ for(int i = 0; i < rows->getLength(); i++)
+ {
+ DOM::Element row = (const DOM::Element&)rows->item(i);
+ DOM::Node parent = row.getParentNode();
+
+ if(parent == NULL)
+ continue;
+
+ // An empty row at either end of a block is moved out of the
+ // block whole, rather than splitting the block
+ if(DOMHelpers::isElement(parent, kElBlock))
+ {
+ DOM::Node grandparent = parent.getParentNode();
+
+ if(grandparent != NULL && !row.hasChildNodes())
+ {
+ if(row.getPreviousSibling() == NULL)
+ grandparent.insertBefore(parent.removeChild(row), parent);
+ else if(row.getNextSibling() == NULL)
+ DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent);
+ }
+ }
+
+ // Then break the row the rest of the way out
+ breakElement(row, kElDest);
+ }
+ }
+
+
+
+ // Now group stuff in destinations into tables
+ DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
+ if(destinations != NULL)
+ {
+ for(int i = 0; i < destinations->getLength(); i++)
+ {
+ DOM::Element dest = (const DOM::Element&)destinations->item(i);
+
+ // Sanity Check
+ if(dest == NULL)
+ continue;
+
+ // Go through the children of this destination
+ DOM::Node child = dest.getFirstChild();
+
+ // The table currently collecting nodes, if any
+ DOM::Element table;
+ DOM::Element e;
+
+ while(child != NULL)
+ {
+ // If it's a block, see whether it belongs in a table
+ if(DOMHelpers::isElement(child, kElBlock))
+ {
+ e = (DOM::Element&)child;
+
+ // if it has a cell attribute
+ if(e.getAttribute(kAtCell).length() > 0)
+ {
+ e.removeAttribute(kAtCell);
+
+ // Start a new table if none is open
+ if(table == NULL)
+ {
+ table = doc.createElement(kElTable);
+ dest.insertBefore(table, child);
+ }
+ }
+
+ // A block without a cell attribute ends the table
+ else
+ {
+ table = NULL;
+ }
+ }
+
+ // While a table is open every node, block or not, is moved
+ // into it. The walk then continues from the table, as the
+ // moved child is no longer a child of the destination
+ if(table != NULL)
+ {
+ table.appendChild(dest.removeChild(child));
+ child = table;
+ }
+
+ child = child.getNextSibling();
+ }
+ }
+ }
+}
+
+/**
+ * Removes every element whose name is listed in kRemoveTags,
+ * while keeping its contents: the children of each such element
+ * are moved, in order, to just before the element in its parent,
+ * and the then empty element is removed.
+ *
+ * Elements without a parent are left alone.
+ */
+void RtfFixups::removeTags(const DOM::Document& doc)
+{
+    // Go through the list of tag names
+    for(const char** t = kRemoveTags; *t != NULL; t++)
+    {
+        DOM::NodeList elements = doc.getElementsByTagName(*t);
+        if(elements != NULL)
+        {
+            for(int j = 0; j < elements->getLength(); j++)
+            {
+                // Make sure it's a valid element before using it
+                DOM::Element el = (const DOM::Element&)elements->item(j);
+                if(el == NULL)
+                    continue;
+
+                DOM::Node parent = el.getParentNode();
+                if(parent == NULL)
+                    continue;
+
+                // Hoist the children out to where the element is
+                while(el.hasChildNodes())
+                    parent.insertBefore(el.removeChild(el.getFirstChild()), el);
+
+                parent.removeChild(el);
+            }
+        }
+    }
+}
+
+/**
+ * Applies list definitions to the lists that refer to them. For
+ * every list definition element with a list attribute, each list
+ * element whose list attribute has the same value receives the
+ * attributes of the definition (except those named in kHideList)
+ * and then has its own list attribute removed.
+ */
+void RtfFixups::fixLists(const DOM::Document doc)
+{
+    DOM::NodeList lists = doc.getElementsByTagName(kElList);
+    if(lists != NULL)
+    {
+        DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef);
+        if(listdefs != NULL)
+        {
+            for(int d = 0; d < listdefs->getLength(); d++)
+            {
+                DOM::Element listdef = (const DOM::Element&)listdefs->item(d);
+
+                // Only definitions with a list id can be matched up
+                if(listdef != NULL && listdef.hasAttribute(kAtList))
+                {
+                    for(int l = 0; l < lists->getLength(); l++)
+                    {
+                        DOM::Element list = (const DOM::Element&)lists->item(l);
+                        if(list == NULL)
+                            continue;
+
+                        const bool sameList =
+                            (list.getAttribute(kAtList) == listdef.getAttribute(kAtList));
+                        if(!sameList)
+                            continue;
+
+                        DOMHelpers::copyAttributes(listdef, list, kHideList);
+                        list.removeAttribute(kAtList);
+                    }
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Replaces every block element with a real element. The name of
+ * the new element is taken from the block's fix attribute, which
+ * is removed from the block; when there is no such attribute the
+ * new element is a para. The remaining attributes (except
+ * namespace ones) and all of the children of the block are
+ * carried over to the new element.
+ *
+ * Blocks without a parent are left alone.
+ */
+void RtfFixups::fixBlocks(const DOM::Document doc)
+{
+    DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
+    if(blocks != NULL)
+    {
+        string fix;
+        wstring val;
+
+        for(int i = 0; i < blocks->getLength(); i++)
+        {
+            // Make sure it's a valid element before using it
+            DOM::Element block = (const DOM::Element&)blocks->item(i);
+            if(block == NULL)
+                continue;
+
+            DOM::Node parent = block.getParentNode();
+            if(parent == NULL)
+                continue;
+
+            fix.resize(0);
+            val.resize(0);
+
+            // Take the fix attribute off the block
+            val = block.getAttribute(kAtFix);
+            if(val.length() > 0)
+                block.removeAttribute(kAtFix);
+
+            // Only when the plain lookup found nothing do we fall
+            // back to the namespace aware one. Doing it after a
+            // successful lookup would overwrite the value we
+            // just retrieved.
+            if(val.length() == 0)
+            {
+                val = block.getAttributeNS("", kAtFix);
+                if(val.length() > 0)
+                    block.removeAttributeNS("", kAtFix);
+            }
+
+            if(val.length() > 0)
+                DOM::transcode16to8(val, fix);
+
+            // Blocks that don't say what they are become paragraphs
+            if(fix.length() == 0)
+                fix = kElPara;
+
+            DOM::Element el = doc.createElement(fix);
+            DOMHelpers::copyAttributes(block, el, NULL);
+
+            // Move the contents over, keeping them in order
+            while(block.hasChildNodes())
+                el.appendChild(block.removeChild(block.getFirstChild()));
+
+            parent.replaceChild(el, block);
+        }
+    }
+}
+
+
+/**
+ * Removes adjacent duplicate nodes of certain names.
+ *
+ * For every element whose name is listed in kNoDuplicates, a
+ * previous and/or next sibling that is an equal element (see
+ * DOMHelpers::isEqualElement) is merged into it: the children of
+ * the sibling are moved into the element, keeping document
+ * order, and the emptied sibling is removed.
+ */
+void RtfFixups::removeDuplicates(const DOM::Document& doc)
+{
+    // Go through the list of tag names
+    for(const char** t = kNoDuplicates; *t != NULL; t++)
+    {
+        DOM::NodeList elements = doc.getElementsByTagName(*t);
+        if(elements != NULL)
+        {
+            for(int j = 0; j < elements->getLength(); j++)
+            {
+                // Make sure it's a valid element
+                DOM::Element element = (const DOM::Element&)elements->item(j);
+                if(element == NULL)
+                    continue;
+
+                // Get neighbors
+                DOM::Node previous = element.getPreviousSibling();
+                DOM::Node next = element.getNextSibling();
+
+                // Make sure it's still in the document, as we may have
+                // removed it on a previous loop
+                DOM::Node parent = element.getParentNode();
+                if(parent == NULL)
+                    continue;
+
+                // Combine previous if valid
+                if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE &&
+                   DOMHelpers::isEqualElement((DOM::Element&)previous, element))
+                {
+                    // Take the children from the back and put them at
+                    // the front, so they keep their order
+                    while(previous.hasChildNodes())
+                    {
+                        DOM::Node child = previous.removeChild(previous.getLastChild());
+                        if(child != NULL)
+                        {
+                            if(element.hasChildNodes())
+                                element.insertBefore(child, element.getFirstChild());
+                            else
+                                element.appendChild(child);
+                        }
+                    }
+
+                    // Remove duplicate node
+                    parent.removeChild(previous);
+                }
+
+                // Combine next if valid
+                if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE &&
+                   DOMHelpers::isEqualElement((DOM::Element&)next, element))
+                {
+                    // Take the children from the front and put them
+                    // at the back, so they keep their order
+                    while(next.hasChildNodes())
+                    {
+                        DOM::Node child = next.removeChild(next.getFirstChild());
+                        if(child != NULL)
+                            element.appendChild(child);
+                    }
+
+                    // Remove duplicate node
+                    parent.removeChild(next);
+                }
+            }
+        }
+    }
+}
diff --git a/src/xmlfixups.h b/src/xmlfixups.h
new file mode 100644
index 0000000..8cc9b82
--- /dev/null
+++ b/src/xmlfixups.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2004, Nate Nielsen
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * * Redistributions in binary form must reproduce the
+ * above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ * * The names of contributors to this software may not be
+ * used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ *
+ * CONTRIBUTORS
+ * Nate Nielsen <nielsen@memberwebs.com>
+ *
+ */
+
+#ifndef __RTFFIXUPS_H__
+#define __RTFFIXUPS_H__
+
+#include "sablo.h"
+
+class RtfFixups
+{
+public:
+
+    /*
+     * Cleanup functions. Every member declared here is static, so
+     * they are called on the class and not on an instance.
+     */
+
+    // fix* functions
+    static void fixBlocks(DOM::Document doc);
+    static void fixLists(const DOM::Document doc);
+    static void fixStyles(const DOM::Document doc);
+
+    // break* functions
+    static bool breakElement(const DOM::Element& el, const string& contain);
+    static void breakBreak(DOM::Document& doc, const string& contain, const string& tag);
+    static void breakLists(DOM::Document& document);
+    static void breakTables(DOM::Document& document);
+    static void breakTags(DOM::Document& doc, const string& parentName, const string& tagName);
+    static void breakBlocks(DOM::Document& document);
+
+    // wrap* and remove* functions
+    static void wrapTags(DOM::Document& document, const string& tagName, const string& wrapName);
+    static void removeTags(const DOM::Document& doc);
+
+    // Removes adjacent duplicate nodes of certain names
+    static void removeDuplicates(const DOM::Document& doc);
+};
+
+#endif // __RTFFIXUPS_H__