summaryrefslogtreecommitdiff
path: root/src/xmlfixups.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/xmlfixups.cpp')
-rw-r--r--src/xmlfixups.cpp739
1 files changed, 739 insertions, 0 deletions
diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp
new file mode 100644
index 0000000..7201703
--- /dev/null
+++ b/src/xmlfixups.cpp
@@ -0,0 +1,739 @@
+/*
+ * Copyright (c) 2004, Nate Nielsen
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * * Redistributions in binary form must reproduce the
+ * above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ * * The names of contributors to this software may not be
+ * used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ *
+ * CONTRIBUTORS
+ * Nate Nielsen <nielsen@memberwebs.com>
+ *
+ */
+
+#include "usuals.h"
+#include "rtffixups.h"
+#include "domhelpers.h"
+#include "tags.h"
+
+/**
+ * Runs breakElement() on every element in the document named 'tag',
+ * so that each one is broken out of its ancestors until its parent
+ * is an element named 'contain'.
+ */
+void RtfFixups::breakBreak(DOM::Document& doc, const string& contain,
+                           const string& tag)
+{
+    DOM::NodeList found = doc.getElementsByTagName(tag);
+    if(found != NULL)
+    {
+        int index = 0;
+        while(index < found->getLength())
+        {
+            DOM::Element target = (const DOM::Element&)found->item(index);
+            index++;
+#if 0
+            // Disabled: if the parent node holds nothing but this
+            // break tag, replace the parent with the tag itself.
+
+            DOM::Node parent = target.getParentNode();
+
+            if(parent != NULL)
+            {
+                DOM::Node grandparent = parent.getParentNode();
+
+                if(grandparent != NULL &&
+                   target.getPreviousSibling() == NULL &&
+                   target.getNextSibling() == NULL)
+                {
+                    grandparent.replaceChild(parent.removeChild(target), parent);
+                }
+            }
+#endif
+
+            // The result of breakElement() is intentionally not checked here
+            breakElement(target, contain);
+        }
+    }
+}
+
+/**
+ * Breaks an element out of its parent, one level per call, and calls
+ * itself recursively until the element's parent is the containing
+ * element (or there is no parent/grandparent left to move it into).
+ *
+ * For example:
+ *
+ * <dest>
+ *  This is <b> a <block fix="para"/>
+ *  test of </b> your concentration.
+ * </dest>
+ *
+ * Becomes:
+ *
+ * <dest>
+ *  This is <b> a </b><block fix="para"/>
+ *  <b>test of </b> your concentration.
+ * </dest>
+ *
+ * el:      the element to break free; must not be NULL.
+ * contain: name of the element at which breaking stops.
+ *
+ * Returns true once the element has no parent or grandparent, or its
+ * parent is a 'contain' element. Returns false if a parent that had
+ * to be split could not be cloned.
+ */
+bool RtfFixups::breakElement(const DOM::Element& el, const string& contain)
+{
+    ASSERT(el != NULL);
+
+    DOM::Element parent = (const DOM::Element&)el.getParentNode();
+    DOM::Element grandparent;
+
+    // Only ask for the grandparent once we know there is a parent
+    if(parent != NULL)
+        grandparent = (const DOM::Element&)parent.getParentNode();
+
+    // Make sure we have something to work with before continuing.
+    // Reaching the containing element means we are done.
+    if(grandparent == NULL || parent == NULL ||
+       DOMHelpers::isElement(parent, contain))
+        return true;
+
+    DOM::Node e;
+
+    // Check to see if this is the first node in the parent.
+    // If so then just move it out to before the parent.
+    if(el.getPreviousSibling() == NULL)
+    {
+        e = grandparent.insertBefore(parent.removeChild(el), parent);
+    }
+
+    // Check to see if this is the last node in the parent.
+    // If so then just move it out to after the parent.
+    else if(el.getNextSibling() == NULL)
+    {
+        DOM::Node next = parent.getNextSibling();
+        if(next == NULL)
+            e = grandparent.appendChild(parent.removeChild(el));
+        else
+            e = grandparent.insertBefore(parent.removeChild(el), next);
+    }
+
+    // Otherwise it's in the middle, so split the parent in two
+    // around the element.
+    else
+    {
+        // Clone the parent, but not deep: the clone receives the
+        // nodes that follow the element.
+        DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false);
+
+        if(parent2 == NULL)
+            return false;
+
+        // Flag that tells us whether we moved anything to the clone
+        bool moved = false;
+
+        // Now move all nodes after this one to the second parent.
+        while((e = el.getNextSibling()) != NULL)
+        {
+            parent2.appendChild(parent.removeChild(e));
+            moved = true;
+        }
+
+        // Remove the element from its parent
+        e = parent.removeChild(el);
+
+        // Now place the element after the original parent, and the
+        // clone (if it received anything) after the element.
+        DOMHelpers::insertAfter(grandparent, e, parent);
+        if(moved)
+            DOMHelpers::insertAfter(grandparent, parent2, e);
+    }
+
+    // Now call it again with the element in its new position
+    // until everything's cut through!
+    return breakElement((DOM::Element&)e, contain);
+}
+
+/**
+ * Changes from a marker based block system to a contained block
+ * system. Childless block markers are first broken out to the
+ * destination level; then, inside every destination, the nodes that
+ * follow a block are moved into that block. An element named in
+ * kBlockTags is left where it is and ends the current block.
+ *
+ * For example:
+ *
+ * <dest>
+ *  This is <b> a <block fix="para"/>
+ *  test of </b> your concentration.
+ * </dest>
+ *
+ * Becomes:
+ *
+ * <dest>
+ *  This is <b> a </b>
+ *  <block fix="para"><b>test of </b> your concentration.</block>
+ * </dest>
+ */
+void RtfFixups::breakBlocks(DOM::Document& document)
+{
+    // Pass one: cut every empty block marker free of its ancestors
+    DOM::NodeList markers = document.getElementsByTagName(kElBlock);
+    if(markers != NULL)
+    {
+        for(int m = 0; m < markers->getLength(); m++)
+        {
+            DOM::Element marker = (const DOM::Element&)markers->item(m);
+
+            // Only the single closed style of block is broken out
+            if(marker == NULL || marker.hasChildNodes())
+                continue;
+
+            breakElement(marker, kElDest);
+        }
+    }
+
+    // Pass two: gather the content of each destination into blocks
+    DOM::NodeList dests = document.getElementsByTagName(kElDest);
+    if(dests != NULL)
+    {
+        for(int d = 0; d < dests->getLength(); d++)
+        {
+            DOM::Element dest = (const DOM::Element&)dests->item(d);
+
+            // Nothing to do for missing or empty destinations
+            if(dest == NULL || !dest.hasChildNodes())
+                continue;
+
+            // The block currently collecting nodes, if any
+            DOM::Element current;
+
+            for(DOM::Node node = dest.getFirstChild(); node != NULL;
+                node = node.getNextSibling())
+            {
+                // A block marker becomes the new collector
+                if(DOMHelpers::isElement(node, kElBlock))
+                {
+                    current = (DOM::Element&)node;
+                    continue;
+                }
+
+                // An element that is already a real block stops collection
+                bool realBlock = false;
+                for(const char** name = kBlockTags; *name != NULL; name++)
+                {
+                    if(DOMHelpers::isElement(node, *name))
+                    {
+                        realBlock = true;
+                        break;
+                    }
+                }
+
+                if(realBlock)
+                    current = NULL;
+
+                // Anything else goes into the current block. Resume the
+                // walk from the block so the next sibling is the node
+                // that followed the one just moved.
+                if(current != NULL)
+                {
+                    current.appendChild(dest.removeChild(node));
+                    node = current;
+                }
+            }
+        }
+    }
+}
+
+/**
+ * For every element named 'tagName', moves all of its children into
+ * a newly created 'wrapName' element and makes that new element the
+ * only child of the original.
+ */
+void RtfFixups::wrapTags(DOM::Document& doc, const string& tagName,
+                         const string& wrapName)
+{
+    DOM::NodeList matches = doc.getElementsByTagName(tagName);
+    if(matches != NULL)
+    {
+        for(int n = 0; n < matches->getLength(); n++)
+        {
+            DOM::Element outer = (const DOM::Element&)matches->item(n);
+            DOM::Element inner = doc.createElement(wrapName);
+
+            // Drain the children, front to back, into the wrapper
+            while(outer.hasChildNodes())
+            {
+                DOM::Node first = outer.getFirstChild();
+                inner.appendChild(outer.removeChild(first));
+            }
+
+            outer.appendChild(inner);
+        }
+    }
+}
+
+/**
+ * Regroups the content of every 'parentName' element into 'tagName'
+ * children.
+ *
+ * For each parent that has children:
+ *  - every nested 'tagName' element is broken out with breakElement()
+ *    until its parent is a 'parentName' element;
+ *  - a fresh, empty 'tagName' element is inserted as the first child
+ *    and the remaining children are swept left to right. Each node
+ *    that is not a 'tagName' element is moved into the most recently
+ *    seen 'tagName' element;
+ *  - kElBlock markers get special treatment: a trailing one is
+ *    removed; one directly followed by a tag is either swapped behind
+ *    that tag or, when the tag is itself followed by a block, removed;
+ *  - 'tagName' elements left empty by the sweep are removed.
+ *
+ * Finally, every 'tagName' element in the document whose parent is
+ * not a 'parentName' element is removed.
+ */
+void RtfFixups::breakTags(DOM::Document& doc, const string& parentName,
+                          const string& tagName)
+{
+    DOM::NodeList containers = doc.getElementsByTagName(parentName);
+    if(containers != NULL)
+    {
+        for(int p = 0; p < containers->getLength(); p++)
+        {
+            DOM::Element parent = (const DOM::Element&)containers->item(p);
+
+            if(!parent.hasChildNodes())
+                continue;
+
+            // Bring every nested tag up to the parent's level
+            DOM::NodeList nested = parent.getElementsByTagName(tagName);
+            if(nested != NULL)
+            {
+                for(int n = 0; n < nested->getLength(); n++)
+                    breakElement((const DOM::Element&)nested->item(n), parentName);
+            }
+
+            // Start with a fresh tag that collects any leading content
+            DOM::Node tag = doc.createElement(tagName);
+            parent.insertBefore(tag, parent.getFirstChild());
+
+            DOM::Node child = tag;
+
+            while(child != NULL && (child = child.getNextSibling()) != NULL)
+            {
+                if(DOMHelpers::isElement(child, kElBlock))
+                {
+                    DOM::Node next = child.getNextSibling();
+
+                    // A trailing block marker is simply dropped. The
+                    // detached node ends the sweep on the next test.
+                    if(next == NULL)
+                    {
+                        parent.removeChild(child);
+                        continue;
+                    }
+
+                    if(DOMHelpers::isElement(next, tagName))
+                    {
+                        DOM::Node twodown = next.getNextSibling();
+                        if(DOMHelpers::isElement(twodown, kElBlock))
+                        {
+                            // Another block follows the tag: drop this one
+                            parent.removeChild(child);
+                            child = next;
+                        }
+                        else
+                        {
+                            // Put the tag in front of the block so the
+                            // block is collected by that tag
+                            child = parent.insertBefore(parent.removeChild(next), child);
+                        }
+                    }
+                }
+
+                if(!DOMHelpers::isElement(child, tagName))
+                {
+                    // Ordinary content: move into the current tag and
+                    // carry on from the tag's position
+                    tag.appendChild(parent.removeChild(child));
+                    child = tag;
+                    continue;
+                }
+
+                // A new tag takes over; discard the old one if unused
+                if(!tag.hasChildNodes())
+                    parent.removeChild(tag);
+                tag = child;
+            }
+
+            if(!tag.hasChildNodes())
+                parent.removeChild(tag);
+        }
+    }
+
+    // Remove any tag that did not end up directly under a parent
+    DOM::NodeList strays = doc.getElementsByTagName(tagName);
+    if(strays != NULL)
+    {
+        for(int s = 0; s < strays->getLength(); s++)
+        {
+            DOM::Element tag = (const DOM::Element&)strays->item(s);
+            DOM::Node parent = tag.getParentNode();
+
+            if(parent != NULL && !DOMHelpers::isElement(parent, parentName))
+                parent.removeChild(tag);
+
+#if 0
+            else if(tag.hasChildNodes())
+            {
+                DOM::NodeList children = tag.getChildNodes();
+                if(children != NULL && children->getLength() == 1)
+                {
+                    DOM::Node child = children->item(0);
+                    if(child != NULL && !child.hasChildNodes() &&
+                       DOMHelpers::isElement(child, kElBlock))
+                        parent.removeChild(tag);
+                }
+            }
+#endif
+
+        }
+    }
+}
+
+/**
+ * Groups runs of list blocks inside each destination into list
+ * elements.
+ *
+ * A block carrying a non-empty kAtList attribute has that attribute
+ * removed and is placed in a kElList element carrying the same id. A
+ * new list is started whenever no list is open or the id differs
+ * from the previous one. A block without the attribute closes the
+ * open list. Non-block nodes are moved into the list that is open at
+ * that point, if any.
+ */
+void RtfFixups::breakLists(DOM::Document& doc)
+{
+    DOM::NodeList dests = doc.getElementsByTagName(kElDest);
+    if(dests != NULL)
+    {
+        for(int d = 0; d < dests->getLength(); d++)
+        {
+            DOM::Element dest = (const DOM::Element&)dests->item(d);
+
+            // Skip missing entries
+            if(dest == NULL)
+                continue;
+
+            // The list currently being filled and the id it was made for
+            DOM::Element list;
+            wstring lastId;
+
+            for(DOM::Node node = dest.getFirstChild(); node != NULL;
+                node = node.getNextSibling())
+            {
+                if(DOMHelpers::isElement(node, kElBlock))
+                {
+                    DOM::Element item = (DOM::Element&)node;
+                    wstring id = item.getAttribute(kAtList);
+
+                    if(id.length() > 0)
+                    {
+                        // The id moves from the block to the list
+                        item.removeAttribute(kAtList);
+
+                        if(list == NULL || lastId != id)
+                        {
+                            list = doc.createElement(kElList);
+                            list.setAttribute(kAtList, id);
+                            dest.insertBefore(list, node);
+                            lastId = id;
+                        }
+                    }
+                    else
+                    {
+                        // A block outside any list closes the open one
+                        list = NULL;
+                        lastId.erase();
+                    }
+                }
+
+                // While a list is open every node goes into it. Resume
+                // from the list so the walk continues with whatever
+                // followed the moved node.
+                if(list != NULL)
+                {
+                    list.appendChild(dest.removeChild(node));
+                    node = list;
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Replaces style ids on blocks with style names.
+ *
+ * For every kElBlock element that has a style attribute, looks for
+ * the kElStyle element whose kAtId equals that attribute and, if it
+ * has a non-empty kAtName, stores the name in the block's style
+ * attribute instead. Afterwards kAtId is removed from every style
+ * element.
+ *
+ * NOTE(review): kElStyle is used here as the attribute name on
+ * blocks as well as the element name; presumably both share the same
+ * string -- confirm against tags.h.
+ */
+void RtfFixups::fixStyles(const DOM::Document doc)
+{
+    DOM::NodeList styles = doc.getElementsByTagName(kElStyle);
+    if(styles != NULL)
+    {
+        DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
+        if(blocks != NULL)
+        {
+            for(int i = 0; i < blocks->getLength(); i++)
+            {
+                DOM::Element block = (const DOM::Element&)blocks->item(i);
+
+                if(block == NULL || !block.hasAttribute(kElStyle))
+                    continue;
+
+                // Capture the id once. The attribute is overwritten with
+                // the style name below, and comparing later styles with
+                // the rewritten value could remap the block a second time.
+                const wstring styleId = block.getAttribute(kElStyle);
+
+                for(int j = 0; j < styles->getLength(); j++)
+                {
+                    DOM::Element style = (const DOM::Element&)styles->item(j);
+                    if(style != NULL && style.getAttribute(kAtId) == styleId)
+                    {
+                        wstring name = style.getAttribute(kAtName);
+                        if(name.length() > 0)
+                        {
+                            block.setAttribute(kElStyle, name);
+
+                            // First usable match wins
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        // The ids have served their purpose; strip them from the styles
+        for(int i = 0; i < styles->getLength(); i++)
+        {
+            DOM::Element style = (const DOM::Element&)styles->item(i);
+            if(style != NULL)
+                style.removeAttribute(kAtId);
+        }
+    }
+}
+
+
+/**
+ * Brings row elements up to the destination level and groups cell
+ * blocks into tables.
+ *
+ * First, each kElRow element is broken out of its ancestors with
+ * breakElement(). A childless row that is the first or last child of
+ * a block is moved just outside that block beforehand, rather than
+ * splitting the block.
+ *
+ * Then, inside each destination, a block with a non-empty kAtCell
+ * attribute has the attribute removed and opens a kElTable element
+ * if none is open. A block without the attribute closes the open
+ * table. While a table is open every node encountered is moved into
+ * it.
+ */
+void RtfFixups::breakTables(DOM::Document& doc)
+{
+    DOM::NodeList rowList = doc.getElementsByTagName(kElRow);
+    if(rowList != NULL)
+    {
+        for(int r = 0; r < rowList->getLength(); r++)
+        {
+            DOM::Element row = (const DOM::Element&)rowList->item(r);
+            DOM::Node owner = row.getParentNode();
+
+            // Detached rows are left alone
+            if(owner == NULL)
+                continue;
+
+            if(DOMHelpers::isElement(owner, kElBlock))
+            {
+                DOM::Node above = owner.getParentNode();
+
+                if(above != NULL && !row.hasChildNodes())
+                {
+                    if(row.getPreviousSibling() == NULL)
+                    {
+                        // Leading empty row: step out in front of the block
+                        DOM::Node detached = owner.removeChild(row);
+                        above.insertBefore(detached, owner);
+                    }
+                    else if(row.getNextSibling() == NULL)
+                    {
+                        // Trailing empty row: step out behind the block
+                        DOM::Node detached = owner.removeChild(row);
+                        DOMHelpers::insertAfter(above, detached, owner);
+                    }
+                }
+            }
+
+            breakElement(row, kElDest);
+        }
+    }
+
+    // Now group the content of each destination into tables
+    DOM::NodeList dests = doc.getElementsByTagName(kElDest);
+    if(dests != NULL)
+    {
+        for(int d = 0; d < dests->getLength(); d++)
+        {
+            DOM::Element dest = (const DOM::Element&)dests->item(d);
+
+            // Skip missing entries
+            if(dest == NULL)
+                continue;
+
+            // The table currently being filled, if any
+            DOM::Element table;
+
+            for(DOM::Node node = dest.getFirstChild(); node != NULL;
+                node = node.getNextSibling())
+            {
+                if(DOMHelpers::isElement(node, kElBlock))
+                {
+                    DOM::Element blockEl = (DOM::Element&)node;
+
+                    if(blockEl.getAttribute(kAtCell).length() > 0)
+                    {
+                        blockEl.removeAttribute(kAtCell);
+
+                        // The first cell block of a run opens the table
+                        if(table == NULL)
+                        {
+                            table = doc.createElement(kElTable);
+                            dest.insertBefore(table, node);
+                        }
+                    }
+                    else
+                    {
+                        // A block that is not a cell ends the table
+                        table = NULL;
+                    }
+                }
+
+                if(table == NULL)
+                    continue;
+
+                // Move the node into the open table and resume from the
+                // table so the walk continues with what followed the node
+                table.appendChild(dest.removeChild(node));
+                node = table;
+            }
+        }
+    }
+}
+
+/**
+ * Unwraps every element whose name appears in the NULL-terminated
+ * kRemoveTags table: the element's children are moved, in order, to
+ * just before the element, and the element itself is then removed
+ * from its parent. Elements without a parent are skipped.
+ */
+void RtfFixups::removeTags(const DOM::Document& doc)
+{
+    // Go through the list of tag names
+    for(const char** t = kRemoveTags; *t != NULL; t++)
+    {
+        DOM::NodeList elements = doc.getElementsByTagName(*t);
+        if(elements != NULL)
+        {
+            for(int j = 0; j < elements->getLength(); j++)
+            {
+                DOM::Element el = (const DOM::Element&)elements->item(j);
+
+                // Make sure it's a valid element before touching it
+                if(el == NULL)
+                    continue;
+
+                DOM::Node parent = el.getParentNode();
+                if(parent == NULL)
+                    continue;
+
+                // Hoist the children out in front of the element
+                while(el.hasChildNodes())
+                    parent.insertBefore(el.removeChild(el.getFirstChild()), el);
+
+                parent.removeChild(el);
+            }
+        }
+    }
+}
+
+/**
+ * Applies list definitions to the lists that reference them.
+ *
+ * For each kElListdef element that has a kAtList attribute, every
+ * kElList element with an equal kAtList value receives the
+ * definition's attributes through DOMHelpers::copyAttributes() and
+ * then has its own kAtList attribute removed.
+ *
+ * NOTE(review): kHideList is presumably the set of attribute names
+ * copyAttributes() should leave out -- confirm against DOMHelpers.
+ */
+void RtfFixups::fixLists(const DOM::Document doc)
+{
+    DOM::NodeList lists = doc.getElementsByTagName(kElList);
+    if(lists != NULL)
+    {
+        DOM::NodeList definitions = doc.getElementsByTagName(kElListdef);
+        if(definitions != NULL)
+        {
+            for(int d = 0; d < definitions->getLength(); d++)
+            {
+                DOM::Element definition = (const DOM::Element&)definitions->item(d);
+
+                // Only definitions that carry a list id are usable
+                if(definition == NULL || !definition.hasAttribute(kAtList))
+                    continue;
+
+                for(int l = 0; l < lists->getLength(); l++)
+                {
+                    DOM::Element target = (const DOM::Element&)lists->item(l);
+                    if(target == NULL)
+                        continue;
+
+                    if(!(target.getAttribute(kAtList) == definition.getAttribute(kAtList)))
+                        continue;
+
+                    DOMHelpers::copyAttributes(definition, target, kHideList);
+                    target.removeAttribute(kAtList);
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Replaces every kElBlock element that has a parent with a real
+ * element.
+ *
+ * The new element is named by the block's kAtFix attribute, or
+ * kElPara when that attribute is missing or empty. The kAtFix
+ * attribute is removed from the block, the block's remaining
+ * attributes are copied with DOMHelpers::copyAttributes(), its
+ * children are moved across, and the new element takes the block's
+ * place in the parent.
+ */
+void RtfFixups::fixBlocks(const DOM::Document doc)
+{
+    DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
+    if(blocks != NULL)
+    {
+        for(int i = 0; i < blocks->getLength(); i++)
+        {
+            DOM::Element block = (const DOM::Element&)blocks->item(i);
+            DOM::Node parent = block.getParentNode();
+
+            if(parent == NULL)
+                continue;
+
+            // Read the fix name, then strip the attribute so that it is
+            // not copied onto the replacement element below
+            wstring val = block.getAttribute(kAtFix);
+            if(val.length() > 0)
+                block.removeAttribute(kAtFix);
+
+            // The attribute may also be reachable through the
+            // namespace-aware accessors. Strip that form as well, but
+            // only use its value as a fallback: it must never overwrite
+            // a value already read above.
+            wstring nsval = block.getAttributeNS("", kAtFix);
+            if(nsval.length() > 0)
+            {
+                block.removeAttributeNS("", kAtFix);
+                if(val.length() == 0)
+                    val = nsval;
+            }
+
+            string fix;
+            if(val.length() > 0)
+                DOM::transcode16to8(val, fix);
+
+            // No explicit fix means an ordinary paragraph
+            if(fix.length() == 0)
+                fix = kElPara;
+
+            DOM::Element el = doc.createElement(fix);
+            DOMHelpers::copyAttributes(block, el, NULL);
+
+            while(block.hasChildNodes())
+                el.appendChild(block.removeChild(block.getFirstChild()));
+
+            parent.replaceChild(el, block);
+        }
+    }
+}
+
+
+/**
+ * Removes adjacent duplicate nodes of certain names.
+ *
+ * For every element whose name appears in the NULL-terminated
+ * kNoDuplicates table: if the previous sibling is an element that
+ * DOMHelpers::isEqualElement() reports as equal, its children are
+ * moved to the front of this element (keeping their order) and the
+ * sibling is removed. An equal next sibling is merged the same way,
+ * with its children appended at the end.
+ */
+void RtfFixups::removeDuplicates(const DOM::Document& doc)
+{
+    // Go through the list of tag names. The condition must compare
+    // against NULL: assigning would end the loop at once and overwrite
+    // the first table entry.
+    for(const char** t = kNoDuplicates; *t != NULL; t++)
+    {
+        DOM::NodeList elements = doc.getElementsByTagName(*t);
+        if(elements != NULL)
+        {
+            for(int j = 0; j < elements->getLength(); j++)
+            {
+                // Make sure it's a valid element
+                DOM::Element element = (const DOM::Element&)elements->item(j);
+                if(element == NULL)
+                    continue;
+
+                // Get neighbors
+                DOM::Node previous = element.getPreviousSibling();
+                DOM::Node next = element.getNextSibling();
+
+                // Make sure it's still in the document, as we may have
+                // removed it on a previous loop
+                DOM::Node parent = element.getParentNode();
+                if(parent == NULL)
+                    continue;
+
+                // Combine previous if valid
+                if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE &&
+                   DOMHelpers::isEqualElement((DOM::Element&)previous, element))
+                {
+                    // Take the children last to first and put each at
+                    // the front, so their original order is preserved
+                    while(previous.hasChildNodes())
+                    {
+                        DOM::Node child = previous.removeChild(previous.getLastChild());
+                        if(child != NULL)
+                        {
+                            if(element.hasChildNodes())
+                                element.insertBefore(child, element.getFirstChild());
+                            else
+                                element.appendChild(child);
+                        }
+                    }
+
+                    // Remove duplicate node
+                    parent.removeChild(previous);
+                }
+
+                // Combine next if valid
+                if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE &&
+                   DOMHelpers::isEqualElement((DOM::Element&)next, element))
+                {
+                    while(next.hasChildNodes())
+                    {
+                        DOM::Node child = next.removeChild(next.getFirstChild());
+                        if(child != NULL)
+                            element.appendChild(child);
+                    }
+
+                    // Remove duplicate node
+                    parent.removeChild(next);
+                }
+            }
+        }
+    }
+}