diff options
-rw-r--r-- | .cvsignore | 4 | ||||
-rw-r--r-- | ChangeLog | 1 | ||||
-rw-r--r-- | config.h.in | 132 | ||||
-rw-r--r-- | src/Makefile.am | 3 | ||||
-rw-r--r-- | src/xmlcomposer.cpp | 853 | ||||
-rw-r--r-- | src/xmlcomposer.h | 28 |
6 files changed, 8 insertions, 1013 deletions
@@ -5,6 +5,7 @@ Makefile Makefile.in aclocal.m4 config.h +config.h.in config.log config.status configure @@ -16,3 +17,6 @@ stamp-* *~ *.zip *.tar.gz +.project +.cdtproject + @@ -1,5 +1,6 @@ Version 0.9.2 - Updated windows version to new sablot library + - Added support for foot notes Version 0.9.1 - Changed name to 'rtfx' due to a naming conflict diff --git a/config.h.in b/config.h.in deleted file mode 100644 index 36b5094..0000000 --- a/config.h.in +++ /dev/null @@ -1,132 +0,0 @@ -/* config.h.in. Generated from configure.in by autoheader. */ - -/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP - systems. This function is required for `alloca.c' support on those systems. - */ -#undef CRAY_STACKSEG_END - -/* Define to 1 if using `alloca.c'. */ -#undef C_ALLOCA - -/* Define to 1 if you have `alloca', as a function or macro. */ -#undef HAVE_ALLOCA - -/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). - */ -#undef HAVE_ALLOCA_H - -/* Define to 1 if you have the <assert.h> header file. */ -#undef HAVE_ASSERT_H - -/* Define to 1 if you have the <errno.h> header file. */ -#undef HAVE_ERRNO_H - -/* Define to 1 if you have the <exception> header file. */ -#undef HAVE_EXCEPTION - -/* Define to 1 if you have the <inttypes.h> header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if you have the `expat' library (-lexpat). */ -#undef HAVE_LIBEXPAT - -/* Define to 1 if you have the `sablot' library (-lsablot). */ -#undef HAVE_LIBSABLOT - -/* Define to 1 if you have the <memory.h> header file. */ -#undef HAVE_MEMORY_H - -/* Define to 1 if you have the `memset' function. */ -#undef HAVE_MEMSET - -/* Define to 1 if you have the <sablot.h> header file. */ -#undef HAVE_SABLOT_H - -/* Define to 1 if you have the <sdom.h> header file. */ -#undef HAVE_SDOM_H - -/* Define to 1 if you have the `sprintf' function. */ -#undef HAVE_SPRINTF - -/* Define to 1 if you have the <stack> header file. */ -#undef HAVE_STACK - -/* Define to 1 if you have the <stddef.h> header file. */ -#undef HAVE_STDDEF_H - -/* Define to 1 if you have the <stdint.h> header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the <stdio.h> header file. */ -#undef HAVE_STDIO_H - -/* Define to 1 if you have the <stdlib.h> header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the `strchr' function. */ -#undef HAVE_STRCHR - -/* Define to 1 if you have the `strerror' function. */ -#undef HAVE_STRERROR - -/* Define to 1 if you have the <string> header file. */ -#undef HAVE_STRING - -/* Define to 1 if you have the <strings.h> header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the <string.h> header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the <sys/stat.h> header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the <sys/types.h> header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the <unistd.h> header file. */ -#undef HAVE_UNISTD_H - -/* Define to 1 if you have the <wchar.h> header file. */ -#undef HAVE_WCHAR_H - -/* Define to 1 if the system has the type `wstring'. */ -#undef HAVE_WSTRING - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* If using the C implementation of alloca, define if you know the - direction of stack growth for your system; otherwise it will be - automatically deduced at run-time. - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ -#undef STACK_DIRECTION - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* Version number of package */ -#undef VERSION - -/* Define to empty if `const' does not conform to ANSI C. */ -#undef const - -/* Define to `unsigned' if <sys/types.h> does not define. */ -#undef size_t diff --git a/src/Makefile.am b/src/Makefile.am index e28b98e..f4253a3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -3,7 +3,8 @@ bin_PROGRAMS = rtfx rtfx_SOURCES = rtfx.cpp basehandler.cpp basehandler.h levelhandler.cpp levelhandler.h \ reference.h rtfanalyser.cpp rtfanalyser.h rtfparsehelpers.cpp rtfformatting.h \ - rtfparsehelpers.h rtfreader.cpp rtfreader.h sablo.h sablotr.cpp usuals.h + rtfparsehelpers.h rtfreader.cpp rtfreader.h sablo.h sablotr.cpp usuals.h \ + rtffixups.h rtffixups.cpp rtfx_LDADD = -lsablot -lexpat $(LIB_ICONV) rtfx_CFLAGS = -O0 -I${top_srcdir} -I/usr/local/include rtfx_LDFLAGS = -L/usr/local/lib diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp index 80d45af..967fd3c 100644 --- a/src/xmlcomposer.cpp +++ b/src/xmlcomposer.cpp @@ -38,6 +38,7 @@ #include "usuals.h" #include "rtfanalyser.h" +#include "rtffixups.h" const char* kElDest = "i_dest"; const char* kElBlock = "i_block"; @@ -998,855 +999,3 @@ ON_CHARDATA(Attribute) - - -/** - * A quick check to see if a node is an element of a certain - * name - */ -bool RtfParser::isElement(const DOM::Node& node, const string& name) -{ - return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && - node.getNodeName() == name; -} - -bool RtfParser::isEqualElement(const DOM::Element& el1, const DOM::Element& el2) -{ - if(el1.getNodeName() == el2.getNodeName()) - return false; - - DOM::NamedNodeMap at1 = el1.getAttributes(); - DOM::NamedNodeMap at2 = el2.getAttributes(); - - if(at1 == NULL && at2 == NULL) - return true; - - if(at1 == NULL || at2 == NULL || - at1->getLength() != at2->getLength()) - return false; - - for(int i = 0; i < at1->getLength(); i++) - { - DOM::Attr attr1 = (const DOM::Attr&)at1->item(0); - if(attr1 != NULL) - return false; - - DOM::Attr attr2 = (const DOM::Attr&)at2->getNamedItem(attr1.getNodeName()); - if(attr2 != NULL) - return false; - - if(attr1.getNodeValue() == attr2.getNodeValue()) - return false; - } - - return true; -} - -wstring RtfParser::formatInt(int num) -{ - char buff[16]; - - // Certain OSs don't support swprintf :( - sprintf(buff, "%d", num); - - wstring n; - for(char* s = buff; *s; s++) - n.append(1, *s); - - return n; -} - -/** - * Gets the pertinent ancestor of this node, or returns null - * if not found. - */ -DOM::Element RtfParser::getContainingElement(const DOM::Node& node, const string& name) -{ - DOM::Node n = node; - - while(true) - { - n = n.getParentNode(); - if(n == NULL) - break; - - if(isElement(n, name)) - return (DOM::Element&)n; - } - - return DOM::Element(); -} - -DOM::Element RtfParser::getPriorElement(const DOM::Node& node, const string& name) -{ - DOM::Node n = node; - - while(n != NULL) - { - if(isElement(n, name)) - return (DOM::Element&)n; - - n = n.getPreviousSibling(); - } - - DOM::Node parent = node.getParentNode(); - - if(parent == NULL) - return DOM::Element(); - else - return getPriorElement(parent, name); -} - -bool isNsAttr(const string& name) -{ - return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true; -} - -void RtfParser::copyAttributes(const DOM::Element& src, DOM::Element& dest, - const char** hideList) -{ - // Now get both sets of attributes - DOM::NamedNodeMap srcMap = src.getAttributes(); - DOM::NamedNodeMap destMap = dest.getAttributes(); - - if(srcMap == NULL || destMap == NULL) - return; - - // And copy them from one to the other - for(int j = 0; j < srcMap->getLength(); j++) - { - DOM::Node attr = srcMap->item(j); - if(attr != NULL) - { - // BUG: Sablotron seems to have a bug in it's - // setAttributeNode implementation. It always - // adds a blank namespace - // attr = attr.cloneNode(false); - // if(attr != NULL) - // destMap.setNamedItem(attr); - - string name = attr.getNodeName(); - - if(hideList) - { - - for(const char** t = hideList; *t != NULL; t++) - { - if(name == *t) - name.erase(); - } - } - - if(name.length() > 0 && !isNsAttr(name)) - dest.setAttribute(attr.getNodeName(), attr.getNodeValue()); - } - } -} - - -void RtfParser::breakBreak(DOM::Document& doc, const string& contain, - const string& tag) -{ - DOM::NodeList els = doc.getElementsByTagName(tag); - if(els != NULL) - { - for(int i = 0; i < els->getLength(); i++) - { - DOM::Element el = (const DOM::Element&)els->item(i); -#if 0 - // See if parent node only has this break tag - // in it. If so then replace parent with this - - DOM::Node parent = el.getParentNode(); - - if(parent != NULL) - { - DOM::Node grandparent = parent.getParentNode(); - - if(grandparent != NULL && - el.getPreviousSibling() == NULL && - el.getNextSibling() == NULL) - { - grandparent.replaceChild(parent.removeChild(el), parent); - } - } -#endif - - breakElement(el, contain); - } - } -} - -/** - * Breaks a paragraph up through a previous level. Calls itself - * recursively to break paragraphs totally free up to containing - * destination. - * - * For example: - * - * <dest> - * This is <b> a <block fix="para"/> - * test of </b> your concentration. - * </dest> - * - * Becomes: - * - * <dest> - * This is <b> a </b><block fix="para"/> - * <b>test of </b> your concentration. - * </dest> - */ -bool RtfParser::breakElement(const DOM::Element& el, const string& contain) -{ - ASSERT(el != NULL); - - DOM::Element parent = (const DOM::Element&)el.getParentNode(); - DOM::Element grandparent; - - string s = el.getNodeName(); - s = parent.getNodeName(); - - // Get the parent node - if(parent != NULL) - grandparent = (const DOM::Element&)parent.getParentNode(); - - // Make sure we have something to work with before continuing - if(grandparent == NULL || parent == NULL || - isElement(parent, contain)) - return true; - - DOM::Node e; - - // Check to see if this is the first node in the parent. - // If so then just move out to before - if(el.getPreviousSibling() == NULL) - { - e = grandparent.insertBefore(parent.removeChild(el), parent); - } - - - // Check to see if this is the last node in the parent. - // If so then just move out to after the parent - else if(el.getNextSibling() == NULL) - { - DOM::Node next = parent.getNextSibling(); - if(next == NULL) - e = grandparent.appendChild(parent.removeChild(el)); - else - e = grandparent.insertBefore(parent.removeChild(el), next); - } - - - // Otherwise it's in the middle so split the parent - // element etc... - else - { - // Clone it but not deep - DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false); - - if(parent2 == NULL) - return false; - - // Flag that tells us whether we moved anything up to parent - bool moved = false; - - // Now move all nodes after this one to the second parent. - while((e = el.getNextSibling()) != NULL) - { - parent2.appendChild(parent.removeChild(e)); - moved = true; - } - - // Remove the element from it's parent - e = parent.removeChild(el); - - // Okay now we move the paragraph up to the parent - insertAfter(grandparent, e, parent); - if(moved) - insertAfter(grandparent, parent2, e); - } - - // Now call it again with the paragraph in the new position - // untill everything's cut through! - return breakElement((DOM::Element&)e, contain); -} - -/** - * Changes from a marker based paragraph system to a contained - * paragraph system. Also applies paragraph attributes to the - * appropriate paragraph. - * - * For example: - * - * <dest> - * This <blockattr style="10"> is <b> a <block fix="para"/> - * test of </b> your concentration. - * </dest> - * - * Becomes: - * - * <para style="10"> This is <b> a </b></para> - * <para><b>test of </b> your concentration.</para> - */ -void RtfParser::breakBlocks(DOM::Document& document) -{ - // First break out all the paragraphs to the destination level - DOM::NodeList blocks = document.getElementsByTagName(kElBlock); - if(blocks != NULL) - { - for(int i = 0; i < blocks->getLength(); i++) - { - DOM::Element block = (const DOM::Element&)blocks->item(i); - - // If it's the single closed style para then break it - if(block != NULL && !block.hasChildNodes()) - breakElement(block, kElDest); - } - } - - - // Now group stuff in destinations into paras or other blocks - DOM::NodeList destinations = document.getElementsByTagName(kElDest); - if(destinations != NULL) - { - for(int i = 0; i < destinations->getLength(); i++) - { - DOM::Element dest = (const DOM::Element&)destinations->item(i); - - // Sanity Check - if(dest == NULL || !dest.hasChildNodes()) - continue; - - // Go through the children of this destination - DOM::Node child = dest.getFirstChild(); - - DOM::Element block; - - while(child != NULL) - { - // If it's a block - if(isElement(child, kElBlock)) - { - block = (DOM::Element&)child; - child = child.getNextSibling(); - continue; - } - - // If it's already a real block element - for(const char** t = kBlockTags; *t != NULL; t++) - { - if(isElement(child, *t)) - { - block = NULL; - break; - } - } - - // If there's a block then add to it - if(block != NULL) - { - block.appendChild(dest.removeChild(child)); - child = block; - } - - child = child.getNextSibling(); - } - } - } -} - -void RtfParser::wrapTags(DOM::Document& doc, const string& tagName, - const string& wrapName) -{ - DOM::NodeList tags = doc.getElementsByTagName(tagName); - if(tags != NULL) - { - for(int i = 0; i < tags->getLength(); i++) - { - DOM::Element tag = (const DOM::Element&)tags->item(i); - - DOM::Element wrap = doc.createElement(wrapName); - while(tag.hasChildNodes()) - wrap.appendChild(tag.removeChild(tag.getFirstChild())); - - tag.appendChild(wrap); - } - } -} - -void RtfParser::breakTags(DOM::Document& doc, const string& parentName, - const string& tagName) -{ - DOM::NodeList parents = doc.getElementsByTagName(parentName); - if(parents != NULL) - { - for(int i = 0; i < parents->getLength(); i++) - { - DOM::Element parent = (const DOM::Element&)parents->item(i); - - if(!parent.hasChildNodes()) - continue; - - DOM::NodeList tags = parent.getElementsByTagName(tagName); - if(tags != NULL) - { - for(int i = 0; i < tags->getLength(); i++) - breakElement((const DOM::Element&)tags->item(i), parentName); - } - - DOM::Node tag = doc.createElement(tagName); - parent.insertBefore(tag, parent.getFirstChild()); - - DOM::Node child = tag; - - while(child != NULL && (child = child.getNextSibling()) != NULL) - { - if(isElement(child, kElBlock)) - { - DOM::Node next = child.getNextSibling(); - if(next == NULL) - { - parent.removeChild(child); - continue; - } - - if(isElement(next, tagName)) - { - DOM::Node twodown = next.getNextSibling(); - if(!isElement(twodown, kElBlock)) - { - child = parent.insertBefore(parent.removeChild(next), child); - } - else - { - parent.removeChild(child); - child = next; - } - } - } - - if(isElement(child, tagName)) - { - if(!tag.hasChildNodes()) - parent.removeChild(tag); - tag = child; - } - else - { - tag.appendChild(parent.removeChild(child)); - child = tag; - } - } - - if(!tag.hasChildNodes()) - parent.removeChild(tag); - } - } - - DOM::NodeList tags = doc.getElementsByTagName(tagName); - if(tags != NULL) - { - for(int i = 0; i < tags->getLength(); i++) - { - DOM::Element tag = (const DOM::Element&)tags->item(i); - DOM::Node parent = tag.getParentNode(); - - if(parent != NULL && !isElement(parent, parentName)) - parent.removeChild(tag); - -#if 0 - else if(tag.hasChildNodes()) - { - DOM::NodeList children = tag.getChildNodes(); - if(children != NULL && children->getLength() == 1) - { - DOM::Node child = children->item(0); - if(child != NULL && !child.hasChildNodes() && - isElement(child, kElBlock)) - parent.removeChild(tag); - } - } -#endif - - } - } -} - -void RtfParser::breakLists(DOM::Document& doc) -{ - // Now group stuff in destinations into tables - DOM::NodeList destinations = doc.getElementsByTagName(kElDest); - if(destinations != NULL) - { - for(int i = 0; i < destinations->getLength(); i++) - { - DOM::Element dest = (const DOM::Element&)destinations->item(i); - - // Sanity Check - if(dest == NULL) - continue; - - // Go through the children of this destination - DOM::Node child = dest.getFirstChild(); - - DOM::Element list; - DOM::Element e; - - wstring previd; - - while(child != NULL) - { - // If it's a block and has a cell attribute - if(isElement(child, kElBlock)) - { - e = (DOM::Element&)child; - - // if it has a cell attribute - wstring listid = e.getAttribute(kAtList); - if(listid.length() > 0) - { - e.removeAttribute(kAtList); - - if(list == NULL || previd != listid) - { - list = doc.createElement(kElList); - list.setAttribute(kAtList, listid); - dest.insertBefore(list, child); - previd = listid; - } - } - else - { - list = NULL; - previd.erase(); - } - } - - // It's not a block - if(list != NULL) - { - list.appendChild(dest.removeChild(child)); - child = list; - } - - child = child.getNextSibling(); - } - } - } -} - -void RtfParser::fixStyles(const DOM::Document doc) -{ - DOM::NodeList styles = doc.getElementsByTagName(kElStyle); - if(styles != NULL) - { - DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); - if(blocks != NULL) - { - for(int i = 0; i < blocks->getLength(); i++) - { - DOM::Element block = (const DOM::Element&)blocks->item(i); - - if(block == NULL || !block.hasAttribute(kElStyle)) - continue; - - for(int j = 0; j < styles->getLength(); j++) - { - DOM::Element style = (const DOM::Element&)styles->item(j); - if(style != NULL) - { - if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) - { - wstring name = style.getAttribute(kAtName); - if(name.length() > 0) - block.setAttribute(kElStyle, name); - } - } - } - } - } - - for(int i = 0; i < styles->getLength(); i++) - { - DOM::Element style = (const DOM::Element&)styles->item(i); - if(style != NULL) - style.removeAttribute(kAtId); - } - } - - -} - - -void RtfParser::breakTables(DOM::Document& doc) -{ - DOM::NodeList rows = doc.getElementsByTagName(kElRow); - if(rows != NULL) - { - for(int i = 0; i < rows->getLength(); i++) - { - DOM::Element row = (const DOM::Element&)rows->item(i); - DOM::Node parent = row.getParentNode(); - - if(parent == NULL) - continue; - - if(isElement(parent, kElBlock)) - { - DOM::Node grandparent = parent.getParentNode(); - - if(grandparent != NULL && !row.hasChildNodes()) - { - if(row.getPreviousSibling() == NULL) - grandparent.insertBefore(parent.removeChild(row), parent); - else if(row.getNextSibling() == NULL) - insertAfter(grandparent, parent.removeChild(row), parent); - } - } - - breakElement(row, kElDest); - } - } - - - - // Now group stuff in destinations into tables - DOM::NodeList destinations = doc.getElementsByTagName(kElDest); - if(destinations != NULL) - { - for(int i = 0; i < destinations->getLength(); i++) - { - DOM::Element dest = (const DOM::Element&)destinations->item(i); - - // Sanity Check - if(dest == NULL) - continue; - - // Go through the children of this destination - DOM::Node child = dest.getFirstChild(); - - DOM::Element table; - DOM::Element e; - - while(child != NULL) - { - // If it's a block and has a cell attribute - if(isElement(child, kElBlock)) - { - e = (DOM::Element&)child; - - // if it has a cell attribute - if(e.getAttribute(kAtCell).length() > 0) - { - e.removeAttribute(kAtCell); - - if(table == NULL) - { - table = doc.createElement(kElTable); - dest.insertBefore(table, child); - } - } - else - { - table = NULL; - } - } - - // It's not a block - if(table != NULL) - { - table.appendChild(dest.removeChild(child)); - child = table; - } - - child = child.getNextSibling(); - } - } - } -} - -void RtfParser::insertAfter(DOM::Node& parent, const DOM::Node& node, - const DOM::Node& ref) -{ - DOM::Node sibling = ref.getNextSibling(); - if(sibling == NULL) - parent.appendChild(node); - else - parent.insertBefore(node, sibling); -} - -void RtfParser::removeTags(const DOM::Document& doc) -{ - // Go through the list of nodes - for(const char** t = kRemoveTags; *t != NULL; t++) - { - DOM::NodeList elements = doc.getElementsByTagName(*t); - if(elements != NULL) - { - for(int j = 0; j < elements->getLength(); j++) - { - DOM::Element el = (const DOM::Element&)elements->item(j); - DOM::Node parent = el->getParentNode(); - - if(parent == NULL) - continue; - - while(el.hasChildNodes()) - parent.insertBefore(el.removeChild(el.getFirstChild()), el); - - parent.removeChild(el); - } - } - } -} - -void RtfParser::fixLists(const DOM::Document doc) -{ - DOM::NodeList lists = doc.getElementsByTagName(kElList); - if(lists != NULL) - { - DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); - if(listdefs != NULL) - { - for(int i = 0; i < listdefs->getLength(); i++) - { - DOM::Element listdef = (const DOM::Element&)listdefs->item(i); - - if(listdef == NULL || !listdef.hasAttribute(kAtList)) - continue; - - for(int j = 0; j < lists->getLength(); j++) - { - DOM::Element list = (const DOM::Element&)lists->item(j); - if(list != NULL) - { - if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) - { - copyAttributes(listdef, list, kHideList); - list.removeAttribute(kAtList); - } - } - } - } - } - } -} - -void RtfParser::fixBlocks(const DOM::Document doc) -{ - // First break out all the paragraphs to the destination level - DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); - if(blocks != NULL) - { - string fix; - wstring val; - - for(int i = 0; i < blocks->getLength(); i++) - { - DOM::Element block = (const DOM::Element&)blocks->item(i); - DOM::Node parent = block.getParentNode(); - - if(parent == NULL) - continue; - - fix.resize(0); - val.resize(0); - - val = block.getAttribute(kAtFix); - if(val.length() > 0) - block.removeAttribute(kAtFix); - - - if(val.length() > 0) - { - val = block.getAttributeNS("", kAtFix); - if(val.length() > 0) - block.removeAttributeNS("", kAtFix); - } - - if(val.length() > 0) - DOM::transcode16to8(val, fix); - - if(fix.length() == 0) - fix = kElPara; - - DOM::Element el = doc.createElement(fix); - copyAttributes(block, el, NULL); - - while(block.hasChildNodes()) - el.appendChild(block.removeChild(block.getFirstChild())); - - parent.replaceChild(el, block); - } - } -} - - -/** - * Removes adjacent duplicate nodes of certain names - */ -void RtfParser::removeDuplicates(const DOM::Document& doc) -{ - // Go through the list of nodes - for(const char** t = kNoDuplicates; *t = NULL; t++) - { - DOM::NodeList elements = doc.getElementsByTagName(*t); - if(elements != NULL) - { - int x = elements->getLength(); - for(int j = 0; j < elements->getLength(); j++) - { - - // Make sure it's a valid element - DOM::Element element = (const DOM::Element&)elements->item(j); - if(element == NULL) - continue; - - // Get neighbors - DOM::Node previous = element.getPreviousSibling(); - DOM::Node next = element.getNextSibling(); - - // Make sure it's still in the document, as we may have - // removed it on a previous loop - DOM::Node parent = element.getParentNode(); - if(parent == NULL) - continue; - - // Combine previous if valid - if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE && - isEqualElement((DOM::Element&)previous, element)) - { - while(previous.hasChildNodes()) - { - DOM::Node child = previous.removeChild(previous.getLastChild()); - if(child != NULL) - { - if(element.hasChildNodes()) - element.insertBefore(child, element.getFirstChild()); - else - element.appendChild(child); - } - } - - // Remove duplicate node - parent.removeChild(previous); - } - - // Combine next if valid - if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE && - isEqualElement((DOM::Element&)next, element)) - { - while(next.hasChildNodes()) - { - DOM::Node child = next.removeChild(next.getFirstChild()); - if(child != NULL) - element.appendChild(child); - } - - // Remove duplicate node - parent.removeChild(next); - } - } - } - } -} diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h index a4e0fae..5403240 100644 --- a/src/xmlcomposer.h +++ b/src/xmlcomposer.h @@ -82,38 +82,10 @@ public: DOM::Document getDocument() { return m_document; } - static bool isElement(const DOM::Node& node, const string& name); - static bool isEqualElement(const DOM::Element& el1, const DOM::Element& el2); - static void copyAttributes(const DOM::Element& src, DOM::Element& dest, const char** hideList); - static void insertAfter(DOM::Node& parent, const DOM::Node& node, const DOM::Node& ref); - static DOM::Element getContainingElement(const DOM::Node& node, const string& name); - static DOM::Element getPriorElement(const DOM::Node& node, const string& name); - - static wstring formatInt(int num); - const RtfParserOptions& getOptions() { return m_options; } protected: - // Cleanup Functions - void fixBlocks(DOM::Document doc); - void fixLists(const DOM::Document doc); - void fixStyles(const DOM::Document doc); - bool breakElement(const DOM::Element& el, const string& contain); - - void breakBreak(DOM::Document& doc, const string& contain, - const string& tag); - void breakLists(DOM::Document& document); - void breakTables(DOM::Document& document); - void breakTags(DOM::Document& doc, const string& parentName, - const string& tagName); - void breakBlocks(DOM::Document& document); - void wrapTags(DOM::Document& document, const string& tagName, - const string& wrapName); - - void removeTags(const DOM::Document& doc); - void removeDuplicates(const DOM::Document& doc); - virtual void clear(); |