From 85c48677e37a474105096747e59efa3bbb9ce4df Mon Sep 17 00:00:00 2001 From: Stef Date: Sun, 11 Jul 2004 01:23:51 +0000 Subject: Reorganization of functions and files. --- src/Makefile.am | 2 +- src/domhelpers.cpp | 181 +++++++++++++ src/domhelpers.h | 57 ++++ src/tags.h | 112 ++++++++ src/xmlcomposer.cpp | 112 ++------ src/xmlcomposer.h | 3 +- src/xmlfixups.cpp | 739 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/xmlfixups.h | 62 +++++ 8 files changed, 1182 insertions(+), 86 deletions(-) create mode 100644 src/domhelpers.cpp create mode 100644 src/domhelpers.h create mode 100644 src/tags.h create mode 100644 src/xmlfixups.cpp create mode 100644 src/xmlfixups.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index f4253a3..4261c60 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -4,7 +4,7 @@ bin_PROGRAMS = rtfx rtfx_SOURCES = rtfx.cpp basehandler.cpp basehandler.h levelhandler.cpp levelhandler.h \ reference.h rtfanalyser.cpp rtfanalyser.h rtfparsehelpers.cpp rtfformatting.h \ rtfparsehelpers.h rtfreader.cpp rtfreader.h sablo.h sablotr.cpp usuals.h \ - rtffixups.h rtffixups.cpp + rtffixups.h rtffixups.cpp domhelpers.h domhelpers.cpp tags.h rtfx_LDADD = -lsablot -lexpat $(LIB_ICONV) rtfx_CFLAGS = -O0 -I${top_srcdir} -I/usr/local/include rtfx_LDFLAGS = -L/usr/local/lib diff --git a/src/domhelpers.cpp b/src/domhelpers.cpp new file mode 100644 index 0000000..7b06f55 --- /dev/null +++ b/src/domhelpers.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2004, Nate Nielsen + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the + * above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * * The names of contributors to this software may not be + * used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * + * CONTRIBUTORS + * Nate Nielsen + * + */ + +#include "usuals.h" +#include "domhelpers.h" +#include "tags.h" + +/** + * A quick check to see if a node is an element of a certain + * name + */ +bool DOMHelpers::isElement(const DOM::Node& node, const string& name) +{ + return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && + node.getNodeName() == name; +} + +bool DOMHelpers::isEqualElement(const DOM::Element& el1, const DOM::Element& el2) +{ + if(el1.getNodeName() == el2.getNodeName()) + return false; + + DOM::NamedNodeMap at1 = el1.getAttributes(); + DOM::NamedNodeMap at2 = el2.getAttributes(); + + if(at1 == NULL && at2 == NULL) + return true; + + if(at1 == NULL || at2 == NULL || + at1->getLength() != at2->getLength()) + return false; + + for(int i = 0; i < at1->getLength(); i++) + { + DOM::Attr attr1 = (const DOM::Attr&)at1->item(0); + if(attr1 != NULL) + return false; + + DOM::Attr attr2 = (const DOM::Attr&)at2->getNamedItem(attr1.getNodeName()); + if(attr2 != NULL) + return false; + + if(attr1.getNodeValue() == attr2.getNodeValue()) + return false; + } + + return true; +} + +/** + * Gets the pertinent ancestor of this node, or returns null + * if not found. + */ +DOM::Element DOMHelpers::getContainingElement(const DOM::Node& node, const string& name) +{ + DOM::Node n = node; + + while(true) + { + n = n.getParentNode(); + if(n == NULL) + break; + + if(isElement(n, name)) + return (DOM::Element&)n; + } + + return DOM::Element(); +} + +bool isNsAttr(const string& name) +{ + return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true; +} + +void DOMHelpers::copyAttributes(const DOM::Element& src, DOM::Element& dest, + const char** hideList) +{ + // Now get both sets of attributes + DOM::NamedNodeMap srcMap = src.getAttributes(); + DOM::NamedNodeMap destMap = dest.getAttributes(); + + if(srcMap == NULL || destMap == NULL) + return; + + // And copy them from one to the other + for(int j = 0; j < srcMap->getLength(); j++) + { + DOM::Node attr = srcMap->item(j); + if(attr != NULL) + { + // BUG: Sablotron seems to have a bug in it's + // setAttributeNode implementation. It always + // adds a blank namespace + // attr = attr.cloneNode(false); + // if(attr != NULL) + // destMap.setNamedItem(attr); + + string name = attr.getNodeName(); + + if(hideList) + { + + for(const char** t = hideList; *t != NULL; t++) + { + if(name == *t) + name.erase(); + } + } + + if(name.length() > 0 && !isNsAttr(name)) + dest.setAttribute(attr.getNodeName(), attr.getNodeValue()); + } + } +} + +DOM::Element DOMHelpers::getPriorElement(const DOM::Node& node, const string& name) +{ + DOM::Node n = node; + + while(n != NULL) + { + if(isElement(n, name)) + return (DOM::Element&)n; + + n = n.getPreviousSibling(); + } + + DOM::Node parent = node.getParentNode(); + + if(parent == NULL) + return DOM::Element(); + else + return getPriorElement(parent, name); +} + +void DOMHelpers::insertAfter(DOM::Node& parent, const DOM::Node& node, + const DOM::Node& ref) +{ + DOM::Node sibling = ref.getNextSibling(); + if(sibling == NULL) + parent.appendChild(node); + else + parent.insertBefore(node, sibling); +} + diff --git a/src/domhelpers.h b/src/domhelpers.h new file mode 100644 index 0000000..16afd79 --- /dev/null +++ b/src/domhelpers.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2004, Nate Nielsen + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the + * above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * * The names of contributors to this software may not be + * used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * + * CONTRIBUTORS + * Nate Nielsen + * + */ + +#ifndef __DOMHELPERS_H__ +#define __DOMHELPERS_H__ + +#include "sablo.h" + +class DOMHelpers +{ +public: + + // DOM Helper Functions + static bool isElement(const DOM::Node& node, const string& name); + static bool isEqualElement(const DOM::Element& el1, const DOM::Element& el2); + static void copyAttributes(const DOM::Element& src, DOM::Element& dest, const char** hideList); + static void insertAfter(DOM::Node& parent, const DOM::Node& node, const DOM::Node& ref); + static DOM::Element getContainingElement(const DOM::Node& node, const string& name); + static DOM::Element getPriorElement(const DOM::Node& node, const string& name); +}; + +#endif // __DOMHELPERS_H__ diff --git a/src/tags.h b/src/tags.h new file mode 100644 index 0000000..d0382cc --- /dev/null +++ b/src/tags.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2004, Nate Nielsen + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the + * above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * * The names of contributors to this software may not be + * used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * + * CONTRIBUTORS + * Nate Nielsen + * + */ + +#ifndef __TAGS_H__ +#define __TAGS_H__ + +static const char* kElDest = "i_dest"; +static const char* kElBlock = "i_block"; +static const char* kAtFix = "i_fix"; +static const char* kAtCell = "i_cell"; +static const char* kElListtable = "i_listtable"; +static const char* kElListdef = "i_listdef"; + +static const char* kElPara = "para"; +static const char* kElDoc = "document"; +static const char* kElTab = "tab"; +static const char* kElSect = "sect"; +static const char* kElPage = "page"; +static const char* kElStyle = "style"; +static const char* kElLine = "line"; +static const char* kElList = "list"; +static const char* kElStylesheet = "stylesheet"; +static const char* kElInfo = "info"; +static const char* kElTitle = "title"; +static const char* kElAuthor = "author"; +static const char* kElOperator = "operator"; +static const char* kElB = "b"; +static const char* kElHide = "hide"; +static const char* kElI = "i"; +static const char* kElStrike = "strike"; +static const char* kElU = "u"; +static const char* kElColor = "color"; +static const char* kElCell = "cell"; +static const char* kElRow = "row"; +static const char* kElTable = "table"; + +static const char* kAtList = "list"; +static const char* kAtName = "name"; +static const char* kAtBold = "bold"; +static const char* kAtHidden = "hide"; +static const char* kAtItalic = "italic"; +static const char* kAtStrike = "strike"; +static const char* kAtUnderline = "underline"; +static const char* kAtColor = "color"; +static const char* kAtType = "type"; +static const char* kAtOrdered = "ordered"; +static const char* kAtStart = "start"; +static const char* kAtId = "id"; +static const char* kAtIndex = "id"; + +static const wchar_t* kValDisc = L"disc"; +static const wchar_t* kValLowerAlpha = L"lower-alpha"; +static const wchar_t* kValUpperAlpha = L"upper-alpha"; +static const wchar_t* kValLowerRoman = L"lower-roman"; +static const wchar_t* kValUpperRoman = L"upper-roman"; +static const wchar_t* kValArabic = L"arabic"; +static const wchar_t* kValNull = L""; + +static const wchar_t* kValList = L"list"; +static const wchar_t* kValPara = L"para"; +static const wchar_t* kValTable = L"table"; + +static const char* kNoDuplicates[] = + { kElB, kElU, kElI, kElColor, kElHide, kElColor, NULL }; + +static const char* kRemoveTags[] = + { kElDest, kElListdef, kElListtable, NULL }; + +static const char* kBlockTags[] = + { kElTable, kElPara, NULL }; + +static const char* kHideList[] = + { kAtId, kAtList, NULL }; + +static const char* kNSPrefix = "xmlns"; + +#endif // __TAGS_H__ diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp index 967fd3c..9be1eeb 100644 --- a/src/xmlcomposer.cpp +++ b/src/xmlcomposer.cpp @@ -39,76 +39,8 @@ #include "usuals.h" #include "rtfanalyser.h" #include "rtffixups.h" - -const char* kElDest = "i_dest"; -const char* kElBlock = "i_block"; -const char* kAtFix = "i_fix"; -const char* kAtCell = "i_cell"; -const char* kElListtable = "i_listtable"; -const char* kElListdef = "i_listdef"; - -const char* kElPara = "para"; -const char* kElDoc = "document"; -const char* kElTab = "tab"; -const char* kElSect = "sect"; -const char* kElPage = "page"; -const char* kElStyle = "style"; -const char* kElLine = "line"; -const char* kElList = "list"; -const char* kElStylesheet = "stylesheet"; -const char* kElInfo = "info"; -const char* kElTitle = "title"; -const char* kElAuthor = "author"; -const char* kElOperator = "operator"; -const char* kElB = "b"; -const char* kElHide = "hide"; -const char* kElI = "i"; -const char* kElStrike = "strike"; -const char* kElU = "u"; -const char* kElColor = "color"; -const char* kElCell = "cell"; -const char* kElRow = "row"; -const char* kElTable = "table"; - -const char* kAtList = "list"; -const char* kAtName = "name"; -const char* kAtBold = "bold"; -const char* kAtHidden = "hide"; -const char* kAtItalic = "italic"; -const char* kAtStrike = "strike"; -const char* kAtUnderline = "underline"; -const char* kAtColor = "color"; -const char* kAtType = "type"; -const char* kAtOrdered = "ordered"; -const char* kAtStart = "start"; -const char* kAtId = "id"; -const char* kAtIndex = "id"; - -const wchar_t* kValDisc = L"disc"; -const wchar_t* kValLowerAlpha = L"lower-alpha"; -const wchar_t* kValUpperAlpha = L"upper-alpha"; -const wchar_t* kValLowerRoman = L"lower-roman"; -const wchar_t* kValUpperRoman = L"upper-roman"; -const wchar_t* kValArabic = L"arabic"; -const wchar_t* kValNull = L""; - -const wchar_t* kValList = L"list"; -const wchar_t* kValPara = L"para"; -const wchar_t* kValTable = L"table"; - -const char* kNoDuplicates[] = - { kElB, kElU, kElI, kElColor, kElHide, kElColor, NULL }; - -const char* kRemoveTags[] = - { kElDest, kElListdef, kElListtable, NULL }; - -const char* kBlockTags[] = - { kElTable, kElPara, NULL }; - -const char* kHideList[] = - { kAtId, kAtList, NULL }; - -const char* kNSPrefix = "xmlns"; +#include "domhelpers.h" +#include "tags.h" ////////////////////////////////////////////////////////////////////// // Construction/Destruction @@ -168,19 +100,19 @@ void RtfParser::endDocument() LevelHandler::endDocument(); // Cleanup the tree - removeDuplicates(m_document); - breakTables(m_document); - breakTags(m_document, kElTable, kElRow); - breakTags(m_document, kElRow, kElCell); - wrapTags(m_document, kElCell, kElDest); - breakBlocks(m_document); - breakLists(m_document); - fixLists(m_document); - fixStyles(m_document); - fixBlocks(m_document); - removeTags(m_document); - breakBreak(m_document, kElDoc, kElPage); - breakBreak(m_document, kElDoc, kElSect); + RtfFixups::removeDuplicates(m_document); + RtfFixups::breakTables(m_document); + RtfFixups::breakTags(m_document, kElTable, kElRow); + RtfFixups::breakTags(m_document, kElRow, kElCell); + RtfFixups::wrapTags(m_document, kElCell, kElDest); + RtfFixups::breakBlocks(m_document); + RtfFixups::breakLists(m_document); + RtfFixups::fixLists(m_document); + RtfFixups::fixStyles(m_document); + RtfFixups::fixBlocks(m_document); + RtfFixups::removeTags(m_document); + RtfFixups::breakBreak(m_document, kElDoc, kElPage); + RtfFixups::breakBreak(m_document, kElDoc, kElSect); return; } @@ -414,7 +346,7 @@ DOM::Element RtfParser::ParseAnalyser::getCurrentBlock() if(node.hasChildNodes()) node = node.getLastChild(); - return m_parser->getPriorElement(node, kElBlock); + return DOMHelpers::getPriorElement(node, kElBlock); } @@ -997,5 +929,17 @@ ON_CHARDATA(Attribute) element.setAttribute(name, cur); } +wstring RtfParser::formatInt(int num) +{ + char buff[16]; + + // Certain OSs don't support swprintf :( + sprintf(buff, "%d", num); + wstring n; + for(char* s = buff; *s; s++) + n.append(1, *s); + + return n; +} diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h index 5403240..fd9fd7a 100644 --- a/src/xmlcomposer.h +++ b/src/xmlcomposer.h @@ -62,7 +62,6 @@ public: virtual void groupEnd(); virtual void charData(wstring data); - // Element management functions DOM::Element createElement(const string& name); void pushElement(const DOM::Element& element); @@ -85,6 +84,8 @@ public: const RtfParserOptions& getOptions() { return m_options; } + static wstring formatInt(int num); + protected: virtual void clear(); diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp new file mode 100644 index 0000000..7201703 --- /dev/null +++ b/src/xmlfixups.cpp @@ -0,0 +1,739 @@ +/* + * Copyright (c) 2004, Nate Nielsen + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the + * above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * * The names of contributors to this software may not be + * used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * + * CONTRIBUTORS + * Nate Nielsen + * + */ + +#include "usuals.h" +#include "rtffixups.h" +#include "domhelpers.h" +#include "tags.h" + +void RtfFixups::breakBreak(DOM::Document& doc, const string& contain, + const string& tag) +{ + DOM::NodeList els = doc.getElementsByTagName(tag); + if(els != NULL) + { + for(int i = 0; i < els->getLength(); i++) + { + DOM::Element el = (const DOM::Element&)els->item(i); +#if 0 + // See if parent node only has this break tag + // in it. If so then replace parent with this + + DOM::Node parent = el.getParentNode(); + + if(parent != NULL) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && + el.getPreviousSibling() == NULL && + el.getNextSibling() == NULL) + { + grandparent.replaceChild(parent.removeChild(el), parent); + } + } +#endif + + breakElement(el, contain); + } + } +} + +/** + * Breaks a paragraph up through a previous level. Calls itself + * recursively to break paragraphs totally free up to containing + * destination. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * + * This is a + * test of your concentration. + * + */ +bool RtfFixups::breakElement(const DOM::Element& el, const string& contain) +{ + ASSERT(el != NULL); + + DOM::Element parent = (const DOM::Element&)el.getParentNode(); + DOM::Element grandparent; + + string s = el.getNodeName(); + s = parent.getNodeName(); + + // Get the parent node + if(parent != NULL) + grandparent = (const DOM::Element&)parent.getParentNode(); + + // Make sure we have something to work with before continuing + if(grandparent == NULL || parent == NULL || + DOMHelpers::isElement(parent, contain)) + return true; + + DOM::Node e; + + // Check to see if this is the first node in the parent. + // If so then just move out to before + if(el.getPreviousSibling() == NULL) + { + e = grandparent.insertBefore(parent.removeChild(el), parent); + } + + + // Check to see if this is the last node in the parent. + // If so then just move out to after the parent + else if(el.getNextSibling() == NULL) + { + DOM::Node next = parent.getNextSibling(); + if(next == NULL) + e = grandparent.appendChild(parent.removeChild(el)); + else + e = grandparent.insertBefore(parent.removeChild(el), next); + } + + + // Otherwise it's in the middle so split the parent + // element etc... + else + { + // Clone it but not deep + DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false); + + if(parent2 == NULL) + return false; + + // Flag that tells us whether we moved anything up to parent + bool moved = false; + + // Now move all nodes after this one to the second parent. + while((e = el.getNextSibling()) != NULL) + { + parent2.appendChild(parent.removeChild(e)); + moved = true; + } + + // Remove the element from it's parent + e = parent.removeChild(el); + + // Okay now we move the paragraph up to the parent + DOMHelpers::insertAfter(grandparent, e, parent); + if(moved) + DOMHelpers::insertAfter(grandparent, parent2, e); + } + + // Now call it again with the paragraph in the new position + // untill everything's cut through! + return breakElement((DOM::Element&)e, contain); +} + +/** + * Changes from a marker based paragraph system to a contained + * paragraph system. Also applies paragraph attributes to the + * appropriate paragraph. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * This is a + * test of your concentration. + */ +void RtfFixups::breakBlocks(DOM::Document& document) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = document.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (const DOM::Element&)blocks->item(i); + + // If it's the single closed style para then break it + if(block != NULL && !block.hasChildNodes()) + breakElement(block, kElDest); + } + } + + + // Now group stuff in destinations into paras or other blocks + DOM::NodeList destinations = document.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (const DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL || !dest.hasChildNodes()) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element block; + + while(child != NULL) + { + // If it's a block + if(DOMHelpers::isElement(child, kElBlock)) + { + block = (DOM::Element&)child; + child = child.getNextSibling(); + continue; + } + + // If it's already a real block element + for(const char** t = kBlockTags; *t != NULL; t++) + { + if(DOMHelpers::isElement(child, *t)) + { + block = NULL; + break; + } + } + + // If there's a block then add to it + if(block != NULL) + { + block.appendChild(dest.removeChild(child)); + child = block; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfFixups::wrapTags(DOM::Document& doc, const string& tagName, + const string& wrapName) +{ + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (const DOM::Element&)tags->item(i); + + DOM::Element wrap = doc.createElement(wrapName); + while(tag.hasChildNodes()) + wrap.appendChild(tag.removeChild(tag.getFirstChild())); + + tag.appendChild(wrap); + } + } +} + +void RtfFixups::breakTags(DOM::Document& doc, const string& parentName, + const string& tagName) +{ + DOM::NodeList parents = doc.getElementsByTagName(parentName); + if(parents != NULL) + { + for(int i = 0; i < parents->getLength(); i++) + { + DOM::Element parent = (const DOM::Element&)parents->item(i); + + if(!parent.hasChildNodes()) + continue; + + DOM::NodeList tags = parent.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + breakElement((const DOM::Element&)tags->item(i), parentName); + } + + DOM::Node tag = doc.createElement(tagName); + parent.insertBefore(tag, parent.getFirstChild()); + + DOM::Node child = tag; + + while(child != NULL && (child = child.getNextSibling()) != NULL) + { + if(DOMHelpers::isElement(child, kElBlock)) + { + DOM::Node next = child.getNextSibling(); + if(next == NULL) + { + parent.removeChild(child); + continue; + } + + if(DOMHelpers::isElement(next, tagName)) + { + DOM::Node twodown = next.getNextSibling(); + if(!DOMHelpers::isElement(twodown, kElBlock)) + { + child = parent.insertBefore(parent.removeChild(next), child); + } + else + { + parent.removeChild(child); + child = next; + } + } + } + + if(DOMHelpers::isElement(child, tagName)) + { + if(!tag.hasChildNodes()) + parent.removeChild(tag); + tag = child; + } + else + { + tag.appendChild(parent.removeChild(child)); + child = tag; + } + } + + if(!tag.hasChildNodes()) + parent.removeChild(tag); + } + } + + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (const DOM::Element&)tags->item(i); + DOM::Node parent = tag.getParentNode(); + + if(parent != NULL && !DOMHelpers::isElement(parent, parentName)) + parent.removeChild(tag); + +#if 0 + else if(tag.hasChildNodes()) + { + DOM::NodeList children = tag.getChildNodes(); + if(children != NULL && children->getLength() == 1) + { + DOM::Node child = children->item(0); + if(child != NULL && !child.hasChildNodes() && + DOMHelpers::isElement(child, kElBlock)) + parent.removeChild(tag); + } + } +#endif + + } + } +} + +void RtfFixups::breakLists(DOM::Document& doc) +{ + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (const DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element list; + DOM::Element e; + + wstring previd; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(DOMHelpers::isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + wstring listid = e.getAttribute(kAtList); + if(listid.length() > 0) + { + e.removeAttribute(kAtList); + + if(list == NULL || previd != listid) + { + list = doc.createElement(kElList); + list.setAttribute(kAtList, listid); + dest.insertBefore(list, child); + previd = listid; + } + } + else + { + list = NULL; + previd.erase(); + } + } + + // It's not a block + if(list != NULL) + { + list.appendChild(dest.removeChild(child)); + child = list; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfFixups::fixStyles(const DOM::Document doc) +{ + DOM::NodeList styles = doc.getElementsByTagName(kElStyle); + if(styles != NULL) + { + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (const DOM::Element&)blocks->item(i); + + if(block == NULL || !block.hasAttribute(kElStyle)) + continue; + + for(int j = 0; j < styles->getLength(); j++) + { + DOM::Element style = (const DOM::Element&)styles->item(j); + if(style != NULL) + { + if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) + { + wstring name = style.getAttribute(kAtName); + if(name.length() > 0) + block.setAttribute(kElStyle, name); + } + } + } + } + } + + for(int i = 0; i < styles->getLength(); i++) + { + DOM::Element style = (const DOM::Element&)styles->item(i); + if(style != NULL) + style.removeAttribute(kAtId); + } + } + + +} + + +void RtfFixups::breakTables(DOM::Document& doc) +{ + DOM::NodeList rows = doc.getElementsByTagName(kElRow); + if(rows != NULL) + { + for(int i = 0; i < rows->getLength(); i++) + { + DOM::Element row = (const DOM::Element&)rows->item(i); + DOM::Node parent = row.getParentNode(); + + if(parent == NULL) + continue; + + if(DOMHelpers::isElement(parent, kElBlock)) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && !row.hasChildNodes()) + { + if(row.getPreviousSibling() == NULL) + grandparent.insertBefore(parent.removeChild(row), parent); + else if(row.getNextSibling() == NULL) + DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent); + } + } + + breakElement(row, kElDest); + } + } + + + + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (const DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element table; + DOM::Element e; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(DOMHelpers::isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + if(e.getAttribute(kAtCell).length() > 0) + { + e.removeAttribute(kAtCell); + + if(table == NULL) + { + table = doc.createElement(kElTable); + dest.insertBefore(table, child); + } + } + else + { + table = NULL; + } + } + + // It's not a block + if(table != NULL) + { + table.appendChild(dest.removeChild(child)); + child = table; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfFixups::removeTags(const DOM::Document& doc) +{ + // Go through the list of nodes + for(const char** t = kRemoveTags; *t != NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + for(int j = 0; j < elements->getLength(); j++) + { + DOM::Element el = (const DOM::Element&)elements->item(j); + DOM::Node parent = el->getParentNode(); + + if(parent == NULL) + continue; + + while(el.hasChildNodes()) + parent.insertBefore(el.removeChild(el.getFirstChild()), el); + + parent.removeChild(el); + } + } + } +} + +void RtfFixups::fixLists(const DOM::Document doc) +{ + DOM::NodeList lists = doc.getElementsByTagName(kElList); + if(lists != NULL) + { + DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); + if(listdefs != NULL) + { + for(int i = 0; i < listdefs->getLength(); i++) + { + DOM::Element listdef = (const DOM::Element&)listdefs->item(i); + + if(listdef == NULL || !listdef.hasAttribute(kAtList)) + continue; + + for(int j = 0; j < lists->getLength(); j++) + { + DOM::Element list = (const DOM::Element&)lists->item(j); + if(list != NULL) + { + if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) + { + DOMHelpers::copyAttributes(listdef, list, kHideList); + list.removeAttribute(kAtList); + } + } + } + } + } + } +} + +void RtfFixups::fixBlocks(const DOM::Document doc) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + string fix; + wstring val; + + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (const DOM::Element&)blocks->item(i); + DOM::Node parent = block.getParentNode(); + + if(parent == NULL) + continue; + + fix.resize(0); + val.resize(0); + + val = block.getAttribute(kAtFix); + if(val.length() > 0) + block.removeAttribute(kAtFix); + + + if(val.length() > 0) + { + val = block.getAttributeNS("", kAtFix); + if(val.length() > 0) + block.removeAttributeNS("", kAtFix); + } + + if(val.length() > 0) + DOM::transcode16to8(val, fix); + + if(fix.length() == 0) + fix = kElPara; + + DOM::Element el = doc.createElement(fix); + DOMHelpers::copyAttributes(block, el, NULL); + + while(block.hasChildNodes()) + el.appendChild(block.removeChild(block.getFirstChild())); + + parent.replaceChild(el, block); + } + } +} + + +/** + * Removes adjacent duplicate nodes of certain names + */ +void RtfFixups::removeDuplicates(const DOM::Document& doc) +{ + // Go through the list of nodes + for(const char** t = kNoDuplicates; *t = NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + int x = elements->getLength(); + for(int j = 0; j < elements->getLength(); j++) + { + + // Make sure it's a valid element + DOM::Element element = (const DOM::Element&)elements->item(j); + if(element == NULL) + continue; + + // Get neighbors + DOM::Node previous = element.getPreviousSibling(); + DOM::Node next = element.getNextSibling(); + + // Make sure it's still in the document, as we may have + // removed it on a previous loop + DOM::Node parent = element.getParentNode(); + if(parent == NULL) + continue; + + // Combine previous if valid + if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE && + DOMHelpers::isEqualElement((DOM::Element&)previous, element)) + { + while(previous.hasChildNodes()) + { + DOM::Node child = previous.removeChild(previous.getLastChild()); + if(child != NULL) + { + if(element.hasChildNodes()) + element.insertBefore(child, element.getFirstChild()); + else + element.appendChild(child); + } + } + + // Remove duplicate node + parent.removeChild(previous); + } + + // Combine next if valid + if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE && + DOMHelpers::isEqualElement((DOM::Element&)next, element)) + { + while(next.hasChildNodes()) + { + DOM::Node child = next.removeChild(next.getFirstChild()); + if(child != NULL) + element.appendChild(child); + } + + // Remove duplicate node + parent.removeChild(next); + } + } + } + } +} diff --git a/src/xmlfixups.h b/src/xmlfixups.h new file mode 100644 index 0000000..8cc9b82 --- /dev/null +++ b/src/xmlfixups.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2004, Nate Nielsen + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the + * above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * * The names of contributors to this software may not be + * used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * + * CONTRIBUTORS + * Nate Nielsen + * + */ + +#ifndef __RTFFIXUPS_H__ +#define __RTFFIXUPS_H__ + +#include "sablo.h" + +class RtfFixups +{ +public: + // Cleanup Functions + static void fixBlocks(DOM::Document doc); + static void fixLists(const DOM::Document doc); + static void fixStyles(const DOM::Document doc); + static bool breakElement(const DOM::Element& el, const string& contain); + static void breakBreak(DOM::Document& doc, const string& contain, const string& tag); + static void breakLists(DOM::Document& document); + static void breakTables(DOM::Document& document); + static void breakTags(DOM::Document& doc, const string& parentName, const string& tagName); + static void breakBlocks(DOM::Document& document); + static void wrapTags(DOM::Document& document, const string& tagName, const string& wrapName); + static void removeTags(const DOM::Document& doc); + static void removeDuplicates(const DOM::Document& doc); +}; + +#endif // __RTFFIXUPS_H__ -- cgit v1.2.3