From 2ea017993016cf10f5d3f872842474ec715ca0a0 Mon Sep 17 00:00:00 2001 From: Stef Walter Date: Sun, 11 Jul 2004 01:23:51 +0000 Subject: Reorganization of functions and files. --- src/xmlfixups.cpp | 739 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 739 insertions(+) create mode 100644 src/xmlfixups.cpp (limited to 'src/xmlfixups.cpp') diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp new file mode 100644 index 0000000..7201703 --- /dev/null +++ b/src/xmlfixups.cpp @@ -0,0 +1,739 @@ +/* + * Copyright (c) 2004, Nate Nielsen + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the + * above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * * The names of contributors to this software may not be + * used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * + * CONTRIBUTORS + * Nate Nielsen + * + */ + +#include "usuals.h" +#include "rtffixups.h" +#include "domhelpers.h" +#include "tags.h" + +void RtfFixups::breakBreak(DOM::Document& doc, const string& contain, + const string& tag) +{ + DOM::NodeList els = doc.getElementsByTagName(tag); + if(els != NULL) + { + for(int i = 0; i < els->getLength(); i++) + { + DOM::Element el = (const DOM::Element&)els->item(i); +#if 0 + // See if parent node only has this break tag + // in it. If so then replace parent with this + + DOM::Node parent = el.getParentNode(); + + if(parent != NULL) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && + el.getPreviousSibling() == NULL && + el.getNextSibling() == NULL) + { + grandparent.replaceChild(parent.removeChild(el), parent); + } + } +#endif + + breakElement(el, contain); + } + } +} + +/** + * Breaks a paragraph up through a previous level. Calls itself + * recursively to break paragraphs totally free up to containing + * destination. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * + * This is a + * test of your concentration. + * + */ +bool RtfFixups::breakElement(const DOM::Element& el, const string& contain) +{ + ASSERT(el != NULL); + + DOM::Element parent = (const DOM::Element&)el.getParentNode(); + DOM::Element grandparent; + + string s = el.getNodeName(); + s = parent.getNodeName(); + + // Get the parent node + if(parent != NULL) + grandparent = (const DOM::Element&)parent.getParentNode(); + + // Make sure we have something to work with before continuing + if(grandparent == NULL || parent == NULL || + DOMHelpers::isElement(parent, contain)) + return true; + + DOM::Node e; + + // Check to see if this is the first node in the parent. + // If so then just move out to before + if(el.getPreviousSibling() == NULL) + { + e = grandparent.insertBefore(parent.removeChild(el), parent); + } + + + // Check to see if this is the last node in the parent. + // If so then just move out to after the parent + else if(el.getNextSibling() == NULL) + { + DOM::Node next = parent.getNextSibling(); + if(next == NULL) + e = grandparent.appendChild(parent.removeChild(el)); + else + e = grandparent.insertBefore(parent.removeChild(el), next); + } + + + // Otherwise it's in the middle so split the parent + // element etc... + else + { + // Clone it but not deep + DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false); + + if(parent2 == NULL) + return false; + + // Flag that tells us whether we moved anything up to parent + bool moved = false; + + // Now move all nodes after this one to the second parent. + while((e = el.getNextSibling()) != NULL) + { + parent2.appendChild(parent.removeChild(e)); + moved = true; + } + + // Remove the element from it's parent + e = parent.removeChild(el); + + // Okay now we move the paragraph up to the parent + DOMHelpers::insertAfter(grandparent, e, parent); + if(moved) + DOMHelpers::insertAfter(grandparent, parent2, e); + } + + // Now call it again with the paragraph in the new position + // untill everything's cut through! + return breakElement((DOM::Element&)e, contain); +} + +/** + * Changes from a marker based paragraph system to a contained + * paragraph system. Also applies paragraph attributes to the + * appropriate paragraph. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * This is a + * test of your concentration. + */ +void RtfFixups::breakBlocks(DOM::Document& document) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = document.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (const DOM::Element&)blocks->item(i); + + // If it's the single closed style para then break it + if(block != NULL && !block.hasChildNodes()) + breakElement(block, kElDest); + } + } + + + // Now group stuff in destinations into paras or other blocks + DOM::NodeList destinations = document.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (const DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL || !dest.hasChildNodes()) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element block; + + while(child != NULL) + { + // If it's a block + if(DOMHelpers::isElement(child, kElBlock)) + { + block = (DOM::Element&)child; + child = child.getNextSibling(); + continue; + } + + // If it's already a real block element + for(const char** t = kBlockTags; *t != NULL; t++) + { + if(DOMHelpers::isElement(child, *t)) + { + block = NULL; + break; + } + } + + // If there's a block then add to it + if(block != NULL) + { + block.appendChild(dest.removeChild(child)); + child = block; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfFixups::wrapTags(DOM::Document& doc, const string& tagName, + const string& wrapName) +{ + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (const DOM::Element&)tags->item(i); + + DOM::Element wrap = doc.createElement(wrapName); + while(tag.hasChildNodes()) + wrap.appendChild(tag.removeChild(tag.getFirstChild())); + + tag.appendChild(wrap); + } + } +} + +void RtfFixups::breakTags(DOM::Document& doc, const string& parentName, + const string& tagName) +{ + DOM::NodeList parents = doc.getElementsByTagName(parentName); + if(parents != NULL) + { + for(int i = 0; i < parents->getLength(); i++) + { + DOM::Element parent = (const DOM::Element&)parents->item(i); + + if(!parent.hasChildNodes()) + continue; + + DOM::NodeList tags = parent.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + breakElement((const DOM::Element&)tags->item(i), parentName); + } + + DOM::Node tag = doc.createElement(tagName); + parent.insertBefore(tag, parent.getFirstChild()); + + DOM::Node child = tag; + + while(child != NULL && (child = child.getNextSibling()) != NULL) + { + if(DOMHelpers::isElement(child, kElBlock)) + { + DOM::Node next = child.getNextSibling(); + if(next == NULL) + { + parent.removeChild(child); + continue; + } + + if(DOMHelpers::isElement(next, tagName)) + { + DOM::Node twodown = next.getNextSibling(); + if(!DOMHelpers::isElement(twodown, kElBlock)) + { + child = parent.insertBefore(parent.removeChild(next), child); + } + else + { + parent.removeChild(child); + child = next; + } + } + } + + if(DOMHelpers::isElement(child, tagName)) + { + if(!tag.hasChildNodes()) + parent.removeChild(tag); + tag = child; + } + else + { + tag.appendChild(parent.removeChild(child)); + child = tag; + } + } + + if(!tag.hasChildNodes()) + parent.removeChild(tag); + } + } + + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (const DOM::Element&)tags->item(i); + DOM::Node parent = tag.getParentNode(); + + if(parent != NULL && !DOMHelpers::isElement(parent, parentName)) + parent.removeChild(tag); + +#if 0 + else if(tag.hasChildNodes()) + { + DOM::NodeList children = tag.getChildNodes(); + if(children != NULL && children->getLength() == 1) + { + DOM::Node child = children->item(0); + if(child != NULL && !child.hasChildNodes() && + DOMHelpers::isElement(child, kElBlock)) + parent.removeChild(tag); + } + } +#endif + + } + } +} + +void RtfFixups::breakLists(DOM::Document& doc) +{ + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (const DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element list; + DOM::Element e; + + wstring previd; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(DOMHelpers::isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + wstring listid = e.getAttribute(kAtList); + if(listid.length() > 0) + { + e.removeAttribute(kAtList); + + if(list == NULL || previd != listid) + { + list = doc.createElement(kElList); + list.setAttribute(kAtList, listid); + dest.insertBefore(list, child); + previd = listid; + } + } + else + { + list = NULL; + previd.erase(); + } + } + + // It's not a block + if(list != NULL) + { + list.appendChild(dest.removeChild(child)); + child = list; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfFixups::fixStyles(const DOM::Document doc) +{ + DOM::NodeList styles = doc.getElementsByTagName(kElStyle); + if(styles != NULL) + { + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (const DOM::Element&)blocks->item(i); + + if(block == NULL || !block.hasAttribute(kElStyle)) + continue; + + for(int j = 0; j < styles->getLength(); j++) + { + DOM::Element style = (const DOM::Element&)styles->item(j); + if(style != NULL) + { + if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) + { + wstring name = style.getAttribute(kAtName); + if(name.length() > 0) + block.setAttribute(kElStyle, name); + } + } + } + } + } + + for(int i = 0; i < styles->getLength(); i++) + { + DOM::Element style = (const DOM::Element&)styles->item(i); + if(style != NULL) + style.removeAttribute(kAtId); + } + } + + +} + + +void RtfFixups::breakTables(DOM::Document& doc) +{ + DOM::NodeList rows = doc.getElementsByTagName(kElRow); + if(rows != NULL) + { + for(int i = 0; i < rows->getLength(); i++) + { + DOM::Element row = (const DOM::Element&)rows->item(i); + DOM::Node parent = row.getParentNode(); + + if(parent == NULL) + continue; + + if(DOMHelpers::isElement(parent, kElBlock)) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && !row.hasChildNodes()) + { + if(row.getPreviousSibling() == NULL) + grandparent.insertBefore(parent.removeChild(row), parent); + else if(row.getNextSibling() == NULL) + DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent); + } + } + + breakElement(row, kElDest); + } + } + + + + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (const DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element table; + DOM::Element e; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(DOMHelpers::isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + if(e.getAttribute(kAtCell).length() > 0) + { + e.removeAttribute(kAtCell); + + if(table == NULL) + { + table = doc.createElement(kElTable); + dest.insertBefore(table, child); + } + } + else + { + table = NULL; + } + } + + // It's not a block + if(table != NULL) + { + table.appendChild(dest.removeChild(child)); + child = table; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfFixups::removeTags(const DOM::Document& doc) +{ + // Go through the list of nodes + for(const char** t = kRemoveTags; *t != NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + for(int j = 0; j < elements->getLength(); j++) + { + DOM::Element el = (const DOM::Element&)elements->item(j); + DOM::Node parent = el->getParentNode(); + + if(parent == NULL) + continue; + + while(el.hasChildNodes()) + parent.insertBefore(el.removeChild(el.getFirstChild()), el); + + parent.removeChild(el); + } + } + } +} + +void RtfFixups::fixLists(const DOM::Document doc) +{ + DOM::NodeList lists = doc.getElementsByTagName(kElList); + if(lists != NULL) + { + DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); + if(listdefs != NULL) + { + for(int i = 0; i < listdefs->getLength(); i++) + { + DOM::Element listdef = (const DOM::Element&)listdefs->item(i); + + if(listdef == NULL || !listdef.hasAttribute(kAtList)) + continue; + + for(int j = 0; j < lists->getLength(); j++) + { + DOM::Element list = (const DOM::Element&)lists->item(j); + if(list != NULL) + { + if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) + { + DOMHelpers::copyAttributes(listdef, list, kHideList); + list.removeAttribute(kAtList); + } + } + } + } + } + } +} + +void RtfFixups::fixBlocks(const DOM::Document doc) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + string fix; + wstring val; + + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (const DOM::Element&)blocks->item(i); + DOM::Node parent = block.getParentNode(); + + if(parent == NULL) + continue; + + fix.resize(0); + val.resize(0); + + val = block.getAttribute(kAtFix); + if(val.length() > 0) + block.removeAttribute(kAtFix); + + + if(val.length() > 0) + { + val = block.getAttributeNS("", kAtFix); + if(val.length() > 0) + block.removeAttributeNS("", kAtFix); + } + + if(val.length() > 0) + DOM::transcode16to8(val, fix); + + if(fix.length() == 0) + fix = kElPara; + + DOM::Element el = doc.createElement(fix); + DOMHelpers::copyAttributes(block, el, NULL); + + while(block.hasChildNodes()) + el.appendChild(block.removeChild(block.getFirstChild())); + + parent.replaceChild(el, block); + } + } +} + + +/** + * Removes adjacent duplicate nodes of certain names + */ +void RtfFixups::removeDuplicates(const DOM::Document& doc) +{ + // Go through the list of nodes + for(const char** t = kNoDuplicates; *t = NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + int x = elements->getLength(); + for(int j = 0; j < elements->getLength(); j++) + { + + // Make sure it's a valid element + DOM::Element element = (const DOM::Element&)elements->item(j); + if(element == NULL) + continue; + + // Get neighbors + DOM::Node previous = element.getPreviousSibling(); + DOM::Node next = element.getNextSibling(); + + // Make sure it's still in the document, as we may have + // removed it on a previous loop + DOM::Node parent = element.getParentNode(); + if(parent == NULL) + continue; + + // Combine previous if valid + if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE && + DOMHelpers::isEqualElement((DOM::Element&)previous, element)) + { + while(previous.hasChildNodes()) + { + DOM::Node child = previous.removeChild(previous.getLastChild()); + if(child != NULL) + { + if(element.hasChildNodes()) + element.insertBefore(child, element.getFirstChild()); + else + element.appendChild(child); + } + } + + // Remove duplicate node + parent.removeChild(previous); + } + + // Combine next if valid + if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE && + DOMHelpers::isEqualElement((DOM::Element&)next, element)) + { + while(next.hasChildNodes()) + { + DOM::Node child = next.removeChild(next.getFirstChild()); + if(child != NULL) + element.appendChild(child); + } + + // Remove duplicate node + parent.removeChild(next); + } + } + } + } +} -- cgit v1.2.3