/*
 * Copyright (c) 2004, Nate Nielsen
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above
 *       copyright notice, this list of conditions and the
 *       following disclaimer.
 *     * Redistributions in binary form must reproduce the
 *       above copyright notice, this list of conditions and
 *       the following disclaimer in the documentation and/or
 *       other materials provided with the distribution.
 *     * The names of contributors to this software may not be
 *       used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 *
 * CONTRIBUTORS
 *  Nate Nielsen
 *
 */

#include "usuals.h"
#include "rtffixups.h"
#include "domhelpers.h"
#include "tags.h"

// Element names whose adjacent, equal siblings are merged by
// removeDuplicates().
// NOTE(review): this list originally named kElColor twice. The redundant
// entry has been dropped; if a different tag was intended, add it here.
static const char* kNoDuplicates[] =
    { kElB, kElU, kElI, kElColor, kElHide, kElSuper, kElSub, NULL };

// Element names that removeTags() unwraps (children are kept, the
// element itself is dropped).
static const char* kRemoveTags[] =
    { kElDest, kElListdef, kElListtable, NULL };

// Element names that breakBlocks() treats as already being real block
// containers; content following one is not pulled into a previous block.
static const char* kBlockTags[] =
    { kElTable, kElPara, NULL };

// Attribute names handed to DOMHelpers::copyAttributes() by fixLists().
// Presumably these are the attributes to leave out of the copy -- confirm
// against DOMHelpers.
static const char* kHideList[] =
    { kAtId, kAtList, NULL };

// Element names that consolidateEndTags() moves to the end of the
// document element.
static const char* kConsolidateEnd[] =
    { kElFootNote, NULL };

// Element names that consolidateStartTags() moves to the start of the
// document element.
static const char* kConsolidateStart[] =
    { kElInfo, kElStylesheet, NULL };

/**
 * Runs breakElement() on every element in the document named 'tag',
 * lifting each one out of its ancestors until its parent is a 'contain'
 * element.
 *
 * @param doc      document to search and modify
 * @param contain  name of the container element at which lifting stops
 * @param tag      name of the elements to lift
 */
void RtfFixups::breakBreak(DOM::Document& doc, const string& contain,
                           const string& tag)
{
    DOM::NodeList els = doc.getElementsByTagName(tag);
    if(els != NULL)
    {
        for(int i = 0; i < els->getLength(); i++)
        {
            DOM::Element el = (const DOM::Element&)els->item(i);

#if 0
            // See if parent node only has this break tag
            // in it. If so then replace parent with this

            DOM::Node parent = el.getParentNode();

            if(parent != NULL)
            {
                DOM::Node grandparent = parent.getParentNode();

                if(grandparent != NULL &&
                   el.getPreviousSibling() == NULL &&
                   el.getNextSibling() == NULL)
                {
                    grandparent.replaceChild(parent.removeChild(el), parent);
                }
            }
#endif

            breakElement(el, contain);
        }
    }
}

/**
 * Breaks an element up through a previous level. Calls itself
 * recursively to break the element totally free, up to the containing
 * element named by 'contain'.
 *
 * For example, with contain = "dest" (illustrative; the element names
 * are placeholders):
 *
 *   <dest><b>This is a <block/>test of your concentration.</b></dest>
 *
 * Becomes:
 *
 *   <dest><b>This is a </b><block/><b>test of your concentration.</b></dest>
 *
 * @param el       element to lift; must not be NULL
 * @param contain  name of the element at which lifting stops
 * @return true once the element sits directly under a 'contain' element
 *         (or has no parent / grandparent to lift through); false if the
 *         parent could not be cloned while splitting it
 */
bool RtfFixups::breakElement(const DOM::Element& el, const string& contain)
{
    ASSERT(el != NULL);

    DOM::Element parent = (const DOM::Element&)el.getParentNode();
    DOM::Element grandparent;

    // Get the grandparent node. The parent is only dereferenced once it
    // is known to be valid.
    if(parent != NULL)
        grandparent = (const DOM::Element&)parent.getParentNode();

    // Make sure we have something to work with before continuing
    if(grandparent == NULL || parent == NULL ||
       DOMHelpers::isElement(parent, contain))
        return true;

    DOM::Node e;

    // Check to see if this is the first node in the parent.
    // If so then just move out to before
    if(el.getPreviousSibling() == NULL)
    {
        e = grandparent.insertBefore(parent.removeChild(el), parent);
    }

    // Check to see if this is the last node in the parent.
    // If so then just move out to after the parent
    else if(el.getNextSibling() == NULL)
    {
        DOM::Node next = parent.getNextSibling();
        if(next == NULL)
            e = grandparent.appendChild(parent.removeChild(el));
        else
            e = grandparent.insertBefore(parent.removeChild(el), next);
    }

    // Otherwise it's in the middle so split the parent
    // element etc...
    else
    {
        // Clone it but not deep
        DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false);

        if(parent2 == NULL)
            return false;

        // Flag that tells us whether we moved anything over to the clone
        bool moved = false;

        // Now move all nodes after this one to the second parent.
        while((e = el.getNextSibling()) != NULL)
        {
            parent2.appendChild(parent.removeChild(e));
            moved = true;
        }

        // Remove the element from its parent
        e = parent.removeChild(el);

        // Okay now we move the element up, right after the old parent
        DOMHelpers::insertAfter(grandparent, e, parent);

        // ... and the clone holding the trailing content goes after it
        if(moved)
            DOMHelpers::insertAfter(grandparent, parent2, e);
    }

    // Now call it again with the element in the new position
    // until everything's cut through!
    return breakElement((DOM::Element&)e, contain);
}

/**
 * Changes from a marker based paragraph system to a contained
 * paragraph system. The block element keeps its own attributes, so they
 * end up on the container that holds the paragraph content.
 *
 * For example (illustrative; "dest" and "block" stand for the kElDest
 * and kElBlock element names):
 *
 *   <dest><block/>This is a <block/>test of your concentration.</dest>
 *
 * Becomes:
 *
 *   <dest><block>This is a </block><block>test of your
 *   concentration.</block></dest>
 *
 * @param document  document to modify
 */
void RtfFixups::breakBlocks(DOM::Document& document)
{
    // First break out all the empty block markers to the destination level
    DOM::NodeList blocks = document.getElementsByTagName(kElBlock);
    if(blocks != NULL)
    {
        for(int i = 0; i < blocks->getLength(); i++)
        {
            DOM::Element block = (const DOM::Element&)blocks->item(i);

            // If it's the single closed style para then break it
            if(block != NULL && !block.hasChildNodes())
                breakElement(block, kElDest);
        }
    }

    // Now group stuff in destinations into paras or other blocks
    DOM::NodeList destinations = document.getElementsByTagName(kElDest);
    if(destinations != NULL)
    {
        for(int i = 0; i < destinations->getLength(); i++)
        {
            DOM::Element dest = (const DOM::Element&)destinations->item(i);

            // Sanity Check
            if(dest == NULL || !dest.hasChildNodes())
                continue;

            // Go through the children of this destination
            DOM::Node child = dest.getFirstChild();

            // The block that is currently collecting content, if any
            DOM::Element block;

            while(child != NULL)
            {
                // If it's a block, it becomes the current collector
                if(DOMHelpers::isElement(child, kElBlock))
                {
                    block = (DOM::Element&)child;
                    child = child.getNextSibling();
                    continue;
                }

                // If it's already a real block element, stop collecting
                for(const char** t = kBlockTags; *t != NULL; t++)
                {
                    if(DOMHelpers::isElement(child, *t))
                    {
                        block = NULL;
                        break;
                    }
                }

                // If there's a block then add to it, and carry on from
                // the block's position in the destination
                if(block != NULL)
                {
                    block.appendChild(dest.removeChild(child));
                    child = block;
                }

                child = child.getNextSibling();
            }
        }
    }
}

/**
 * Wraps the content of every element named 'tagName' in a new element
 * named 'wrapName': all children are moved into the wrapper, which then
 * becomes the element's only child.
 *
 * @param doc       document to search and modify
 * @param tagName   name of the elements whose content is wrapped
 * @param wrapName  name of the wrapper element to create
 */
void RtfFixups::wrapTags(DOM::Document& doc, const string& tagName,
                         const string& wrapName)
{
    DOM::NodeList tags = doc.getElementsByTagName(tagName);
    if(tags != NULL)
    {
        for(int i = 0; i < tags->getLength(); i++)
        {
            DOM::Element tag = (const DOM::Element&)tags->item(i);

            // Sanity Check
            if(tag == NULL)
                continue;

            DOM::Element wrap = doc.createElement(wrapName);
            while(tag.hasChildNodes())
                wrap.appendChild(tag.removeChild(tag.getFirstChild()));

            tag.appendChild(wrap);
        }
    }
}

/**
 * Turns marker style 'tagName' elements inside each 'parentName' element
 * into containers: every marker is first lifted to be a direct child of
 * the parent, and then the content following each marker is moved
 * inside it. Content before the first marker goes into a newly created
 * 'tagName' element. Finally any 'tagName' element in the document whose
 * parent is not a 'parentName' element is removed.
 *
 * @param doc         document to search and modify
 * @param parentName  name of the elements that hold the markers
 * @param tagName     name of the marker elements
 */
void RtfFixups::breakTags(DOM::Document& doc, const string& parentName,
                          const string& tagName)
{
    DOM::NodeList parents = doc.getElementsByTagName(parentName);
    if(parents != NULL)
    {
        for(int i = 0; i < parents->getLength(); i++)
        {
            DOM::Element parent = (const DOM::Element&)parents->item(i);

            if(parent == NULL || !parent.hasChildNodes())
                continue;

            // Lift every marker below this parent up to be its child
            DOM::NodeList markers = parent.getElementsByTagName(tagName);
            if(markers != NULL)
            {
                for(int j = 0; j < markers->getLength(); j++)
                    breakElement((const DOM::Element&)markers->item(j), parentName);
            }

            // A fresh container for whatever precedes the first marker
            DOM::Node tag = doc.createElement(tagName);
            parent.insertBefore(tag, parent.getFirstChild());

            DOM::Node child = tag;

            while(child != NULL && (child = child.getNextSibling()) != NULL)
            {
                // Blocks directly before a marker get special handling
                if(DOMHelpers::isElement(child, kElBlock))
                {
                    DOM::Node next = child.getNextSibling();

                    // A trailing block is dropped
                    if(next == NULL)
                    {
                        parent.removeChild(child);
                        continue;
                    }

                    if(DOMHelpers::isElement(next, tagName))
                    {
                        // NOTE(review): twodown may be NULL here; this
                        // relies on DOMHelpers::isElement coping with
                        // that -- TODO confirm
                        DOM::Node twodown = next.getNextSibling();
                        if(!DOMHelpers::isElement(twodown, kElBlock))
                        {
                            // Swap so that the marker precedes the block
                            child = parent.insertBefore(parent.removeChild(next), child);
                        }
                        else
                        {
                            // Another block follows the marker, so this
                            // one is dropped
                            parent.removeChild(child);
                            child = next;
                        }
                    }
                }

                // A marker becomes the current container ...
                if(DOMHelpers::isElement(child, tagName))
                {
                    if(!tag.hasChildNodes())
                        parent.removeChild(tag);
                    tag = child;
                }

                // ... anything else moves into the current container
                else
                {
                    tag.appendChild(parent.removeChild(child));
                    child = tag;
                }
            }

            // Don't leave an empty container behind
            if(!tag.hasChildNodes())
                parent.removeChild(tag);
        }
    }

    // Remove any of these tags that ended up outside of a parent
    DOM::NodeList tags = doc.getElementsByTagName(tagName);
    if(tags != NULL)
    {
        for(int i = 0; i < tags->getLength(); i++)
        {
            DOM::Element tag = (const DOM::Element&)tags->item(i);
            DOM::Node parent = tag.getParentNode();

            if(parent != NULL && !DOMHelpers::isElement(parent, parentName))
                parent.removeChild(tag);

#if 0
            else if(tag.hasChildNodes())
            {
                DOM::NodeList children = tag.getChildNodes();
                if(children != NULL && children->getLength() == 1)
                {
                    DOM::Node child = children->item(0);
                    if(child != NULL && !child.hasChildNodes() &&
                       DOMHelpers::isElement(child, kElBlock))
                        parent.removeChild(tag);
                }
            }
#endif
        }
    }
}

/**
 * Groups runs of blocks that carry the same list attribute (kAtList)
 * into newly created list elements (kElList) inside each destination.
 * The attribute is removed from the blocks and set on the list element.
 *
 * @param doc  document to modify
 */
void RtfFixups::breakLists(DOM::Document& doc)
{
    // Group stuff in destinations into lists
    DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
    if(destinations != NULL)
    {
        for(int i = 0; i < destinations->getLength(); i++)
        {
            DOM::Element dest = (const DOM::Element&)destinations->item(i);

            // Sanity Check
            if(dest == NULL)
                continue;

            // Go through the children of this destination
            DOM::Node child = dest.getFirstChild();

            // The list currently collecting content, if any
            DOM::Element list;
            DOM::Element e;

            // The list id of the list currently being collected
            wstring previd;

            while(child != NULL)
            {
                // If it's a block, see whether it belongs to a list
                if(DOMHelpers::isElement(child, kElBlock))
                {
                    e = (DOM::Element&)child;

                    // If it has a list attribute
                    wstring listid = e.getAttribute(kAtList);
                    if(listid.length() > 0)
                    {
                        e.removeAttribute(kAtList);

                        // Start a new list when none is open or the
                        // list id changes
                        if(list == NULL || previd != listid)
                        {
                            list = doc.createElement(kElList);
                            list.setAttribute(kAtList, listid);
                            dest.insertBefore(list, child);
                            previd = listid;
                        }
                    }

                    // A block without a list attribute ends the list
                    else
                    {
                        list = NULL;
                        previd.erase();
                    }
                }

                // While a list is open everything moves into it, and we
                // carry on from the list's position in the destination
                if(list != NULL)
                {
                    list.appendChild(dest.removeChild(child));
                    child = list;
                }

                child = child.getNextSibling();
            }
        }
    }
}

/**
 * Replaces the style reference on each block with the style's name: for
 * every block carrying a kElStyle attribute, each style element whose
 * kAtId matches that value supplies its kAtName (when not empty) as the
 * new attribute value. Afterwards kAtId is removed from all style
 * elements.
 *
 * @param doc  document to modify
 */
void RtfFixups::fixStyles(const DOM::Document doc)
{
    DOM::NodeList styles = doc.getElementsByTagName(kElStyle);
    if(styles != NULL)
    {
        DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
        if(blocks != NULL)
        {
            for(int i = 0; i < blocks->getLength(); i++)
            {
                DOM::Element block = (const DOM::Element&)blocks->item(i);

                if(block == NULL || !block.hasAttribute(kElStyle))
                    continue;

                for(int j = 0; j < styles->getLength(); j++)
                {
                    DOM::Element style = (const DOM::Element&)styles->item(j);
                    if(style != NULL)
                    {
                        if(style.getAttribute(kAtId) == block.getAttribute(kElStyle))
                        {
                            wstring name = style.getAttribute(kAtName);
                            if(name.length() > 0)
                                block.setAttribute(kElStyle, name);
                        }
                    }
                }
            }
        }

        // The ids are no longer needed once the blocks use names
        for(int i = 0; i < styles->getLength(); i++)
        {
            DOM::Element style = (const DOM::Element&)styles->item(i);
            if(style != NULL)
                style.removeAttribute(kAtId);
        }
    }
}

/**
 * Lifts row elements (kElRow) up to the destination level and then
 * groups runs of blocks that carry a cell attribute (kAtCell) into
 * newly created table elements (kElTable) inside each destination. The
 * cell attribute is removed from the blocks.
 *
 * @param doc  document to modify
 */
void RtfFixups::breakTables(DOM::Document& doc)
{
    DOM::NodeList rows = doc.getElementsByTagName(kElRow);
    if(rows != NULL)
    {
        for(int i = 0; i < rows->getLength(); i++)
        {
            DOM::Element row = (const DOM::Element&)rows->item(i);

            // Sanity Check
            if(row == NULL)
                continue;

            DOM::Node parent = row.getParentNode();

            if(parent == NULL)
                continue;

            // An empty row at the very start or end of a block is moved
            // out next to that block rather than splitting it
            if(DOMHelpers::isElement(parent, kElBlock))
            {
                DOM::Node grandparent = parent.getParentNode();

                if(grandparent != NULL && !row.hasChildNodes())
                {
                    if(row.getPreviousSibling() == NULL)
                        grandparent.insertBefore(parent.removeChild(row), parent);
                    else if(row.getNextSibling() == NULL)
                        DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent);
                }
            }

            breakElement(row, kElDest);
        }
    }

    // Now group stuff in destinations into tables
    DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
    if(destinations != NULL)
    {
        for(int i = 0; i < destinations->getLength(); i++)
        {
            DOM::Element dest = (const DOM::Element&)destinations->item(i);

            // Sanity Check
            if(dest == NULL)
                continue;

            // Go through the children of this destination
            DOM::Node child = dest.getFirstChild();

            // The table currently collecting content, if any
            DOM::Element table;
            DOM::Element e;

            while(child != NULL)
            {
                // If it's a block, see whether it belongs to a table
                if(DOMHelpers::isElement(child, kElBlock))
                {
                    e = (DOM::Element&)child;

                    // If it has a cell attribute
                    if(e.getAttribute(kAtCell).length() > 0)
                    {
                        e.removeAttribute(kAtCell);

                        if(table == NULL)
                        {
                            table = doc.createElement(kElTable);
                            dest.insertBefore(table, child);
                        }
                    }

                    // A block without a cell attribute ends the table
                    else
                    {
                        table = NULL;
                    }
                }

                // While a table is open everything moves into it, and we
                // carry on from the table's position in the destination
                if(table != NULL)
                {
                    table.appendChild(dest.removeChild(child));
                    child = table;
                }

                child = child.getNextSibling();
            }
        }
    }
}

/**
 * Unwraps every element named in kRemoveTags: its children are moved
 * up to take its place, and the element itself is removed.
 *
 * @param doc  document to modify
 */
void RtfFixups::removeTags(const DOM::Document& doc)
{
    // Go through the list of tag names
    for(const char** t = kRemoveTags; *t != NULL; t++)
    {
        DOM::NodeList elements = doc.getElementsByTagName(*t);
        if(elements != NULL)
        {
            for(int j = 0; j < elements->getLength(); j++)
            {
                DOM::Element el = (const DOM::Element&)elements->item(j);

                // Sanity Check
                if(el == NULL)
                    continue;

                DOM::Node parent = el.getParentNode();

                if(parent == NULL)
                    continue;

                // Children go right before the element, keeping order
                while(el.hasChildNodes())
                    parent.insertBefore(el.removeChild(el.getFirstChild()), el);

                parent.removeChild(el);
            }
        }
    }
}

/**
 * Applies list definitions to the lists that reference them: for every
 * list definition (kElListdef) with a kAtList attribute, each list
 * element (kElList) with the same kAtList value gets the definition's
 * attributes via DOMHelpers::copyAttributes() and then loses its own
 * kAtList attribute.
 *
 * @param doc  document to modify
 */
void RtfFixups::fixLists(const DOM::Document doc)
{
    DOM::NodeList lists = doc.getElementsByTagName(kElList);
    if(lists != NULL)
    {
        DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef);
        if(listdefs != NULL)
        {
            for(int i = 0; i < listdefs->getLength(); i++)
            {
                DOM::Element listdef = (const DOM::Element&)listdefs->item(i);

                if(listdef == NULL || !listdef.hasAttribute(kAtList))
                    continue;

                for(int j = 0; j < lists->getLength(); j++)
                {
                    DOM::Element list = (const DOM::Element&)lists->item(j);
                    if(list != NULL)
                    {
                        if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList))
                        {
                            DOMHelpers::copyAttributes(listdef, list, kHideList);
                            list.removeAttribute(kAtList);
                        }
                    }
                }
            }
        }
    }
}

/**
 * Replaces every block element (kElBlock) with its final element. The
 * new element's name is taken from the block's kAtFix attribute, or
 * kElPara when no name is found. Attributes and children are carried
 * over from the block.
 *
 * @param doc  document to modify
 */
void RtfFixups::fixBlocks(const DOM::Document doc)
{
    // Go through all the blocks in the document
    DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
    if(blocks != NULL)
    {
        string fix;
        wstring val;

        for(int i = 0; i < blocks->getLength(); i++)
        {
            DOM::Element block = (const DOM::Element&)blocks->item(i);

            // Sanity Check
            if(block == NULL)
                continue;

            DOM::Node parent = block.getParentNode();

            if(parent == NULL)
                continue;

            // Start afresh for each block
            fix.resize(0);
            val.resize(0);

            val = block.getAttribute(kAtFix);
            if(val.length() > 0)
                block.removeAttribute(kAtFix);

            // NOTE(review): this condition looks inverted. As written the
            // namespaced lookup only runs when the plain lookup already
            // found (and removed) the attribute, and its result replaces
            // the value found above. Left as is because the behaviour
            // depends on how the DOM wrapper treats the two lookups --
            // confirm and change to (val.length() == 0) if a fallback
            // was intended.
            if(val.length() > 0)
            {
                val = block.getAttributeNS("", kAtFix);
                if(val.length() > 0)
                    block.removeAttributeNS("", kAtFix);
            }

            if(val.length() > 0)
                DOM::transcode16to8(val, fix);

            // Blocks are paragraphs unless told otherwise
            if(fix.length() == 0)
                fix = kElPara;

            DOM::Element el = doc.createElement(fix);
            DOMHelpers::copyAttributes(block, el, NULL);

            while(block.hasChildNodes())
                el.appendChild(block.removeChild(block.getFirstChild()));

            parent.replaceChild(el, block);
        }
    }
}

/**
 * Consolidates certain tag types (kConsolidateEnd) at the end of the
 * document
 *
 * @param doc  document to modify
 */
void RtfFixups::consolidateEndTags(DOM::Document& doc)
{
    DOM::Element top = doc.getDocumentElement();
    ASSERT(top != NULL);

    for(const char** t = kConsolidateEnd; *t != NULL; t++)
    {
        DOM::NodeList elements = doc.getElementsByTagName(*t);
        if(elements != NULL)
        {
            int x = elements->getLength();
            for(int j = 0; j < x; j++)
            {
                // Make sure it's a valid element
                DOM::Element element = (const DOM::Element&)elements->item(j);
                if(element == NULL)
                    continue;

                DOM::Element parent = (const DOM::Element&)element.getParentNode();
                if(parent == NULL)
                    continue;

                // Remove it from its parent
                parent.removeChild(element);

                // And append it to the end of the document
                top.appendChild(element);
            }
        }
    }
}

/**
 * Consolidates certain tag types (kConsolidateStart) at the start of
 * the document. Elements are inserted before the node that was the
 * document element's first child when this function was entered.
 *
 * @param doc  document to modify
 */
void RtfFixups::consolidateStartTags(DOM::Document& doc)
{
    DOM::Element top = doc.getDocumentElement();
    ASSERT(top != NULL);

    DOM::Node first = top.getFirstChild();

    for(const char** t = kConsolidateStart; *t != NULL; t++)
    {
        DOM::NodeList elements = doc.getElementsByTagName(*t);
        if(elements != NULL)
        {
            int x = elements->getLength();
            for(int j = 0; j < x; j++)
            {
                // Make sure it's a valid element, and not already in place
                DOM::Element element = (const DOM::Element&)elements->item(j);
                if(element == NULL || element == first)
                    continue;

                DOM::Element parent = (const DOM::Element&)element.getParentNode();
                if(parent == NULL)
                    continue;

                // Remove it from its parent
                parent.removeChild(element);

                // And put it at the start of the document
                ASSERT(first != NULL);
                top.insertBefore(element, first);
            }
        }
    }
}

/**
 * Removes adjacent duplicate nodes of certain names (kNoDuplicates).
 * When a sibling right before or after an element is an equal element
 * (as judged by DOMHelpers::isEqualElement), the sibling's children are
 * moved into the element and the sibling is removed. Passes are
 * repeated until one finds nothing to merge.
 *
 * @param doc  document to modify
 */
void RtfFixups::removeDuplicates(const DOM::Document& doc)
{
    bool found;

    do
    {
        found = false;

        // Go through the list of tag names
        for(const char** t = kNoDuplicates; *t != NULL; t++)
        {
            DOM::NodeList elements = doc.getElementsByTagName(*t);
            if(elements != NULL)
            {
                int x = elements->getLength();
                for(int j = 0; j < x; j++)
                {
                    // Make sure it's a valid element
                    DOM::Element element = (const DOM::Element&)elements->item(j);
                    if(element == NULL)
                        continue;

                    // Get neighbors
                    DOM::Node previous = element.getPreviousSibling();
                    DOM::Node next = element.getNextSibling();

                    // Make sure it's still in the document, as we may have
                    // removed it on a previous loop
                    DOM::Node parent = element.getParentNode();
                    if(parent == NULL)
                        continue;

                    // Combine previous if valid
                    if(previous != NULL &&
                       previous.getNodeType() == DOM::Node::ELEMENT_NODE &&
                       DOMHelpers::isEqualElement((DOM::Element&)previous, element))
                    {
                        // Its children go in front of ours, last one
                        // first so that their order is kept
                        while(previous.hasChildNodes())
                        {
                            DOM::Node child = previous.removeChild(previous.getLastChild());
                            if(child != NULL)
                            {
                                if(element.hasChildNodes())
                                    element.insertBefore(child, element.getFirstChild());
                                else
                                    element.appendChild(child);
                            }
                        }

                        // Remove duplicate node
                        parent.removeChild(previous);
                        found = true;
                    }

                    // Combine next if valid
                    if(next != NULL &&
                       next.getNodeType() == DOM::Node::ELEMENT_NODE &&
                       DOMHelpers::isEqualElement((DOM::Element&)next, element))
                    {
                        // Its children go after ours
                        while(next.hasChildNodes())
                        {
                            DOM::Node child = next.removeChild(next.getFirstChild());
                            if(child != NULL)
                                element.appendChild(child);
                        }

                        // Remove duplicate node
                        parent.removeChild(next);
                        found = true;
                    }
                }
            }
        }

    // Keep looping until no more duplicates found
    } while(found);
}