From 507524b97ef3bedb42f6c15ec93eedff8ee4b150 Mon Sep 17 00:00:00 2001 From: "(no author)" <(no author)> Date: Wed, 26 Nov 2003 02:12:18 +0000 Subject: New repository initialized by cvs2svn. --- src/xmlfixups.cpp | 794 ------------------------------------------------------ 1 file changed, 794 deletions(-) delete mode 100644 src/xmlfixups.cpp (limited to 'src/xmlfixups.cpp') diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp deleted file mode 100644 index 3b56f6b..0000000 --- a/src/xmlfixups.cpp +++ /dev/null @@ -1,794 +0,0 @@ -/* - * Copyright (c) 2004, Nate Nielsen - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * * Redistributions in binary form must reproduce the - * above copyright notice, this list of conditions and - * the following disclaimer in the documentation and/or - * other materials provided with the distribution. - * * The names of contributors to this software may not be - * used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - * - * - * CONTRIBUTORS - * Nate Nielsen - * - */ - -#include "usuals.h" -#include "xmlfixups.h" -#include "domhelpers.h" -#include "tags.h" - -static const char* kNoDuplicates[] = - { kElB, kElU, kElI, kElFont, kElHide, kElSuper, kElSub, NULL }; - -static const char* kRequireAttrs[] = - { kElFont, NULL }; - -static const char* kRemoveTags[] = - { kElDest, kElListdef, kElListtable, kElFontTable, kElFontDef, NULL }; - -static const char* kRemoveEmpty[] = - { kElOptions, kElList, NULL }; - -static const char* kBlockTags[] = - { kElTable, kElPara, NULL }; - -static const char* kHideList[] = - { kAtId, kAtList, NULL }; - -static const char* kConsolidateEnd[] = - { kElFootNote, NULL }; - -static const char* kConsolidateStart[] = - { kElStylesheet, kElInfo, NULL }; - -void loadStringSet(StringSet& set, const char** strings) -{ - while(*strings) - set.insert(string(*(strings++))); -} - -XmlFixups::XmlFixups() -{ - loadStringSet(m_duplicates, kNoDuplicates); - loadStringSet(m_removes, kRemoveTags); - loadStringSet(m_removeEmpty, kRemoveEmpty); - loadStringSet(m_requireAttrs, kRequireAttrs); - loadStringSet(m_consolidateStart, kConsolidateStart); - loadStringSet(m_consolidateEnd, kConsolidateEnd); -} - -bool XmlFixups::breakElement(const DOM::Element& el, const string& contain) -{ - ASSERT(el != NULL); - - DOM::Element parent = (const DOM::Element&)el.getParentNode(); - DOM::Element grandparent; - - string s = el.getNodeName(); - s = parent.getNodeName(); - - // Get the parent node - if(parent != NULL) - grandparent = (const DOM::Element&)parent.getParentNode(); - - // Make sure we have something to work with before continuing - if(grandparent == NULL || parent == NULL || - DOMHelpers::isElement(parent, contain)) - return true; - - DOM::Node e; - - // Check to see if this is the first node in the parent. - // If so then just move out to before - if(el.getPreviousSibling() == NULL) - { - e = grandparent.insertBefore(parent.removeChild(el), parent); - } - - - // Check to see if this is the last node in the parent. - // If so then just move out to after the parent - else if(el.getNextSibling() == NULL) - { - DOM::Node next = parent.getNextSibling(); - if(next == NULL) - e = grandparent.appendChild(parent.removeChild(el)); - else - e = grandparent.insertBefore(parent.removeChild(el), next); - } - - - // Otherwise it's in the middle so split the parent - // element etc... - else - { - // Clone it but not deep - DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false); - - if(parent2 == NULL) - return false; - - // Flag that tells us whether we moved anything up to parent - bool moved = false; - - // Now move all nodes after this one to the second parent. - while((e = el.getNextSibling()) != NULL) - { - parent2.appendChild(parent.removeChild(e)); - moved = true; - } - - // Remove the element from it's parent - e = parent.removeChild(el); - - // Okay now we move the paragraph up to the parent - DOMHelpers::insertAfter(grandparent, e, parent); - if(moved) - DOMHelpers::insertAfter(grandparent, parent2, e); - } - - // Now call it again with the paragraph in the new position - // until everything's cut through! - return breakElement((DOM::Element&)e, contain); -} - -void XmlFixups::breakBlocks(DOM::Document& document) -{ - // First break out all the paragraphs to the destination level - DOM::NodeList blocks = document.getElementsByTagName(kElBlock); - if(blocks != NULL) - { - for(int i = 0; i < blocks->getLength(); i++) - { - DOM::Element block = (const DOM::Element&)blocks->item(i); - - // If it's the single closed style para then break it - if(block != NULL && !block.hasChildNodes()) - breakElement(block, kElDest); - } - } - - - // Now group stuff in destinations into paras or other blocks - DOM::NodeList destinations = document.getElementsByTagName(kElDest); - if(destinations != NULL) - { - for(int i = 0; i < destinations->getLength(); i++) - { - DOM::Element dest = (const DOM::Element&)destinations->item(i); - - // Sanity Check - if(dest == NULL || !dest.hasChildNodes()) - continue; - - // Go through the children of this destination - DOM::Node child = dest.getFirstChild(); - - DOM::Element block; - - while(child != NULL) - { - // If it's a block - if(DOMHelpers::isElement(child, kElBlock)) - { - block = (DOM::Element&)child; - child = child.getNextSibling(); - continue; - } - - // If it's already a real block element - for(const char** t = kBlockTags; *t != NULL; t++) - { - if(DOMHelpers::isElement(child, *t)) - { - block = NULL; - break; - } - } - - // If there's a block then add to it - if(block != NULL) - { - block.appendChild(dest.removeChild(child)); - child = block; - } - - child = child.getNextSibling(); - } - } - } -} - -void XmlFixups::wrapTags(DOM::Document& doc, const string& tagName, - const string& wrapName) -{ - DOM::NodeList tags = doc.getElementsByTagName(tagName); - if(tags != NULL) - { - for(int i = 0; i < tags->getLength(); i++) - { - DOM::Element tag = (const DOM::Element&)tags->item(i); - - DOM::Element wrap = doc.createElement(wrapName); - while(tag.hasChildNodes()) - wrap.appendChild(tag.removeChild(tag.getFirstChild())); - - tag.appendChild(wrap); - } - } -} - -void XmlFixups::breakTags(DOM::Document& doc, const string& parentName, - const string& tagName) -{ - DOM::NodeList parents = doc.getElementsByTagName(parentName); - if(parents != NULL) - { - for(int i = 0; i < parents->getLength(); i++) - { - DOM::Element parent = (const DOM::Element&)parents->item(i); - - if(!parent.hasChildNodes()) - continue; - - // First perform the breaks - DOM::NodeList tags = parent.getElementsByTagName(tagName); - if(tags != NULL) - { - for(int i = 0; i < tags->getLength(); i++) - breakElement((const DOM::Element&)tags->item(i), parentName); - } - - DOM::Node tag = doc.createElement(tagName); - parent.insertBefore(tag, parent.getFirstChild()); - - DOM::Node child = tag; - - while(child != NULL && (child = child.getNextSibling()) != NULL) - { - if(DOMHelpers::isElement(child, kElBlock)) - { - DOM::Node next = child.getNextSibling(); - if(next == NULL) - { - parent.removeChild(child); - continue; - } - - if(DOMHelpers::isElement(next, tagName)) - { - DOM::Node twodown = next.getNextSibling(); - if(!DOMHelpers::isElement(twodown, kElBlock)) - { - child = parent.insertBefore(parent.removeChild(next), child); - } - else - { - parent.removeChild(child); - child = next; - } - } - } - - if(DOMHelpers::isElement(child, tagName)) - { - if(!tag.hasChildNodes()) - parent.removeChild(tag); - tag = child; - } - else - { - tag.appendChild(parent.removeChild(child)); - child = tag; - } - } - - if(!tag.hasChildNodes()) - parent.removeChild(tag); - } - } - - DOM::NodeList tags = doc.getElementsByTagName(tagName); - if(tags != NULL) - { - for(int i = 0; i < tags->getLength(); i++) - { - DOM::Element tag = (const DOM::Element&)tags->item(i); - DOM::Node parent = tag.getParentNode(); - - if(parent != NULL && !DOMHelpers::isElement(parent, parentName)) - parent.removeChild(tag); - } - } -} - -void XmlFixups::breakLists(DOM::Document& doc) -{ - DOM::NodeList destinations = doc.getElementsByTagName(kElDest); - if(destinations != NULL) - { - for(int i = 0; i < destinations->getLength(); i++) - { - DOM::Element dest = (const DOM::Element&)destinations->item(i); - - // Sanity Check - if(dest == NULL) - continue; - - // Go through the children of this destination - DOM::Node child = dest.getFirstChild(); - - DOM::Element list; - DOM::Element e; - - wstring previd; - - while(child != NULL) - { - // If it's a block ... - if(DOMHelpers::isElement(child, kElBlock)) - { - e = (DOM::Element&)child; - - // ... and has a list attribute - wstring listid = e.getAttribute(kAtList); - if(listid.length() > 0) - { - e.removeAttribute(kAtList); - - if(list == NULL || previd != listid) - { - list = doc.createElement(kElList); - list.setAttribute(kAtList, listid); - dest.insertBefore(list, child); - previd = listid; - } - } - else - { - list = NULL; - previd.erase(); - } - } - - // It's not a block - if(list != NULL) - { - list.appendChild(dest.removeChild(child)); - child = list; - } - - child = child.getNextSibling(); - } - } - } -} - -void XmlFixups::runPassTwo(const DOM::Document& doc) -{ - /* - * Okay, this function is complicated and long. It was all broken up into - * shorter functions previously but that sucked for efficiency. Basically - * we want to iterate over the document as few times as possible and because - * of that we combine all of that here. - * - * In this pass: - * o Fix: - * - font names - * - style names - * - list attributes - * - block elements - * o Consolidate certain tags to end of doc - * o Consolidate certain tags to start of doc - * o Combine duplicates of certain tags - * o Remove certain tags - * o Break out pages and sections - */ - - bool haveStyles = false; - ElementTable styles; - - bool haveFonts = false; - ElementTable fonts; - - bool haveLists = false; - ElementTable lists; - - DOM::Element top = doc.getDocumentElement(); - - NodeStack toStart; // Nodes that get moved to beginning of document - NodeStack toEnd; // Nodes that get moved to the end of the document - - ElementIterator it(top); - ElementIterator end; - - DOM::Element el; - - for( ; it != end; ++it) - { - el = *it; - - // Mark each node as we've seen it so we don't - // do a given element twice - if((int)el.getUserData() == PASS_TWO) - continue; - - el.setUserData((void*)PASS_TWO); - string name = el.getNodeName(); - - // Get stylesheet block - if(name == kElStylesheet) - { - // Load the styles into a id mapped table - styles.load(el, kElStyle); - - if(!styles.empty()) - { - styles.removeIds(); - haveStyles = true; - } - } - - // The Font Table - else if(name == kElFontTable) - { - // Load the fonts into an id mapped table - fonts.load(el, kElFontDef); - - if(!fonts.empty()) - { - fonts.removeIds(); - haveFonts = true; - } - } - - // Get the list definition block - else if(name == kElListtable) - { - // Load the lists into an id mapped table - lists.load(el, kElListdef); - - if(!lists.empty()) - { - lists.removeIds(); - haveLists = true; - } - } - - else if(name == kElBlock) - { - // Change style attribute on blocks to name - if(haveStyles && el.hasAttribute(kElStyle)) - { - DOM::Element style = styles.get(el.getAttribute(kElStyle)); - if(style != NULL) - el.setAttribute(kElStyle, style.getAttribute(kAtName)); - } - - /* - * The below function call replaces the current element with another - * new element. The new element still needs to be processed, so we - * just backup one, and then short circuit the loop below. - */ - --it; - - // Now fix the block itself - fixBlock(doc, el); - - continue; // Current element no longer valid - } - - // Change id attribute on fonts to name - else if(haveFonts && name == kElFont) - { - if(el.hasAttribute(kAtId)) - { - DOM::Element font = fonts.get(el.getAttribute(kAtId)); - if(font != NULL) - el.setAttribute(kAtName, font.getAttribute(kAtName)); - - el.removeAttribute(kAtId); - } - } - - // Copy list attributes onto the lists - else if(haveLists && name == kElList) - { - if(el.hasAttribute(kAtList)) - { - DOM::Element list = lists.get(el.getAttribute(kAtList)); - if(list != NULL) - { - // And copy all the attributes from the list definition to the list - DOMHelpers::copyAttributes(list, el, kHideList); - el.removeAttribute(kAtList); - } - } - } - - // Break out pages and sections all the way to document - if(name == kElPage || name == kElSect) - { - breakElement(el, kElDoc); - - /* - * NOTE: The flow of the document is changed here. But the current - * element is still in a valid place for iterating over the document - * so we don't have to worry about it. - */ - } - - // Tags that get removed but contents preserved. Also here are - // tags that get removed if they have no attributes - if(m_removes.find(name) != m_removes.end() || - (m_requireAttrs.find(name) != m_requireAttrs.end() && !el.hasAttributes())) - { - DOM::Node parent = el->getParentNode(); - - if(parent != NULL) - { - /* - * After the element is removed, the current element is no longer - * valid for iterating over the document. In addition we insert - * all the child nodes of the current element before it. We need - * to be sure to iterate over these elements, and to do so we - * decrement the iterator. - */ - --it; - - while(el.hasChildNodes()) - parent.insertBefore(el.removeChild(el.getFirstChild()), el); - - parent.removeChild(el); - continue; /* Current element doesn't need any more processing */ - } - } - - // Tags that get removed when no child nodes exist - if(m_removeEmpty.find(name) != m_removeEmpty.end() && !el.hasChildNodes()) - { - DOM::Node parent = el->getParentNode(); - - if(parent != NULL) - { - /* - * After the element is removed, the current element is no longer - * valid for iterating over the document. In addition we insert - * all the child nodes of the current element before it. We need - * to be sure to iterate over these elements, and to do so we - * decrement the iterator. - */ - --it; - - parent.removeChild(el); - continue; /* Current element doesn't need any more processing */ - } - } - - // Tags that need to get consolidated to start - if(m_consolidateStart.find(name) != m_consolidateStart.end()) - toStart.push(el); - - // Tags that need to get consolidated to end - else if(m_consolidateEnd.find(name) != m_consolidateEnd.end()) - toEnd.push(el); - - - // Tags for which duplicates need to be combined - if(m_duplicates.find(name) != m_duplicates.end()) - { - DOM::Element parent = (const DOM::Element&)el.getParentNode(); - if(parent != NULL) - { - // Loop till we find no more of the same - for(;;) - { - DOM::Node next = el.getNextSibling(); - - if(next == NULL || next.getNodeType() != DOM::Node::ELEMENT_NODE) - break; - - // If it's the same type of element ... - if(!DOMHelpers::isEqualElement((DOM::Element&)next, el)) - break; - - // NOTE: Notice we do nothing with attributes. Currently - // all elements in the duplicates list don't need that. - - while(next.hasChildNodes()) - el.appendChild(next.removeChild(next.getFirstChild())); - - // Remove duplicate node - parent.removeChild(next); - } - } - } - } - - // Complete consolidation to front - while(!toStart.empty()) - { - DOM::Node node = toStart.top(); - DOM::Node parent = node.getParentNode(); - if(parent != NULL && DOMHelpers::hasAncestor(top, node)) - { - // Remove it from it's child - parent.removeChild(node); - - // And put at start of the document of the document - top.insertBefore(node, top.getFirstChild()); - } - - toStart.pop(); - } - - // Complete consolidation to end - while(!toEnd.empty()) - { - DOM::Node node = toEnd.top(); - DOM::Node parent = node.getParentNode(); - if(parent != NULL && DOMHelpers::hasAncestor(top, node)) - { - // Remove it from it's child - parent.removeChild(node); - - // And put at end of the document of the document - top.appendChild(node); - } - - toEnd.pop(); - } - -} - -void XmlFixups::breakTables(DOM::Document& doc) -{ - // Break rows out to destinations - DOM::NodeList rows = doc.getElementsByTagName(kElRow); - if(rows != NULL) - { - for(int i = 0; i < rows->getLength(); i++) - { - DOM::Element row = (const DOM::Element&)rows->item(i); - DOM::Node parent = row.getParentNode(); - - if(parent == NULL) - continue; - - if(DOMHelpers::isElement(parent, kElBlock)) - { - DOM::Node grandparent = parent.getParentNode(); - - if(grandparent != NULL && !row.hasChildNodes()) - { - if(row.getPreviousSibling() == NULL) - grandparent.insertBefore(parent.removeChild(row), parent); - else if(row.getNextSibling() == NULL) - DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent); - } - } - - breakElement(row, kElDest); - } - } - - // Now group stuff in destinations into tables - DOM::NodeList destinations = doc.getElementsByTagName(kElDest); - if(destinations != NULL) - { - for(int i = 0; i < destinations->getLength(); i++) - { - DOM::Element dest = (const DOM::Element&)destinations->item(i); - - // Sanity Check - if(dest == NULL) - continue; - - // Go through the children of this destination - DOM::Node child = dest.getFirstChild(); - - DOM::Element table; - DOM::Element e; - - while(child != NULL) - { - // If it's a block and has a cell attribute - if(DOMHelpers::isElement(child, kElBlock)) - { - e = (DOM::Element&)child; - - // if it has a cell attribute - if(e.getAttribute(kAtCell).length() > 0) - { - e.removeAttribute(kAtCell); - - if(table == NULL) - { - table = doc.createElement(kElTable); - dest.insertBefore(table, child); - } - } - else - { - table = NULL; - } - } - - // It's not a block - if(table != NULL) - { - table.appendChild(dest.removeChild(child)); - child = table; - } - - child = child.getNextSibling(); - } - } - } -} - - -void XmlFixups::fixBlock(const DOM::Document& doc, DOM::Element& block) -{ - // Okay now change blocks to whatever element they're supposed to be - string fix; - wstring val; - - DOM::Node parent = block.getParentNode(); - - if(parent != NULL) - { - // Figure out what kind of element they want block fixed to - val = block.getAttribute(kAtFix); - if(val.length() > 0) - block.removeAttribute(kAtFix); - - // BUG: Sablotron bug work around - if(val.length() == 0) - { - val = block.getAttributeNS("", kAtFix); - if(val.length() > 0) - block.removeAttributeNS("", kAtFix); - } - - if(val.length() > 0) - DOM::transcode16to8(val, fix); - - if(fix.length() == 0) - fix = kElPara; - - // Create duplicate of the 'fix' element - DOM::Element el = doc.createElement(fix); - DOMHelpers::copyAttributes(block, el, NULL); - - // Replace block with the given 'fix' element - while(block.hasChildNodes()) - el.appendChild(block.removeChild(block.getFirstChild())); - - parent.replaceChild(el, block); - } -} -- cgit v1.2.3