summaryrefslogtreecommitdiff
path: root/src/xmlfixups.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/xmlfixups.cpp')
-rw-r--r--src/xmlfixups.cpp794
1 files changed, 0 insertions, 794 deletions
diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp
deleted file mode 100644
index 3b56f6b..0000000
--- a/src/xmlfixups.cpp
+++ /dev/null
@@ -1,794 +0,0 @@
-/*
- * Copyright (c) 2004, Nate Nielsen
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the
- * following disclaimer.
- * * Redistributions in binary form must reproduce the
- * above copyright notice, this list of conditions and
- * the following disclaimer in the documentation and/or
- * other materials provided with the distribution.
- * * The names of contributors to this software may not be
- * used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
- * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
- *
- * CONTRIBUTORS
- * Nate Nielsen <nielsen@memberwebs.com>
- *
- */
-
-#include "usuals.h"
-#include "xmlfixups.h"
-#include "domhelpers.h"
-#include "tags.h"
-
-static const char* kNoDuplicates[] =
- { kElB, kElU, kElI, kElFont, kElHide, kElSuper, kElSub, NULL };
-
-static const char* kRequireAttrs[] =
- { kElFont, NULL };
-
-static const char* kRemoveTags[] =
- { kElDest, kElListdef, kElListtable, kElFontTable, kElFontDef, NULL };
-
-static const char* kRemoveEmpty[] =
- { kElOptions, kElList, NULL };
-
-static const char* kBlockTags[] =
- { kElTable, kElPara, NULL };
-
-static const char* kHideList[] =
- { kAtId, kAtList, NULL };
-
-static const char* kConsolidateEnd[] =
- { kElFootNote, NULL };
-
-static const char* kConsolidateStart[] =
- { kElStylesheet, kElInfo, NULL };
-
-void loadStringSet(StringSet& set, const char** strings)
-{
- while(*strings)
- set.insert(string(*(strings++)));
-}
-
-XmlFixups::XmlFixups()
-{
- loadStringSet(m_duplicates, kNoDuplicates);
- loadStringSet(m_removes, kRemoveTags);
- loadStringSet(m_removeEmpty, kRemoveEmpty);
- loadStringSet(m_requireAttrs, kRequireAttrs);
- loadStringSet(m_consolidateStart, kConsolidateStart);
- loadStringSet(m_consolidateEnd, kConsolidateEnd);
-}
-
-bool XmlFixups::breakElement(const DOM::Element& el, const string& contain)
-{
- ASSERT(el != NULL);
-
- DOM::Element parent = (const DOM::Element&)el.getParentNode();
- DOM::Element grandparent;
-
- string s = el.getNodeName();
- s = parent.getNodeName();
-
- // Get the parent node
- if(parent != NULL)
- grandparent = (const DOM::Element&)parent.getParentNode();
-
- // Make sure we have something to work with before continuing
- if(grandparent == NULL || parent == NULL ||
- DOMHelpers::isElement(parent, contain))
- return true;
-
- DOM::Node e;
-
- // Check to see if this is the first node in the parent.
- // If so then just move out to before
- if(el.getPreviousSibling() == NULL)
- {
- e = grandparent.insertBefore(parent.removeChild(el), parent);
- }
-
-
- // Check to see if this is the last node in the parent.
- // If so then just move out to after the parent
- else if(el.getNextSibling() == NULL)
- {
- DOM::Node next = parent.getNextSibling();
- if(next == NULL)
- e = grandparent.appendChild(parent.removeChild(el));
- else
- e = grandparent.insertBefore(parent.removeChild(el), next);
- }
-
-
- // Otherwise it's in the middle so split the parent
- // element etc...
- else
- {
- // Clone it but not deep
- DOM::Element parent2 = (const DOM::Element&)parent.cloneNode(false);
-
- if(parent2 == NULL)
- return false;
-
- // Flag that tells us whether we moved anything up to parent
- bool moved = false;
-
- // Now move all nodes after this one to the second parent.
- while((e = el.getNextSibling()) != NULL)
- {
- parent2.appendChild(parent.removeChild(e));
- moved = true;
- }
-
- // Remove the element from it's parent
- e = parent.removeChild(el);
-
- // Okay now we move the paragraph up to the parent
- DOMHelpers::insertAfter(grandparent, e, parent);
- if(moved)
- DOMHelpers::insertAfter(grandparent, parent2, e);
- }
-
- // Now call it again with the paragraph in the new position
- // until everything's cut through!
- return breakElement((DOM::Element&)e, contain);
-}
-
-void XmlFixups::breakBlocks(DOM::Document& document)
-{
- // First break out all the paragraphs to the destination level
- DOM::NodeList blocks = document.getElementsByTagName(kElBlock);
- if(blocks != NULL)
- {
- for(int i = 0; i < blocks->getLength(); i++)
- {
- DOM::Element block = (const DOM::Element&)blocks->item(i);
-
- // If it's the single closed style para then break it
- if(block != NULL && !block.hasChildNodes())
- breakElement(block, kElDest);
- }
- }
-
-
- // Now group stuff in destinations into paras or other blocks
- DOM::NodeList destinations = document.getElementsByTagName(kElDest);
- if(destinations != NULL)
- {
- for(int i = 0; i < destinations->getLength(); i++)
- {
- DOM::Element dest = (const DOM::Element&)destinations->item(i);
-
- // Sanity Check
- if(dest == NULL || !dest.hasChildNodes())
- continue;
-
- // Go through the children of this destination
- DOM::Node child = dest.getFirstChild();
-
- DOM::Element block;
-
- while(child != NULL)
- {
- // If it's a block
- if(DOMHelpers::isElement(child, kElBlock))
- {
- block = (DOM::Element&)child;
- child = child.getNextSibling();
- continue;
- }
-
- // If it's already a real block element
- for(const char** t = kBlockTags; *t != NULL; t++)
- {
- if(DOMHelpers::isElement(child, *t))
- {
- block = NULL;
- break;
- }
- }
-
- // If there's a block then add to it
- if(block != NULL)
- {
- block.appendChild(dest.removeChild(child));
- child = block;
- }
-
- child = child.getNextSibling();
- }
- }
- }
-}
-
-void XmlFixups::wrapTags(DOM::Document& doc, const string& tagName,
- const string& wrapName)
-{
- DOM::NodeList tags = doc.getElementsByTagName(tagName);
- if(tags != NULL)
- {
- for(int i = 0; i < tags->getLength(); i++)
- {
- DOM::Element tag = (const DOM::Element&)tags->item(i);
-
- DOM::Element wrap = doc.createElement(wrapName);
- while(tag.hasChildNodes())
- wrap.appendChild(tag.removeChild(tag.getFirstChild()));
-
- tag.appendChild(wrap);
- }
- }
-}
-
-void XmlFixups::breakTags(DOM::Document& doc, const string& parentName,
- const string& tagName)
-{
- DOM::NodeList parents = doc.getElementsByTagName(parentName);
- if(parents != NULL)
- {
- for(int i = 0; i < parents->getLength(); i++)
- {
- DOM::Element parent = (const DOM::Element&)parents->item(i);
-
- if(!parent.hasChildNodes())
- continue;
-
- // First perform the breaks
- DOM::NodeList tags = parent.getElementsByTagName(tagName);
- if(tags != NULL)
- {
- for(int i = 0; i < tags->getLength(); i++)
- breakElement((const DOM::Element&)tags->item(i), parentName);
- }
-
- DOM::Node tag = doc.createElement(tagName);
- parent.insertBefore(tag, parent.getFirstChild());
-
- DOM::Node child = tag;
-
- while(child != NULL && (child = child.getNextSibling()) != NULL)
- {
- if(DOMHelpers::isElement(child, kElBlock))
- {
- DOM::Node next = child.getNextSibling();
- if(next == NULL)
- {
- parent.removeChild(child);
- continue;
- }
-
- if(DOMHelpers::isElement(next, tagName))
- {
- DOM::Node twodown = next.getNextSibling();
- if(!DOMHelpers::isElement(twodown, kElBlock))
- {
- child = parent.insertBefore(parent.removeChild(next), child);
- }
- else
- {
- parent.removeChild(child);
- child = next;
- }
- }
- }
-
- if(DOMHelpers::isElement(child, tagName))
- {
- if(!tag.hasChildNodes())
- parent.removeChild(tag);
- tag = child;
- }
- else
- {
- tag.appendChild(parent.removeChild(child));
- child = tag;
- }
- }
-
- if(!tag.hasChildNodes())
- parent.removeChild(tag);
- }
- }
-
- DOM::NodeList tags = doc.getElementsByTagName(tagName);
- if(tags != NULL)
- {
- for(int i = 0; i < tags->getLength(); i++)
- {
- DOM::Element tag = (const DOM::Element&)tags->item(i);
- DOM::Node parent = tag.getParentNode();
-
- if(parent != NULL && !DOMHelpers::isElement(parent, parentName))
- parent.removeChild(tag);
- }
- }
-}
-
-void XmlFixups::breakLists(DOM::Document& doc)
-{
- DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
- if(destinations != NULL)
- {
- for(int i = 0; i < destinations->getLength(); i++)
- {
- DOM::Element dest = (const DOM::Element&)destinations->item(i);
-
- // Sanity Check
- if(dest == NULL)
- continue;
-
- // Go through the children of this destination
- DOM::Node child = dest.getFirstChild();
-
- DOM::Element list;
- DOM::Element e;
-
- wstring previd;
-
- while(child != NULL)
- {
- // If it's a block ...
- if(DOMHelpers::isElement(child, kElBlock))
- {
- e = (DOM::Element&)child;
-
- // ... and has a list attribute
- wstring listid = e.getAttribute(kAtList);
- if(listid.length() > 0)
- {
- e.removeAttribute(kAtList);
-
- if(list == NULL || previd != listid)
- {
- list = doc.createElement(kElList);
- list.setAttribute(kAtList, listid);
- dest.insertBefore(list, child);
- previd = listid;
- }
- }
- else
- {
- list = NULL;
- previd.erase();
- }
- }
-
- // It's not a block
- if(list != NULL)
- {
- list.appendChild(dest.removeChild(child));
- child = list;
- }
-
- child = child.getNextSibling();
- }
- }
- }
-}
-
-void XmlFixups::runPassTwo(const DOM::Document& doc)
-{
- /*
- * Okay, this function is complicated and long. It was all broken up into
- * shorter functions previously but that sucked for efficiency. Basically
- * we want to iterate over the document as few times as possible and because
- * of that we combine all of that here.
- *
- * In this pass:
- * o Fix:
- * - font names
- * - style names
- * - list attributes
- * - block elements
- * o Consolidate certain tags to end of doc
- * o Consolidate certain tags to start of doc
- * o Combine duplicates of certain tags
- * o Remove certain tags
- * o Break out pages and sections
- */
-
- bool haveStyles = false;
- ElementTable styles;
-
- bool haveFonts = false;
- ElementTable fonts;
-
- bool haveLists = false;
- ElementTable lists;
-
- DOM::Element top = doc.getDocumentElement();
-
- NodeStack toStart; // Nodes that get moved to beginning of document
- NodeStack toEnd; // Nodes that get moved to the end of the document
-
- ElementIterator it(top);
- ElementIterator end;
-
- DOM::Element el;
-
- for( ; it != end; ++it)
- {
- el = *it;
-
- // Mark each node as we've seen it so we don't
- // do a given element twice
- if((int)el.getUserData() == PASS_TWO)
- continue;
-
- el.setUserData((void*)PASS_TWO);
- string name = el.getNodeName();
-
- // Get stylesheet block
- if(name == kElStylesheet)
- {
- // Load the styles into a id mapped table
- styles.load(el, kElStyle);
-
- if(!styles.empty())
- {
- styles.removeIds();
- haveStyles = true;
- }
- }
-
- // The Font Table
- else if(name == kElFontTable)
- {
- // Load the fonts into an id mapped table
- fonts.load(el, kElFontDef);
-
- if(!fonts.empty())
- {
- fonts.removeIds();
- haveFonts = true;
- }
- }
-
- // Get the list definition block
- else if(name == kElListtable)
- {
- // Load the lists into an id mapped table
- lists.load(el, kElListdef);
-
- if(!lists.empty())
- {
- lists.removeIds();
- haveLists = true;
- }
- }
-
- else if(name == kElBlock)
- {
- // Change style attribute on blocks to name
- if(haveStyles && el.hasAttribute(kElStyle))
- {
- DOM::Element style = styles.get(el.getAttribute(kElStyle));
- if(style != NULL)
- el.setAttribute(kElStyle, style.getAttribute(kAtName));
- }
-
- /*
- * The below function call replaces the current element with another
- * new element. The new element still needs to be processed, so we
- * just backup one, and then short circuit the loop below.
- */
- --it;
-
- // Now fix the block itself
- fixBlock(doc, el);
-
- continue; // Current element no longer valid
- }
-
- // Change id attribute on fonts to name
- else if(haveFonts && name == kElFont)
- {
- if(el.hasAttribute(kAtId))
- {
- DOM::Element font = fonts.get(el.getAttribute(kAtId));
- if(font != NULL)
- el.setAttribute(kAtName, font.getAttribute(kAtName));
-
- el.removeAttribute(kAtId);
- }
- }
-
- // Copy list attributes onto the lists
- else if(haveLists && name == kElList)
- {
- if(el.hasAttribute(kAtList))
- {
- DOM::Element list = lists.get(el.getAttribute(kAtList));
- if(list != NULL)
- {
- // And copy all the attributes from the list definition to the list
- DOMHelpers::copyAttributes(list, el, kHideList);
- el.removeAttribute(kAtList);
- }
- }
- }
-
- // Break out pages and sections all the way to document
- if(name == kElPage || name == kElSect)
- {
- breakElement(el, kElDoc);
-
- /*
- * NOTE: The flow of the document is changed here. But the current
- * element is still in a valid place for iterating over the document
- * so we don't have to worry about it.
- */
- }
-
- // Tags that get removed but contents preserved. Also here are
- // tags that get removed if they have no attributes
- if(m_removes.find(name) != m_removes.end() ||
- (m_requireAttrs.find(name) != m_requireAttrs.end() && !el.hasAttributes()))
- {
- DOM::Node parent = el->getParentNode();
-
- if(parent != NULL)
- {
- /*
- * After the element is removed, the current element is no longer
- * valid for iterating over the document. In addition we insert
- * all the child nodes of the current element before it. We need
- * to be sure to iterate over these elements, and to do so we
- * decrement the iterator.
- */
- --it;
-
- while(el.hasChildNodes())
- parent.insertBefore(el.removeChild(el.getFirstChild()), el);
-
- parent.removeChild(el);
- continue; /* Current element doesn't need any more processing */
- }
- }
-
- // Tags that get removed when no child nodes exist
- if(m_removeEmpty.find(name) != m_removeEmpty.end() && !el.hasChildNodes())
- {
- DOM::Node parent = el->getParentNode();
-
- if(parent != NULL)
- {
- /*
- * After the element is removed, the current element is no longer
- * valid for iterating over the document. In addition we insert
- * all the child nodes of the current element before it. We need
- * to be sure to iterate over these elements, and to do so we
- * decrement the iterator.
- */
- --it;
-
- parent.removeChild(el);
- continue; /* Current element doesn't need any more processing */
- }
- }
-
- // Tags that need to get consolidated to start
- if(m_consolidateStart.find(name) != m_consolidateStart.end())
- toStart.push(el);
-
- // Tags that need to get consolidated to end
- else if(m_consolidateEnd.find(name) != m_consolidateEnd.end())
- toEnd.push(el);
-
-
- // Tags for which duplicates need to be combined
- if(m_duplicates.find(name) != m_duplicates.end())
- {
- DOM::Element parent = (const DOM::Element&)el.getParentNode();
- if(parent != NULL)
- {
- // Loop till we find no more of the same
- for(;;)
- {
- DOM::Node next = el.getNextSibling();
-
- if(next == NULL || next.getNodeType() != DOM::Node::ELEMENT_NODE)
- break;
-
- // If it's the same type of element ...
- if(!DOMHelpers::isEqualElement((DOM::Element&)next, el))
- break;
-
- // NOTE: Notice we do nothing with attributes. Currently
- // all elements in the duplicates list don't need that.
-
- while(next.hasChildNodes())
- el.appendChild(next.removeChild(next.getFirstChild()));
-
- // Remove duplicate node
- parent.removeChild(next);
- }
- }
- }
- }
-
- // Complete consolidation to front
- while(!toStart.empty())
- {
- DOM::Node node = toStart.top();
- DOM::Node parent = node.getParentNode();
- if(parent != NULL && DOMHelpers::hasAncestor(top, node))
- {
- // Remove it from it's child
- parent.removeChild(node);
-
- // And put at start of the document of the document
- top.insertBefore(node, top.getFirstChild());
- }
-
- toStart.pop();
- }
-
- // Complete consolidation to end
- while(!toEnd.empty())
- {
- DOM::Node node = toEnd.top();
- DOM::Node parent = node.getParentNode();
- if(parent != NULL && DOMHelpers::hasAncestor(top, node))
- {
- // Remove it from it's child
- parent.removeChild(node);
-
- // And put at end of the document of the document
- top.appendChild(node);
- }
-
- toEnd.pop();
- }
-
-}
-
-void XmlFixups::breakTables(DOM::Document& doc)
-{
- // Break rows out to destinations
- DOM::NodeList rows = doc.getElementsByTagName(kElRow);
- if(rows != NULL)
- {
- for(int i = 0; i < rows->getLength(); i++)
- {
- DOM::Element row = (const DOM::Element&)rows->item(i);
- DOM::Node parent = row.getParentNode();
-
- if(parent == NULL)
- continue;
-
- if(DOMHelpers::isElement(parent, kElBlock))
- {
- DOM::Node grandparent = parent.getParentNode();
-
- if(grandparent != NULL && !row.hasChildNodes())
- {
- if(row.getPreviousSibling() == NULL)
- grandparent.insertBefore(parent.removeChild(row), parent);
- else if(row.getNextSibling() == NULL)
- DOMHelpers::insertAfter(grandparent, parent.removeChild(row), parent);
- }
- }
-
- breakElement(row, kElDest);
- }
- }
-
- // Now group stuff in destinations into tables
- DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
- if(destinations != NULL)
- {
- for(int i = 0; i < destinations->getLength(); i++)
- {
- DOM::Element dest = (const DOM::Element&)destinations->item(i);
-
- // Sanity Check
- if(dest == NULL)
- continue;
-
- // Go through the children of this destination
- DOM::Node child = dest.getFirstChild();
-
- DOM::Element table;
- DOM::Element e;
-
- while(child != NULL)
- {
- // If it's a block and has a cell attribute
- if(DOMHelpers::isElement(child, kElBlock))
- {
- e = (DOM::Element&)child;
-
- // if it has a cell attribute
- if(e.getAttribute(kAtCell).length() > 0)
- {
- e.removeAttribute(kAtCell);
-
- if(table == NULL)
- {
- table = doc.createElement(kElTable);
- dest.insertBefore(table, child);
- }
- }
- else
- {
- table = NULL;
- }
- }
-
- // It's not a block
- if(table != NULL)
- {
- table.appendChild(dest.removeChild(child));
- child = table;
- }
-
- child = child.getNextSibling();
- }
- }
- }
-}
-
-
-void XmlFixups::fixBlock(const DOM::Document& doc, DOM::Element& block)
-{
- // Okay now change blocks to whatever element they're supposed to be
- string fix;
- wstring val;
-
- DOM::Node parent = block.getParentNode();
-
- if(parent != NULL)
- {
- // Figure out what kind of element they want block fixed to
- val = block.getAttribute(kAtFix);
- if(val.length() > 0)
- block.removeAttribute(kAtFix);
-
- // BUG: Sablotron bug work around
- if(val.length() == 0)
- {
- val = block.getAttributeNS("", kAtFix);
- if(val.length() > 0)
- block.removeAttributeNS("", kAtFix);
- }
-
- if(val.length() > 0)
- DOM::transcode16to8(val, fix);
-
- if(fix.length() == 0)
- fix = kElPara;
-
- // Create duplicate of the 'fix' element
- DOM::Element el = doc.createElement(fix);
- DOMHelpers::copyAttributes(block, el, NULL);
-
- // Replace block with the given 'fix' element
- while(block.hasChildNodes())
- el.appendChild(block.removeChild(block.getFirstChild()));
-
- parent.replaceChild(el, block);
- }
-}