summaryrefslogtreecommitdiff
path: root/src/xmlfixups.cpp
diff options
context:
space:
mode:
author Stef Walter <stef@memberwebs.com> 2004-07-24 19:06:51 +0000
committer Stef Walter <stef@memberwebs.com> 2004-07-24 19:06:51 +0000
commit 8335fdb6b7e7afb57d096e0f3a453b662f7a23c0 (patch)
tree ef3c3079f58b44cb9f1b2953f05e1628d6846e9b /src/xmlfixups.cpp
parent ff4568d01651afd615751f9fc683dbe30f2ced9b (diff)
- Post processing code cleanup.
Diffstat (limited to 'src/xmlfixups.cpp')
-rw-r--r-- src/xmlfixups.cpp 566
1 files changed, 271 insertions, 295 deletions
diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp
index 707294d..b8c84f4 100644
--- a/src/xmlfixups.cpp
+++ b/src/xmlfixups.cpp
@@ -45,7 +45,7 @@ static const char* kNoDuplicates[] =
{ kElB, kElU, kElI, kElColor, kElHide, kElColor, kElSuper, kElSub, NULL };
static const char* kRemoveTags[] =
- { kElDest, kElListdef, kElListtable, NULL };
+ { kElDest, kElListdef, kElListtable, kElFontTable, NULL };
static const char* kBlockTags[] =
{ kElTable, kElPara, NULL };
@@ -59,19 +59,18 @@ static const char* kConsolidateEnd[] =
static const char* kConsolidateStart[] =
{ kElStylesheet, kElInfo, NULL };
+// Inserts every entry of a NULL-terminated array of C strings into 'set'.
+// 'strings' must end with a NULL sentinel; iteration stops there.
+void loadStringSet(StringSet& set, const char** strings)
+{
+    // Advance the cursor on every pass. Without the increment the loop
+    // keeps re-inserting the first entry and never reaches the sentinel.
+    for( ; *strings != NULL; ++strings)
+        set.insert(string(*strings));
+}
-void XmlFixups::breakBreak(DOM::Document& doc, const string& contain,
- const string& tag)
+XmlFixups::XmlFixups()
{
- DOM::NodeList els = doc.getElementsByTagName(tag);
- if(els != NULL)
- {
- for(int i = 0; i < els->getLength(); i++)
- {
- DOM::Element el = (const DOM::Element&)els->item(i);
- breakElement(el, contain);
- }
- }
+ loadStringSet(m_duplicates, kNoDuplicates);
+ loadStringSet(m_removes, kRemoveTags);
+ loadStringSet(m_consolidateStart, kConsolidateStart);
+ loadStringSet(m_consolidateEnd, kConsolidateEnd);
}
bool XmlFixups::breakElement(const DOM::Element& el, const string& contain)
@@ -315,19 +314,6 @@ void XmlFixups::breakTags(DOM::Document& doc, const string& parentName,
if(parent != NULL && !DOMHelpers::isElement(parent, parentName))
parent.removeChild(tag);
-#if 0
- else if(tag.hasChildNodes())
- {
- DOM::NodeList children = tag.getChildNodes();
- if(children != NULL && children->getLength() == 1)
- {
- DOM::Node child = children->item(0);
- if(child != NULL && !child.hasChildNodes() &&
- DOMHelpers::isElement(child, kElBlock))
- parent.removeChild(tag);
- }
- }
-#endif
}
}
}
@@ -394,51 +380,255 @@ void XmlFixups::breakLists(DOM::Document& doc)
}
}
-void XmlFixups::fixStyles(const DOM::Document doc)
+void XmlFixups::runPassTwo(const DOM::Document& doc)
{
- // Get all stylesheet styles
- DOM::NodeList styles = doc.getElementsByTagName(kElStyle);
- if(styles != NULL)
+ /*
+ * Okay, this function is complicated and long. It was all broken up into
+ * shorter functions previously but that sucked for efficiency. Basically
+ * we want to iterate over the document as few times as possible and because
+ * of that we combine all of that here.
+ *
+ * In this pass:
+ * o Fix:
+ * - font names
+ * - style names
+ * - list attributes
+ * - block elements
+ * o Consolidate certain tags to end of doc
+ * o Consolidate certain tags to start of doc
+ * o Combine duplicates of certain tags
+ * o Remove certain tags
+ * o Break out pages and sections
+ */
+
+ bool haveStyles = false;
+ ElementTable styles;
+
+ bool haveFonts = false;
+ ElementTable fonts;
+
+ bool haveLists = false;
+ ElementTable lists;
+
+ DOM::Element top = doc.getDocumentElement();
+
+ // Get stylesheet block
+ DOM::Element el = DOMHelpers::getChildElement(top, kElStylesheet);
+ if(el != NULL)
+ {
+ // Load the styles into a id mapped table
+ styles.load(el, kElStyle);
+
+ if(!styles.empty())
+ {
+ styles.removeIds();
+ haveStyles = true;
+ }
+ }
+
+ // Get the font block
+ el = DOMHelpers::getChildElement(top, kElFontTable);
+ if(el != NULL)
+ {
+ // Load the fonts into an id mapped table
+ fonts.load(el, kElFont);
+
+ if(!fonts.empty())
+ {
+ fonts.removeIds();
+ haveFonts = true;
+ }
+ }
+
+ // Get the list definition block
+ el = DOMHelpers::getChildElement(top, kElListtable);
+ if(el != NULL)
{
- // Get list of blocks in the document
- DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
- if(blocks != NULL)
+ // Load the lists into an id mapped table
+ lists.load(el, kElListdef);
+
+ if(!lists.empty())
{
- for(int i = 0; i < blocks->getLength(); i++)
+ lists.removeIds();
+ haveLists = true;
+ }
+ }
+
+ NodeStack toStart; // Nodes that get moved to beginning of document
+ NodeStack toEnd; // Nodes that get moved to the end of the document
+
+ ElementIterator it(top);
+ ElementIterator end;
+
+ for( ; it != end; ++it)
+ {
+ el = *it;
+
+ // Mark each node as we've seen it so we don't
+ // do a given element twice
+ if((int)el.getUserData() == PASS_TWO)
+ continue;
+
+ el.setUserData((void*)PASS_TWO);
+ string name = el.getNodeName();
+
+ if(name == kElBlock)
+ {
+ // Change style attribute on blocks to name
+ if(haveStyles && el.hasAttribute(kElStyle))
{
- DOM::Element block = (const DOM::Element&)blocks->item(i);
+ DOM::Element style = styles.get(el.getAttribute(kElStyle));
+ if(style != NULL)
+ el.setAttribute(kElStyle, style.getAttribute(kAtName));
+ }
- if(block == NULL || !block.hasAttribute(kElStyle))
- continue;
+ /*
+ * The below function call replaces the current element with another
+ * new element. The new element still needs to be processed, so we
+ * just backup one, and then short circuit the loop below.
+ */
- // Lookup block styles
- for(int j = 0; j < styles->getLength(); j++)
+ // Now fix the block itself
+ fixBlock(doc, el);
+
+ continue; // Current element no longer valid
+ }
+
+ // Change id attribute on fonts to name
+ else if(haveFonts && name == kElFont)
+ {
+ if(el.hasAttribute(kAtId))
+ {
+ DOM::Element font = fonts.get(el.getAttribute(kAtId));
+ if(font != NULL)
+ el.setAttribute(kAtName, font.getAttribute(kAtName));
+ }
+ }
+
+ // Copy list attributes onto the lists
+ else if(haveLists && name == kElList)
+ {
+ if(el.hasAttribute(kAtList))
+ {
+ DOM::Element list = lists.get(el.getAttribute(kAtList));
+ if(list != NULL)
{
- DOM::Element style = (const DOM::Element&)styles->item(j);
- if(style != NULL)
- {
- if(style.getAttribute(kAtId) == block.getAttribute(kElStyle))
- {
- // And change to the name
- wstring name = style.getAttribute(kAtName);
- if(name.length() > 0)
- block.setAttribute(kElStyle, name);
- }
- }
+ // And copy all the attributes from the list definition to the list
+ DOMHelpers::copyAttributes(list, el, kHideList);
+ el.removeAttribute(kAtList);
}
}
}
- // A little cleanup of the stylesheet styles
- for(int i = 0; i < styles->getLength(); i++)
+ // Break out pages and sections all the way to document
+ if(name == kElPage || name == kElSect)
{
- DOM::Element style = (const DOM::Element&)styles->item(i);
- if(style != NULL)
- style.removeAttribute(kAtId);
+ breakElement(el, kElDoc);
+
+ /*
+ * NOTE: The flow of the document is changed here. But the current
+ * element is still in a valid place for iterating over the document
+ * so we don't have to worry about it.
+ */
}
- }
-}
+ // Tags that just plain get removed
+ if(m_removes.find(name) != m_removes.end())
+ {
+ DOM::Node parent = el->getParentNode();
+
+ if(parent != NULL)
+ {
+ /*
+ * After the element is removed, the current element is no longer
+ * valid for iterating over the document. In addition we insert
+ * all the child nodes of the current element before it. We need
+ * to be sure to iterate over these elements, and to do so we
+ * decrement the iterator.
+ */
+ --it;
+
+ while(el.hasChildNodes())
+ parent.insertBefore(el.removeChild(el.getFirstChild()), el);
+
+ parent.removeChild(el);
+ continue; /* Current element doesn't need any more processing */
+ }
+ }
+
+
+ // Tags that need to get consolidated to start
+ if(m_consolidateStart.find(name) != m_consolidateStart.end())
+ toStart.push(el);
+
+ // Tags that need to get consolidated to end
+ else if(m_consolidateEnd.find(name) != m_consolidateEnd.end())
+ toEnd.push(el);
+
+
+ // Tags for which duplicates need to be combined
+ if(m_duplicates.find(name) != m_duplicates.end())
+ {
+ DOM::Element parent = (const DOM::Element&)el.getParentNode();
+ if(parent != NULL)
+ {
+ // Loop till we find no more of the same
+ for(;;)
+ {
+ DOM::Node next = el.getNextSibling();
+
+ // If it's the same type of element ...
+ if(!DOMHelpers::isElement(next, name))
+ break;
+
+ // NOTE: Notice we do nothing with attributes. Currently
+ // all elements in the duplicates list don't need that.
+
+ while(next.hasChildNodes())
+ el.appendChild(next.removeChild(next.getFirstChild()));
+
+ // Remove duplicate node
+ parent.removeChild(next);
+ }
+ }
+ }
+ }
+
+    // Complete consolidation to front. Each queued node is moved itself;
+    // 'el' is only the cursor left over from the iteration loop above and
+    // must not be used here.
+    while(!toStart.empty())
+    {
+        DOM::Node node = toStart.top();
+        DOM::Node parent = node.getParentNode();
+
+        // Skip nodes that were detached from the document in the meantime
+        if(parent != NULL && DOMHelpers::hasAncestor(top, node))
+        {
+            // Detach the queued node from its current parent
+            parent.removeChild(node);
+
+            // And put it at the start of the document
+            top.insertBefore(node, top.getFirstChild());
+        }
+
+        toStart.pop();
+    }
+
+    // Complete consolidation to end.
+    // NOTE(review): if NodeStack is LIFO, nodes are appended here in the
+    // reverse of the order they were pushed -- confirm this is intended.
+    while(!toEnd.empty())
+    {
+        DOM::Node node = toEnd.top();
+        DOM::Node parent = node.getParentNode();
+
+        // Skip nodes that were detached from the document in the meantime
+        if(parent != NULL && DOMHelpers::hasAncestor(top, node))
+        {
+            // Detach the queued node from its current parent
+            parent.removeChild(node);
+
+            // And put it at the end of the document
+            top.appendChild(node);
+        }
+
+        toEnd.pop();
+    }
+
+}
void XmlFixups::breakTables(DOM::Document& doc)
{
@@ -526,258 +716,44 @@ void XmlFixups::breakTables(DOM::Document& doc)
}
}
-void XmlFixups::removeTags(const DOM::Document& doc)
-{
- // Go through the list of nodes
- for(const char** t = kRemoveTags; *t != NULL; t++)
- {
- DOM::NodeList elements = doc.getElementsByTagName(*t);
- if(elements != NULL)
- {
- for(int j = 0; j < elements->getLength(); j++)
- {
- DOM::Element el = (const DOM::Element&)elements->item(j);
- DOM::Node parent = el->getParentNode();
- if(parent == NULL)
- continue;
-
- while(el.hasChildNodes())
- parent.insertBefore(el.removeChild(el.getFirstChild()), el);
-
- parent.removeChild(el);
- }
- }
- }
-}
-
-void XmlFixups::fixLists(const DOM::Document doc)
+void XmlFixups::fixBlock(const DOM::Document& doc, DOM::Element& block)
{
- // Get all the lists
- DOM::NodeList lists = doc.getElementsByTagName(kElList);
- if(lists != NULL)
- {
- // And all the list definitions
- DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef);
- if(listdefs != NULL)
- {
- for(int i = 0; i < listdefs->getLength(); i++)
- {
- DOM::Element listdef = (const DOM::Element&)listdefs->item(i);
+ // Okay now change blocks to whatever element they're supposed to be
+ string fix;
+ wstring val;
- if(listdef == NULL || !listdef.hasAttribute(kAtList))
- continue;
+ DOM::Node parent = block.getParentNode();
- for(int j = 0; j < lists->getLength(); j++)
- {
- DOM::Element list = (const DOM::Element&)lists->item(j);
- if(list != NULL)
- {
- if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList))
- {
- // And copy all the attributes from the list definition to the list
- DOMHelpers::copyAttributes(listdef, list, kHideList);
- list.removeAttribute(kAtList);
- }
- }
- }
- }
- }
- }
-}
-
-void XmlFixups::fixBlocks(const DOM::Document doc)
-{
- // Get all the blocks
- DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
- if(blocks != NULL)
+ if(parent != NULL)
{
- string fix;
- wstring val;
+ // Figure out what kind of element they want block fixed to
+ val = block.getAttribute(kAtFix);
+ if(val.length() > 0)
+ block.removeAttribute(kAtFix);
- for(int i = 0; i < blocks->getLength(); i++)
+ // BUG: Sablotron bug work around
+ if(val.length() > 0)
{
- DOM::Element block = (const DOM::Element&)blocks->item(i);
- DOM::Node parent = block.getParentNode();
-
- if(parent == NULL)
- continue;
-
- fix.resize(0);
- val.resize(0);
-
- // Figure out what kind of element they want block fixed to
- val = block.getAttribute(kAtFix);
- if(val.length() > 0)
- block.removeAttribute(kAtFix);
-
- // BUG: Sablotron bug work around
- if(val.length() > 0)
- {
- val = block.getAttributeNS("", kAtFix);
- if(val.length() > 0)
- block.removeAttributeNS("", kAtFix);
- }
-
+ val = block.getAttributeNS("", kAtFix);
if(val.length() > 0)
- DOM::transcode16to8(val, fix);
-
- if(fix.length() == 0)
- fix = kElPara;
-
- // Create duplicate of the 'fix' element
- DOM::Element el = doc.createElement(fix);
- DOMHelpers::copyAttributes(block, el, NULL);
-
- // Replace block with the given 'fix' element
- while(block.hasChildNodes())
- el.appendChild(block.removeChild(block.getFirstChild()));
-
- parent.replaceChild(el, block);
+ block.removeAttributeNS("", kAtFix);
}
- }
-}
-void XmlFixups::consolidateEndTags(DOM::Document& doc)
-{
- DOM::Element top = doc.getDocumentElement();
- ASSERT(top != NULL);
+ if(val.length() > 0)
+ DOM::transcode16to8(val, fix);
- for(const char** t = kConsolidateEnd; *t != NULL; t++)
- {
- DOM::NodeList elements = doc.getElementsByTagName(*t);
- if(elements != NULL)
- {
- int x = elements->getLength();
- for(int j = 0; j < x; j++)
- {
- // Make sure it's a valid element
- DOM::Element element = (const DOM::Element&)elements->item(j);
- if(element == NULL)
- continue;
-
- DOM::Element parent = (const DOM::Element&)element.getParentNode();
- if(parent == NULL)
- continue;
-
- // Remove it from it's child
- parent.removeChild(element);
-
- // And append it to the end of the document
- top.appendChild(element);
- }
- }
- }
-}
-
-void XmlFixups::consolidateStartTags(DOM::Document& doc)
-{
- DOM::Element top = doc.getDocumentElement();
- ASSERT(top != NULL);
-
- DOM::Node first = top.getFirstChild();
+ if(fix.length() == 0)
+ fix = kElPara;
- for(const char** t = kConsolidateStart; *t != NULL; t++)
- {
- DOM::NodeList elements = doc.getElementsByTagName(*t);
- if(elements != NULL)
- {
- int x = elements->getLength();
- for(int j = 0; j < x; j++)
- {
- // Make sure it's a valid element
- DOM::Element element = (const DOM::Element&)elements->item(j);
- if(element == NULL || element == first)
- continue;
-
- DOM::Element parent = (const DOM::Element&)element.getParentNode();
- if(parent == NULL)
- continue;
-
- // Remove it from it's child
- parent.removeChild(element);
+ // Create duplicate of the 'fix' element
+ DOM::Element el = doc.createElement(fix);
+ DOMHelpers::copyAttributes(block, el, NULL);
- // And put at start of the document of the document
- ASSERT(first != NULL);
- top.insertBefore(element, first);
- }
- }
- }
-}
-
-void XmlFixups::combineDuplicates(const DOM::Document& doc)
-{
- bool found;
-
- do
- {
- found = false;
-
- // Go through the list of nodes
- for(const char** t = kNoDuplicates; *t != NULL; t++)
- {
- DOM::NodeList elements = doc.getElementsByTagName(*t);
- if(elements != NULL)
- {
- int x = elements->getLength();
- for(int j = 0; j < x; j++)
- {
- // Make sure it's a valid element
- DOM::Element element = (const DOM::Element&)elements->item(j);
- if(element == NULL)
- continue;
-
- // Get neighbors
- DOM::Node previous = element.getPreviousSibling();
- DOM::Node next = element.getNextSibling();
-
- // Make sure it's still in the document, as we may have
- // removed it on a previous loop
- DOM::Node parent = element.getParentNode();
- if(parent == NULL)
- continue;
-
- // Combine previous if valid
- if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE &&
- DOMHelpers::isEqualElement((DOM::Element&)previous, element))
- {
- while(previous.hasChildNodes())
- {
- DOM::Node child = previous.removeChild(previous.getLastChild());
- if(child != NULL)
- {
- if(element.hasChildNodes())
- element.insertBefore(child, element.getFirstChild());
- else
- element.appendChild(child);
- }
- }
-
- // Remove duplicate node
- parent.removeChild(previous);
- found = true;
- }
-
- // Combine next if valid
- if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE &&
- DOMHelpers::isEqualElement((DOM::Element&)next, element))
- {
- while(next.hasChildNodes())
- {
- DOM::Node child = next.removeChild(next.getFirstChild());
- if(child != NULL)
- element.appendChild(child);
- }
-
- // Remove duplicate node
- parent.removeChild(next);
- found = true;
- }
- }
- }
- }
+ // Replace block with the given 'fix' element
+ while(block.hasChildNodes())
+ el.appendChild(block.removeChild(block.getFirstChild()));
- // Keep looping until no more duplicates found
+ parent.replaceChild(el, block);
}
- while(found);
}