diff options
Diffstat (limited to 'src/xmlfixups.cpp')
-rw-r--r-- | src/xmlfixups.cpp | 566 |
1 files changed, 271 insertions, 295 deletions
diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp
index 707294d..b8c84f4 100644
--- a/src/xmlfixups.cpp
+++ b/src/xmlfixups.cpp
@@ -45,7 +45,7 @@ static const char* kNoDuplicates[] =
     { kElB, kElU, kElI, kElColor, kElHide, kElColor, kElSuper, kElSub, NULL };
 
 static const char* kRemoveTags[] =
-    { kElDest, kElListdef, kElListtable, NULL };
+    { kElDest, kElListdef, kElListtable, kElFontTable, NULL };
 
 static const char* kBlockTags[] =
     { kElTable, kElPara, NULL };
@@ -59,19 +59,18 @@ static const char* kConsolidateEnd[] =
 static const char* kConsolidateStart[] =
     { kElStylesheet, kElInfo, NULL };
 
+void loadStringSet(StringSet& set, const char** strings)
+{
+    for( ; *strings != NULL; ++strings) // Advance, or this never terminates
+        set.insert(string(*strings));
+}
 
-void XmlFixups::breakBreak(DOM::Document& doc, const string& contain,
-                           const string& tag)
+XmlFixups::XmlFixups()
 {
-    DOM::NodeList els = doc.getElementsByTagName(tag);
-    if(els != NULL)
-    {
-        for(int i = 0; i < els->getLength(); i++)
-        {
-            DOM::Element el = (const DOM::Element&)els->item(i);
-            breakElement(el, contain);
-        }
-    }
+    loadStringSet(m_duplicates, kNoDuplicates);
+    loadStringSet(m_removes, kRemoveTags);
+    loadStringSet(m_consolidateStart, kConsolidateStart);
+    loadStringSet(m_consolidateEnd, kConsolidateEnd);
 }
 
 bool XmlFixups::breakElement(const DOM::Element& el, const string& contain)
@@ -315,19 +314,6 @@ void XmlFixups::breakTags(DOM::Document& doc, const string& parentName,
 
             if(parent != NULL && !DOMHelpers::isElement(parent, parentName))
                 parent.removeChild(tag);
-#if 0
-            else if(tag.hasChildNodes())
-            {
-                DOM::NodeList children = tag.getChildNodes();
-                if(children != NULL && children->getLength() == 1)
-                {
-                    DOM::Node child = children->item(0);
-                    if(child != NULL && !child.hasChildNodes() &&
-                       DOMHelpers::isElement(child, kElBlock))
-                        parent.removeChild(tag);
-                }
-            }
-#endif
         }
     }
 }
@@ -394,51 +380,255 @@ void XmlFixups::breakLists(DOM::Document& doc)
     }
 }
 
-void XmlFixups::fixStyles(const DOM::Document doc)
+void XmlFixups::runPassTwo(const DOM::Document& doc)
 {
-    // Get all stylesheet styles
-    DOM::NodeList styles = doc.getElementsByTagName(kElStyle);
-    if(styles != NULL)
+    /*
+     * Okay, this function is complicated and long. It was all broken up into
+     * shorter functions previously but that sucked for efficiency. Basically
+     * we want to iterate over the document as few times as possible and because
+     * of that we combine all of that here.
+     *
+     * In this pass:
+     *  o Fix:
+     *    - font names
+     *    - style names
+     *    - list attributes
+     *    - block elements
+     *  o Consolidate certain tags to end of doc
+     *  o Consolidate certain tags to start of doc
+     *  o Combine duplicates of certain tags
+     *  o Remove certain tags
+     *  o Break out pages and sections
+     */
+
+    bool haveStyles = false;
+    ElementTable styles;
+
+    bool haveFonts = false;
+    ElementTable fonts;
+
+    bool haveLists = false;
+    ElementTable lists;
+
+    DOM::Element top = doc.getDocumentElement();
+
+    // Get stylesheet block
+    DOM::Element el = DOMHelpers::getChildElement(top, kElStylesheet);
+    if(el != NULL)
+    {
+        // Load the styles into an id mapped table
+        styles.load(el, kElStyle);
+
+        if(!styles.empty())
+        {
+            styles.removeIds();
+            haveStyles = true;
+        }
+    }
+
+    // Get the font block
+    el = DOMHelpers::getChildElement(top, kElFontTable);
+    if(el != NULL)
+    {
+        // Load the fonts into an id mapped table
+        fonts.load(el, kElFont);
+
+        if(!fonts.empty())
+        {
+            fonts.removeIds();
+            haveFonts = true;
+        }
+    }
+
+    // Get the list definition block
+    el = DOMHelpers::getChildElement(top, kElListtable);
+    if(el != NULL)
     {
-        // Get list of blocks in the document
-        DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
-        if(blocks != NULL)
+        // Load the lists into an id mapped table
+        lists.load(el, kElListdef);
+
+        if(!lists.empty())
         {
-            for(int i = 0; i < blocks->getLength(); i++)
+            lists.removeIds();
+            haveLists = true;
+        }
+    }
+
+    NodeStack toStart;  // Nodes that get moved to beginning of document
+    NodeStack toEnd;    // Nodes that get moved to the end of the document
+
+    ElementIterator it(top);
+    ElementIterator end;
+
+    for( ; it != end; ++it)
+    {
+        el = *it;
+
+        // Mark each node as we've seen it so we don't
+        // do a given element twice
+        if((int)el.getUserData() == PASS_TWO)
+            continue;
+
+        el.setUserData((void*)PASS_TWO);
+        string name = el.getNodeName();
+
+        if(name == kElBlock)
+        {
+            // Change style attribute on blocks to name
+            if(haveStyles && el.hasAttribute(kElStyle))
             {
-                DOM::Element block = (const DOM::Element&)blocks->item(i);
+                DOM::Element style = styles.get(el.getAttribute(kElStyle));
+                if(style != NULL)
+                    el.setAttribute(kElStyle, style.getAttribute(kAtName));
+            }
 
-                if(block == NULL || !block.hasAttribute(kElStyle))
-                    continue;
+            /*
+             * The below function call replaces the current element with another
+             * new element, which still needs to be processed. NOTE(review): no
+             * --it is done here (unlike the tag removal case below) -- confirm.
+             */
 
-                // Lookup block styles
-                for(int j = 0; j < styles->getLength(); j++)
+            // Now fix the block itself
+            fixBlock(doc, el);
+
+            continue; // Current element no longer valid
+        }
+
+        // Change id attribute on fonts to name
+        else if(haveFonts && name == kElFont)
+        {
+            if(el.hasAttribute(kAtId))
+            {
+                DOM::Element font = fonts.get(el.getAttribute(kAtId));
+                if(font != NULL)
+                    el.setAttribute(kAtName, font.getAttribute(kAtName));
+            }
+        }
+
+        // Copy list attributes onto the lists
+        else if(haveLists && name == kElList)
+        {
+            if(el.hasAttribute(kAtList))
+            {
+                DOM::Element list = lists.get(el.getAttribute(kAtList));
+                if(list != NULL)
                 {
-                    DOM::Element style = (const DOM::Element&)styles->item(j);
-                    if(style != NULL)
-                    {
-                        if(style.getAttribute(kAtId) == block.getAttribute(kElStyle))
-                        {
-                            // And change to the name
-                            wstring name = style.getAttribute(kAtName);
-                            if(name.length() > 0)
-                                block.setAttribute(kElStyle, name);
-                        }
-                    }
+                    // And copy all the attributes from the list definition to the list
+                    DOMHelpers::copyAttributes(list, el, kHideList);
+                    el.removeAttribute(kAtList);
                 }
             }
         }
 
-        // A little cleanup of the stylesheet styles
-        for(int i = 0; i < styles->getLength(); i++)
+        // Break out pages and sections all the way to document
+        if(name == kElPage || name == kElSect)
         {
-            DOM::Element style = (const DOM::Element&)styles->item(i);
-            if(style != NULL)
-                style.removeAttribute(kAtId);
+            breakElement(el, kElDoc);
+
+            /*
+             * NOTE: The flow of the document is changed here. But the current
+             * element is still in a valid place for iterating over the document
+             * so we don't have to worry about it.
+             */
         }
-    }
-}
 
+        // Tags that just plain get removed
+        if(m_removes.find(name) != m_removes.end())
+        {
+            DOM::Node parent = el->getParentNode();
+
+            if(parent != NULL)
+            {
+                /*
+                 * After the element is removed, the current element is no longer
+                 * valid for iterating over the document. In addition we insert
+                 * all the child nodes of the current element before it. We need
+                 * to be sure to iterate over these elements, and to do so we
+                 * decrement the iterator.
+                 */
+                --it;
+
+                while(el.hasChildNodes())
+                    parent.insertBefore(el.removeChild(el.getFirstChild()), el);
+
+                parent.removeChild(el);
+                continue; /* Current element doesn't need any more processing */
+            }
+        }
+
+
+        // Tags that need to get consolidated to start
+        if(m_consolidateStart.find(name) != m_consolidateStart.end())
+            toStart.push(el);
+
+        // Tags that need to get consolidated to end
+        else if(m_consolidateEnd.find(name) != m_consolidateEnd.end())
+            toEnd.push(el);
+
+
+        // Tags for which duplicates need to be combined
+        if(m_duplicates.find(name) != m_duplicates.end())
+        {
+            DOM::Element parent = (const DOM::Element&)el.getParentNode();
+            if(parent != NULL)
+            {
+                // Loop till we find no more of the same
+                for(;;)
+                {
+                    DOM::Node next = el.getNextSibling();
+
+                    // If it's the same type of element ...
+                    if(!DOMHelpers::isElement(next, name))
+                        break;
+
+                    // NOTE: Notice we do nothing with attributes. Currently
+                    // all elements in the duplicates list don't need that.
+
+                    while(next.hasChildNodes())
+                        el.appendChild(next.removeChild(next.getFirstChild()));
+
+                    // Remove duplicate node
+                    parent.removeChild(next);
+                }
+            }
+        }
+    }
+
+    // Complete consolidation to front
+    while(!toStart.empty())
+    {
+        DOM::Node node = toStart.top();
+        DOM::Node parent = node.getParentNode();
+        if(parent != NULL && DOMHelpers::hasAncestor(top, node))
+        {
+            // Remove the queued node (not the stale loop element) from its parent
+            parent.removeChild(node);
+
+            // And put at start of the document
+            top.insertBefore(node, top.getFirstChild());
+        }
+
+        toStart.pop();
+    }
+
+    // Complete consolidation to end
+    while(!toEnd.empty())
+    {
+        DOM::Node node = toEnd.top();
+        DOM::Node parent = node.getParentNode();
+        if(parent != NULL && DOMHelpers::hasAncestor(top, node))
+        {
+            // Remove the queued node (not the stale loop element) from its parent
+            parent.removeChild(node);
+
+            // And put at end of the document
+            top.appendChild(node);
+        }
+
+        toEnd.pop();
+    }
+
+}
 
 void XmlFixups::breakTables(DOM::Document& doc)
 {
@@ -526,258 +716,44 @@ void XmlFixups::breakTables(DOM::Document& doc)
     }
 }
 
-void XmlFixups::removeTags(const DOM::Document& doc)
-{
-    // Go through the list of nodes
-    for(const char** t = kRemoveTags; *t != NULL; t++)
-    {
-        DOM::NodeList elements = doc.getElementsByTagName(*t);
-        if(elements != NULL)
-        {
-            for(int j = 0; j < elements->getLength(); j++)
-            {
-                DOM::Element el = (const DOM::Element&)elements->item(j);
-                DOM::Node parent = el->getParentNode();
-                if(parent == NULL)
-                    continue;
-
-                while(el.hasChildNodes())
-                    parent.insertBefore(el.removeChild(el.getFirstChild()), el);
-
-                parent.removeChild(el);
-            }
-        }
-    }
-}
-
-void XmlFixups::fixLists(const DOM::Document doc)
+void XmlFixups::fixBlock(const DOM::Document& doc, DOM::Element& block)
 {
-    // Get all the lists
-    DOM::NodeList lists = doc.getElementsByTagName(kElList);
-    if(lists != NULL)
-    {
-        // And all the list definitions
-        DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef);
-        if(listdefs != NULL)
-        {
-            for(int i = 0; i < listdefs->getLength(); i++)
-            {
-                DOM::Element listdef = (const DOM::Element&)listdefs->item(i);
+    // Okay now change blocks to whatever element they're supposed to be
+    string fix;
+    wstring val;
 
-                if(listdef == NULL || !listdef.hasAttribute(kAtList))
-                    continue;
+    DOM::Node parent = block.getParentNode();
 
-                for(int j = 0; j < lists->getLength(); j++)
-                {
-                    DOM::Element list = (const DOM::Element&)lists->item(j);
-                    if(list != NULL)
-                    {
-                        if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList))
-                        {
-                            // And copy all the attributes from the list definition to the list
-                            DOMHelpers::copyAttributes(listdef, list, kHideList);
-                            list.removeAttribute(kAtList);
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-void XmlFixups::fixBlocks(const DOM::Document doc)
-{
-    // Get all the blocks
-    DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
-    if(blocks != NULL)
+    if(parent != NULL)
     {
-        string fix;
-        wstring val;
+        // Figure out what kind of element they want block fixed to
+        val = block.getAttribute(kAtFix);
+        if(val.length() > 0)
+            block.removeAttribute(kAtFix);
 
-        for(int i = 0; i < blocks->getLength(); i++)
+        // BUG: Sablotron bug work around
+        if(val.length() > 0)
         {
-            DOM::Element block = (const DOM::Element&)blocks->item(i);
-            DOM::Node parent = block.getParentNode();
-
-            if(parent == NULL)
-                continue;
-
-            fix.resize(0);
-            val.resize(0);
-
-            // Figure out what kind of element they want block fixed to
-            val = block.getAttribute(kAtFix);
-            if(val.length() > 0)
-                block.removeAttribute(kAtFix);
-
-            // BUG: Sablotron bug work around
-            if(val.length() > 0)
-            {
-                val = block.getAttributeNS("", kAtFix);
-                if(val.length() > 0)
-                    block.removeAttributeNS("", kAtFix);
-            }
-
+            val = block.getAttributeNS("", kAtFix);
             if(val.length() > 0)
-                DOM::transcode16to8(val, fix);
-
-            if(fix.length() == 0)
-                fix = kElPara;
-
-            // Create duplicate of the 'fix' element
-            DOM::Element el = doc.createElement(fix);
-            DOMHelpers::copyAttributes(block, el, NULL);
-
-            // Replace block with the given 'fix' element
-            while(block.hasChildNodes())
-                el.appendChild(block.removeChild(block.getFirstChild()));
-
-            parent.replaceChild(el, block);
+                block.removeAttributeNS("", kAtFix);
         }
-    }
-}
 
-void XmlFixups::consolidateEndTags(DOM::Document& doc)
-{
-    DOM::Element top = doc.getDocumentElement();
-    ASSERT(top != NULL);
+        if(val.length() > 0)
+            DOM::transcode16to8(val, fix);
 
-    for(const char** t = kConsolidateEnd; *t != NULL; t++)
-    {
-        DOM::NodeList elements = doc.getElementsByTagName(*t);
-        if(elements != NULL)
-        {
-            int x = elements->getLength();
-            for(int j = 0; j < x; j++)
-            {
-                // Make sure it's a valid element
-                DOM::Element element = (const DOM::Element&)elements->item(j);
-                if(element == NULL)
-                    continue;
-
-                DOM::Element parent = (const DOM::Element&)element.getParentNode();
-                if(parent == NULL)
-                    continue;
-
-                // Remove it from it's child
-                parent.removeChild(element);
-
-                // And append it to the end of the document
-                top.appendChild(element);
-            }
-        }
-    }
-}
-
-void XmlFixups::consolidateStartTags(DOM::Document& doc)
-{
-    DOM::Element top = doc.getDocumentElement();
-    ASSERT(top != NULL);
-
-    DOM::Node first = top.getFirstChild();
+        if(fix.length() == 0)
+            fix = kElPara;
 
-    for(const char** t = kConsolidateStart; *t != NULL; t++)
-    {
-        DOM::NodeList elements = doc.getElementsByTagName(*t);
-        if(elements != NULL)
-        {
-            int x = elements->getLength();
-            for(int j = 0; j < x; j++)
-            {
-                // Make sure it's a valid element
-                DOM::Element element = (const DOM::Element&)elements->item(j);
-                if(element == NULL || element == first)
-                    continue;
-
-                DOM::Element parent = (const DOM::Element&)element.getParentNode();
-                if(parent == NULL)
-                    continue;
-
-                // Remove it from it's child
-                parent.removeChild(element);
+        // Create duplicate of the 'fix' element
+        DOM::Element el = doc.createElement(fix);
+        DOMHelpers::copyAttributes(block, el, NULL);
 
-                // And put at start of the document of the document
-                ASSERT(first != NULL);
-                top.insertBefore(element, first);
-            }
-        }
-    }
-}
-
-void XmlFixups::combineDuplicates(const DOM::Document& doc)
-{
-    bool found;
-
-    do
-    {
-        found = false;
-
-        // Go through the list of nodes
-        for(const char** t = kNoDuplicates; *t != NULL; t++)
-        {
-            DOM::NodeList elements = doc.getElementsByTagName(*t);
-            if(elements != NULL)
-            {
-                int x = elements->getLength();
-                for(int j = 0; j < x; j++)
-                {
-                    // Make sure it's a valid element
-                    DOM::Element element = (const DOM::Element&)elements->item(j);
-                    if(element == NULL)
-                        continue;
-
-                    // Get neighbors
-                    DOM::Node previous = element.getPreviousSibling();
-                    DOM::Node next = element.getNextSibling();
-
-                    // Make sure it's still in the document, as we may have
-                    // removed it on a previous loop
-                    DOM::Node parent = element.getParentNode();
-                    if(parent == NULL)
-                        continue;
-
-                    // Combine previous if valid
-                    if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE &&
-                       DOMHelpers::isEqualElement((DOM::Element&)previous, element))
-                    {
-                        while(previous.hasChildNodes())
-                        {
-                            DOM::Node child = previous.removeChild(previous.getLastChild());
-                            if(child != NULL)
-                            {
-                                if(element.hasChildNodes())
-                                    element.insertBefore(child, element.getFirstChild());
-                                else
-                                    element.appendChild(child);
-                            }
-                        }
-
-                        // Remove duplicate node
-                        parent.removeChild(previous);
-                        found = true;
-                    }
-
-                    // Combine next if valid
-                    if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE &&
-                       DOMHelpers::isEqualElement((DOM::Element&)next, element))
-                    {
-                        while(next.hasChildNodes())
-                        {
-                            DOM::Node child = next.removeChild(next.getFirstChild());
-                            if(child != NULL)
-                                element.appendChild(child);
-                        }
-
-                        // Remove duplicate node
-                        parent.removeChild(next);
-                        found = true;
-                    }
-                }
-            }
-        }
+        // Replace block with the given 'fix' element
+        while(block.hasChildNodes())
+            el.appendChild(block.removeChild(block.getFirstChild()));
 
-        // Keep looping until no more duplicates found
+        parent.replaceChild(el, block);
     }
-    while(found);
 }
 