diff options
| author | Stef Walter <stef@memberwebs.com> | 2004-07-24 19:06:51 +0000 | 
|---|---|---|
| committer | Stef Walter <stef@memberwebs.com> | 2004-07-24 19:06:51 +0000 | 
| commit | 8335fdb6b7e7afb57d096e0f3a453b662f7a23c0 (patch) | |
| tree | ef3c3079f58b44cb9f1b2953f05e1628d6846e9b /src | |
| parent | ff4568d01651afd615751f9fc683dbe30f2ced9b (diff) | |
- Post processing code cleanup.
Diffstat (limited to 'src')
| -rw-r--r-- | src/domhelpers.cpp | 153 | ||||
| -rw-r--r-- | src/domhelpers.h | 85 | ||||
| -rw-r--r-- | src/sablo.h | 18 | ||||
| -rw-r--r-- | src/xmlcomposer.cpp | 42 | ||||
| -rw-r--r-- | src/xmlfixups.cpp | 566 | ||||
| -rw-r--r-- | src/xmlfixups.h | 55 | 
6 files changed, 586 insertions, 333 deletions
| diff --git a/src/domhelpers.cpp b/src/domhelpers.cpp index 6cf8052..ac93f10 100644 --- a/src/domhelpers.cpp +++ b/src/domhelpers.cpp @@ -40,6 +40,8 @@  #include "domhelpers.h"  #include "tags.h" +using std::make_pair; +  bool DOMHelpers::isElement(const DOM::Node& node, const string& name)  {      return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && @@ -176,3 +178,154 @@ void DOMHelpers::insertAfter(DOM::Node& parent, const DOM::Node& node,      else          parent.insertBefore(node, sibling);  } + +DOM::Element DOMHelpers::getChildElement(const DOM::Node& parent, const string& name) +{ +    DOM::Node child = parent.getFirstChild(); +    for( ; child != NULL; child = child.getNextSibling())   // step to the next sibling each pass so the scan terminates +    { +        if(isElement(child, name)) +            return (DOM::Element&)child;   // first direct child with this name +    } + +    return DOM::Element();   // null element: no direct child matched +} + +bool DOMHelpers::hasAncestor(const DOM::Node& ancestor, const DOM::Node& node) +{ +    DOM::Node n = node;   // the walk starts at node itself, so node == ancestor also counts + +    while(n != NULL) +    { +        if(n == ancestor) +            return true; + +        n = n.getParentNode(); +    } + +    return false; +} + + +/* ---------------------------------------------------------------------------------- + *  ElementTable + */ + +void ElementTable::load(const DOM::Node& parent, const string& name) +{ +    clear();   // drop whatever an earlier load() left behind + +    DOM::Node child = parent.getFirstChild(); +    for( ; child != NULL; child = child.getNextSibling())   // visit every direct child exactly once +    { +        if(DOMHelpers::isElement(child, name)) +        { +            DOM::Element& el = (DOM::Element&)child; +            wstring id = el.getAttribute(kAtId); + +            if(!id.empty())   // elements without an id cannot be looked up, skip them +                insert(make_pair(id, el)); +        } +    } +} + +DOM::Element ElementTable::get(const wstring& id) const +{ +    const_iterator it = find(id); +    return it == end() ? 
DOM::Element() : it->second; +} + +void ElementTable::removeIds() +{ +    iterator it = begin(); +    iterator e = end(); + +    for( ; it != e; it++) +        it->second.removeAttribute(kAtId); +} + +/* ---------------------------------------------------------------------------------- + *  ElementIterator + */ + +void ElementIterator::next() +{ +    if(m_current == NULL) +        return; + +    // Always descend into children first. The result goes through a +    // temporary so m_current is never replaced by NULL while it is +    // still needed for the sibling/parent lookups below. +    DOM::Element el = nextel(m_current.getFirstChild()); +    if(el != NULL) +    { +        m_current = el; +        return; +    } + +    // Otherwise take the nearest following sibling element, climbing one +    // level at a time. Stop at m_top so we never leave its subtree. +    DOM::Node n = m_current; +    while(n != NULL && n != m_top) +    { +        el = nextel(n.getNextSibling()); +        if(el != NULL) +        { +            m_current = el; +            return; +        } + +        n = n.getParentNode(); +    } + +    // Nothing left below m_top +    m_current = NULL; +} + +DOM::Element ElementIterator::nextel(DOM::Node node) +{ +    while(node != NULL) +    { +        if(node.getNodeType() == DOM::Element::ELEMENT_NODE) +            return (DOM::Element&)node; + +        node = node.getNextSibling(); +    } + +    return DOM::Element(); +} + +void ElementIterator::prev() +{ +    DOM::Element el; + +    if(m_current == NULL) +    { +        /* Allow backing into the iterator: start from the top and +         * take its deepest last element below */ +        el = m_top; +    } +    else if(m_current == m_top) +    { +        // Already in front of the first element +        return; +    } +    else +    { +        el = prevel(m_current.getPreviousSibling()); +        if(el == NULL) +        { +            // No earlier sibling, so the element before this one is +            // its parent. Landing on m_top means 'before the first +            // element'; a following next() starts over from there. +            m_current = m_current.getParentNode(); +            return; +        } +    } + +    // The element just before us in document order is the deepest +    // last element inside 'el' (or 'el' itself when it has none) +    while(el != NULL) +    { +        DOM::Element last = prevel(el.getLastChild()); +        if(last == NULL) +            break; + +        el = last; +    } + +    m_current = el; +} + + +DOM::Element ElementIterator::prevel(DOM::Node node) +{ +    while(node != NULL) +    { +        
if(node.getNodeType() == DOM::Element::ELEMENT_NODE) +            return (DOM::Element&)node; + +        node = node.getPreviousSibling(); +    } + +    return DOM::Element(); +} diff --git a/src/domhelpers.h b/src/domhelpers.h index 043ffd4..d125f80 100644 --- a/src/domhelpers.h +++ b/src/domhelpers.h @@ -40,6 +40,9 @@  #define __DOMHELPERS_H__  #include "sablo.h" +#include <map> +#include <stack> +#include <set>  /*   * DOMHelpers @@ -68,6 +71,88 @@ public:      // Get previous element (in XML flow) of a given name      static DOM::Element getPriorElement(const DOM::Node& node, const string& name); + +    // Get first child element of a given name +    static DOM::Element getChildElement(const DOM::Node& parent, const string& name); + +    // Check if a given element is anothers ancestor +    static bool hasAncestor(const DOM::Node& ancestor, const DOM::Node& node); +}; + +/* + * ElementTable + * + * A table of elements matched to their ids for quick access while applying + * things like fonts, styles, lists from their definitions. + */ +class ElementTable : +    public std::map<wstring, DOM::Element> +{ +public: +    void load(const DOM::Node& parent, const string& name); + +    DOM::Element get(const wstring& id) const; + +    bool has(const wstring& id) const +        { return find(id) != end(); } + +    void removeIds(); +}; + +// Some other handy types +typedef std::set<string> StringSet; +typedef std::stack<DOM::Node> NodeStack; + +/* + * ElementIterator + * + * For iterating through the elements in a document. 
+ */ +class ElementIterator +    : public std::iterator<std::input_iterator_tag, DOM::Element, ptrdiff_t> +{ +public: +    ElementIterator() +        { m_current = NULL; m_done = false; }   // default-constructed iterator serves as 'end' +    ElementIterator(const DOM::Element& top) +        { m_top = top; m_current = top; m_done = false; next(); }   // next() moves off 'top' itself onto the first element below it +    ElementIterator(const ElementIterator& x) +        { m_top = x.m_top; m_current = x.m_current; m_done = x.m_done; } + +    const DOM::Element& operator*() const +        { return m_current; } +    const DOM::Element* operator->() const +        { return (&**this); } +    const ElementIterator& operator++() +        { next(); return (*this); } +    const ElementIterator& operator--() +        { prev(); return (*this); } + +    // Friend comparison functions +    friend bool operator==(const ElementIterator& x, const ElementIterator& y); +    friend bool operator!=(const ElementIterator& x, const ElementIterator& y); + +// Implementation +protected: + +    void next(); +    DOM::Element nextel(DOM::Node node); + +    void prev(); +    DOM::Element prevel(DOM::Node node); + +// Data +protected: + +    DOM::Element m_top; +    DOM::Element m_current; +    bool m_done;  }; +// friend functions +inline bool operator==(const ElementIterator& x, const ElementIterator& y) +   { return y.m_current == x.m_current && (x.m_current == NULL || y.m_top == x.m_top); }   // exhausted iterators compare equal whatever their top, so a bound iterator can reach a default-constructed end +inline bool operator!=(const ElementIterator& x, const ElementIterator& y) +   { return (!(x == y)); } +  #endif // __DOMHELPERS_H__ diff --git a/src/sablo.h b/src/sablo.h index aecde18..196b70b 100644 --- a/src/sablo.h +++ b/src/sablo.h @@ -777,6 +777,8 @@ namespace DOM  		Element(const Element& node) :  			Node(node) {} +        Element& operator=(const Node& other) +            { Node::operator=(other); return *this; }  	    Element& operator=(const Element& other)  			{ Node::operator=(other); return *this; }  		Element& operator=(const void* null) @@ -946,6 +948,8 @@ namespace DOM  		CharacterData(const Node& node) :  			Node(node) { } +        CharacterData& operator=(const Node& 
other) +            { Node::operator=(other); return *this; }  	    CharacterData& operator=(const CharacterData& other)  			{ Node::operator=(other); return *this; }  		CharacterData& operator=(const void* null) @@ -1109,6 +1113,8 @@ namespace DOM  		Comment(const Comment& node) :  			CharacterData(node) { } +        Comment& operator=(const Node& other) +            { Node::operator=(other); return *this; }  		Comment& operator=(const Comment& other)  			{ CharacterData::operator=(other); return *this; }  		Comment& operator=(void* null) @@ -1133,6 +1139,8 @@ namespace DOM  		ProcessingInstruction(const ProcessingInstruction& node) :  			Node(node) { } +        ProcessingInstruction& operator=(const Node& other) +            { Node::operator=(other); return *this; }  		ProcessingInstruction& operator=(const ProcessingInstruction& other)  			{ Node::operator=(other); return *this; }  		ProcessingInstruction& operator=(void* null) @@ -1182,6 +1190,8 @@ namespace DOM  		DocumentFragment(const DocumentFragment& node) :  			Node(node) { } +        DocumentFragment& operator=(const Node& other) +            { Node::operator=(other); return *this; }  		DocumentFragment& operator=(const DocumentFragment& other)  			{ Node::operator=(other); return *this; }  		DocumentFragment& operator=(void* null) @@ -1200,6 +1210,8 @@ namespace DOM  		Entity(const Entity& node) :  			Node(node) { } +        Entity& operator=(const Node& other) +            { Node::operator=(other); return *this; }  		Entity& operator=(const Entity& other)  			{ Node::operator=(other); return *this; }  		Entity& operator=(void* null) @@ -1245,6 +1257,8 @@ namespace DOM  		EntityReference(const EntityReference& node) :  			Node(node) { } +        EntityReference& operator=(const Node& other) +            { Node::operator=(other); return *this; }  		EntityReference& operator=(const EntityReference& other)  			{ Node::operator=(other); return *this; }  		EntityReference& operator=(void* null) @@ -1263,6 
+1277,8 @@ namespace DOM  		Notation(const Notation& node) :  			Node(node) { } +        Notation& operator=(const Node& other) +            { Node::operator=(other); return *this; }  		Notation& operator=(const Notation& other)  			{ Node::operator=(other); return *this; }  		Notation& operator=(void* null) @@ -1299,6 +1315,8 @@ namespace DOM  		DocumentType(const DocumentType& node) :  			Node(node) { } +        DocumentType& operator=(const Node& other) +            { Node::operator=(other); return *this; }  		DocumentType& operator=(const DocumentType& other)  			{ Node::operator=(other); return *this; }  		DocumentType& operator=(void* null) diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp index cb1bf16..dcd4c34 100644 --- a/src/xmlcomposer.cpp +++ b/src/xmlcomposer.cpp @@ -124,29 +124,33 @@ void XmlComposer::endDocument()  {  	LevelHandler::endDocument(); -	// Pass 0: Cleanup the tree -	XmlFixups::combineDuplicates(m_document); -	XmlFixups::consolidateStartTags(m_document); -    XmlFixups::consolidateEndTags(m_document); +    XmlFixups fix; + +    // Pass 0: Cleanup the tree +    // XmlFixups::combineDuplicates(m_document); +    // XmlFixups::consolidateStartTags(m_document); +    // XmlFixups::consolidateEndTags(m_document);      // Pass 1: Block breakout -	XmlFixups::breakTables(m_document); -	XmlFixups::breakTags(m_document, kElTable, kElRow); -	XmlFixups::breakTags(m_document, kElRow, kElCell); -	XmlFixups::wrapTags(m_document, kElCell, kElDest); -	XmlFixups::breakBlocks(m_document); -	XmlFixups::breakLists(m_document); +	fix.breakTables(m_document); +	fix.breakTags(m_document, kElTable, kElRow); +	fix.breakTags(m_document, kElRow, kElCell); +	fix.wrapTags(m_document, kElCell, kElDest); +	fix.breakBlocks(m_document); +	fix.breakLists(m_document);      // Pass 2: Fixups -	XmlFixups::fixLists(m_document); -	XmlFixups::fixStyles(m_document); -	XmlFixups::fixBlocks(m_document); -	XmlFixups::removeTags(m_document); -	XmlFixups::breakBreak(m_document, 
kElDoc, kElPage); -	XmlFixups::breakBreak(m_document, kElDoc, kElSect); +    fix.runPassTwo(m_document); + +	// XmlFixups::fixLists(m_document); +	// XmlFixups::fixStyles(m_document); +	// XmlFixups::fixBlocks(m_document); +	// XmlFixups::removeTags(m_document); +	// XmlFixups::breakBreak(m_document, kElDoc, kElPage); +	// XmlFixups::breakBreak(m_document, kElDoc, kElSect);      // Pass 3: Final cleanup -    XmlFixups::combineDuplicates(m_document); +    // XmlFixups::combineDuplicates(m_document);  	return;  } @@ -330,7 +334,7 @@ void XmlComposer::incrementAutoCount(int type)  #define DEFAULT_CONTROLWORD processDefault(cw, flags, param)  #define DUMMY  1 == 1  #define NUM_ATTR(x) formatInt(x) -#define DO_EXTRAS() (m_composer->GetOptions().extras) +#define DO_EXTRAS() (m_composer->getOptions().extras)  /* ----------------------------------------------------------------------------------   *  BASE ANALYSER @@ -1049,7 +1053,7 @@ ON_CHARDATA(Content)              AN_ELEMENT(kElFont);              if(font != -1) -                AN_ATTRIBUTE(kAtId, font); +                AN_ATTRIBUTE(kAtName, font);              if(fontsize != -1)                  AN_ATTRIBUTE(kAtSize, fontsize); diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp index 707294d..b8c84f4 100644 --- a/src/xmlfixups.cpp +++ b/src/xmlfixups.cpp @@ -45,7 +45,7 @@ static const char* kNoDuplicates[] =      { kElB, kElU, kElI, kElColor, kElHide, kElColor, kElSuper, kElSub, NULL };  static const char* kRemoveTags[] = - { kElDest, kElListdef, kElListtable, NULL }; + { kElDest, kElListdef, kElListtable, kElFontTable, NULL };  static const char* kBlockTags[] =    { kElTable, kElPara, NULL }; @@ -59,19 +59,18 @@ static const char* kConsolidateEnd[] =  static const char* kConsolidateStart[] =      { kElStylesheet, kElInfo, NULL }; +void loadStringSet(StringSet& set, const char** strings) +{ +    for( ; *strings != NULL; ++strings)   // walk the NULL-terminated table; the pointer must advance or the loop never ends +        set.insert(string(*strings)); +} -void XmlFixups::breakBreak(DOM::Document& doc, const string& 
contain, -                              const string& tag) +XmlFixups::XmlFixups()  { -    DOM::NodeList els = doc.getElementsByTagName(tag); -    if(els != NULL) -    { -        for(int i = 0; i < els->getLength(); i++) -        { -            DOM::Element el = (const DOM::Element&)els->item(i); -            breakElement(el, contain); -        } -    } +    loadStringSet(m_duplicates, kNoDuplicates); +    loadStringSet(m_removes, kRemoveTags); +    loadStringSet(m_consolidateStart, kConsolidateStart); +    loadStringSet(m_consolidateEnd, kConsolidateEnd);  }  bool XmlFixups::breakElement(const DOM::Element& el, const string& contain) @@ -315,19 +314,6 @@ void XmlFixups::breakTags(DOM::Document& doc, const string& parentName,              if(parent != NULL && !DOMHelpers::isElement(parent, parentName))                  parent.removeChild(tag); -#if 0 -            else if(tag.hasChildNodes()) -            { -                DOM::NodeList children = tag.getChildNodes(); -                if(children != NULL && children->getLength() == 1) -                { -                    DOM::Node child = children->item(0); -                    if(child != NULL && !child.hasChildNodes() && -                       DOMHelpers::isElement(child, kElBlock)) -                        parent.removeChild(tag); -                } -            } -#endif          }      }  } @@ -394,51 +380,255 @@ void XmlFixups::breakLists(DOM::Document& doc)      }  } -void XmlFixups::fixStyles(const DOM::Document doc) +void XmlFixups::runPassTwo(const DOM::Document& doc)  { -    // Get all stylesheet styles -    DOM::NodeList styles = doc.getElementsByTagName(kElStyle); -    if(styles != NULL) +    /* +     * Okay, this function is complicated and long. It was all broken up into +     * shorter functions previously but that sucked for efficiency. Basically +     * we want to iterate over the document as few times as possible and because +     * of that we combine all of that here. 
+     * +     * In this pass: +     * o Fix: +     *   - font names +     *   - style names +     *   - list attributes +     *   - block elements +     * o Consolidate certain tags to end of doc +     * o Consolidate certain tags to start of doc +     * o Combine duplicates of certain tags +     * o Remove certain tags +     * o Break out pages and sections +     */ + +    bool haveStyles = false; +    ElementTable styles; + +    bool haveFonts = false; +    ElementTable fonts; + +    bool haveLists = false; +    ElementTable lists; + +    DOM::Element top = doc.getDocumentElement(); + +    // Get stylesheet block +    DOM::Element el = DOMHelpers::getChildElement(top, kElStylesheet); +    if(el != NULL) +    { +        // Load the styles into a id mapped table +        styles.load(el, kElStyle); + +        if(!styles.empty()) +        { +            styles.removeIds(); +            haveStyles = true; +        } +    } + +    // Get the font block +    el = DOMHelpers::getChildElement(top, kElFontTable); +    if(el != NULL) +    { +        // Load the fonts into an id mapped table +        fonts.load(el, kElFont); + +        if(!fonts.empty()) +        { +            fonts.removeIds(); +            haveFonts = true; +        } +    } + +    // Get the list definition block +    el = DOMHelpers::getChildElement(top, kElListtable); +    if(el != NULL)      { -        // Get list of blocks in the document -        DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); -        if(blocks != NULL) +        // Load the lists into an id mapped table +        lists.load(el, kElListdef); + +        if(!lists.empty())          { -            for(int i = 0; i < blocks->getLength(); i++) +            lists.removeIds(); +            haveLists = true; +        } +    } + +    NodeStack toStart;      // Nodes that get moved to beginning of document +    NodeStack toEnd;        // Nodes that get moved to the end of the document + +    ElementIterator it(top); +    
ElementIterator end; + +    for( ; it != end; ++it) +    { +        el = *it; + +        // Mark each node as we've seen it so we don't +        // do a given element twice +        if((int)el.getUserData() == PASS_TWO) +            continue; + +        el.setUserData((void*)PASS_TWO); +        string name = el.getNodeName(); + +        if(name == kElBlock) +        { +            // Change style attribute on blocks to name +            if(haveStyles && el.hasAttribute(kElStyle))              { -                DOM::Element block = (const DOM::Element&)blocks->item(i); +                DOM::Element style = styles.get(el.getAttribute(kElStyle)); +                if(style != NULL) +                    el.setAttribute(kElStyle, style.getAttribute(kAtName)); +            } -                if(block == NULL || !block.hasAttribute(kElStyle)) -                    continue; +            /* +             * The below function call replaces the current element with another +             * new element. The new element still needs to be processed, so we +             * just backup one, and then short circuit the loop below. 
+             */ -                // Lookup block styles -                for(int j = 0; j < styles->getLength(); j++) +            // Now fix the block itself +            fixBlock(doc, el); + +            continue;   // Current element no longer valid +        } + +        // Change id attribute on fonts to name +        else if(haveFonts && name == kElFont) +        { +            if(el.hasAttribute(kAtId)) +            { +                DOM::Element font = fonts.get(el.getAttribute(kAtId)); +                if(font != NULL) +                    el.setAttribute(kAtName, font.getAttribute(kAtName)); +            } +        } + +        // Copy list attributes onto the lists +        else if(haveLists && name == kElList) +        { +            if(el.hasAttribute(kAtList)) +            { +                DOM::Element list = lists.get(el.getAttribute(kAtList)); +                if(list != NULL)                  { -                    DOM::Element style = (const DOM::Element&)styles->item(j); -                    if(style != NULL) -                    { -                        if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) -                        { -                            // And change to the name -                            wstring name = style.getAttribute(kAtName); -                            if(name.length() > 0) -                                block.setAttribute(kElStyle, name); -                        } -                    } +                    // And copy all the attributes from the list definition to the list +                    DOMHelpers::copyAttributes(list, el, kHideList); +                    el.removeAttribute(kAtList);                  }              }          } -        // A little cleanup of the stylesheet styles -        for(int i = 0; i < styles->getLength(); i++) +        // Break out pages and sections all the way to document +        if(name == kElPage || name == kElSect)          { -            DOM::Element style 
= (const DOM::Element&)styles->item(i); -            if(style != NULL) -                style.removeAttribute(kAtId); +            breakElement(el, kElDoc); + +            /* +             * NOTE: The flow of the document is changed here. But the current +             * element is still in a valid place for iterating over the document +             * so we don't have to worry about it. +             */          } -   } -} +        // Tags that just plain get removed +        if(m_removes.find(name) != m_removes.end()) +        { +            DOM::Node parent = el->getParentNode(); + +            if(parent != NULL) +            { +                /* +                 * After the element is removed, the current element is no longer +                 * valid for iterating over the document. In addition we insert +                 * all the child nodes of the current element before it. We need +                 * to be sure to iterate over these elements, and to do so we +                 * decrement the iterator. 
+                 */ +                --it; + +                while(el.hasChildNodes()) +                    parent.insertBefore(el.removeChild(el.getFirstChild()), el); + +                parent.removeChild(el); +                continue;   /* Current element doesn't need any more processing */ +            } +        } + + +        // Tags that need to get consolidated to start +        if(m_consolidateStart.find(name) != m_consolidateStart.end()) +            toStart.push(el); + +        // Tags that need to get consolidated to end +        else if(m_consolidateEnd.find(name) != m_consolidateEnd.end()) +            toEnd.push(el); + + +        // Tags for which duplicates need to be combined +        if(m_duplicates.find(name) != m_duplicates.end()) +        { +            DOM::Element parent = (const DOM::Element&)el.getParentNode(); +            if(parent != NULL) +            { +                // Loop till we find no more of the same +                for(;;) +                { +                    DOM::Node next = el.getNextSibling(); + +                    // If it's the same type of element ... +                    if(!DOMHelpers::isElement(next, name)) +                        break; + +                    // NOTE: Notice we do nothing with attributes. Currently +                    // all elements in the duplicates list don't need that. 
+ +                    while(next.hasChildNodes()) +                        el.appendChild(next.removeChild(next.getFirstChild())); + +                    // Remove duplicate node +                    parent.removeChild(next); +                } +            } +        } +    } + +    // Complete consolidation to front +    while(!toStart.empty()) +    { +        DOM::Node node = toStart.top(); +        DOM::Node parent = node.getParentNode(); +        if(parent != NULL && DOMHelpers::hasAncestor(top, node))   // skip nodes that were detached from the document in the meantime +        { +            // Remove it from its parent (the queued node, not the leftover loop variable 'el') +            parent.removeChild(node); + +            // And put it at the start of the document +            top.insertBefore(node, top.getFirstChild()); +        } + +        toStart.pop(); +    } + +    // Complete consolidation to end +    while(!toEnd.empty()) +    { +        DOM::Node node = toEnd.top(); +        DOM::Node parent = node.getParentNode(); +        if(parent != NULL && DOMHelpers::hasAncestor(top, node))   // skip nodes that were detached from the document in the meantime +        { +            // Remove it from its parent (the queued node, not the leftover loop variable 'el') +            parent.removeChild(node); + +            // And put it at the end of the document +            top.appendChild(node);   // NOTE(review): popping a stack appends in reverse of discovery order - confirm that is intended +        } + +        toEnd.pop(); +    } + +}  void XmlFixups::breakTables(DOM::Document& doc)  { @@ -526,258 +716,44 @@ void XmlFixups::breakTables(DOM::Document& doc)      }  } -void XmlFixups::removeTags(const DOM::Document& doc) -{ -    // Go through the list of nodes -    for(const char** t = kRemoveTags; *t != NULL; t++) -    { -        DOM::NodeList elements = doc.getElementsByTagName(*t); -        if(elements != NULL) -        { -            for(int j = 0; j < elements->getLength(); j++) -            { -                DOM::Element el = (const DOM::Element&)elements->item(j); -                DOM::Node parent = el->getParentNode(); -                if(parent == NULL) -                    continue; - -                while(el.hasChildNodes()) -                    
parent.insertBefore(el.removeChild(el.getFirstChild()), el); - -                parent.removeChild(el); -            } -        } -    } -} - -void XmlFixups::fixLists(const DOM::Document doc) +void XmlFixups::fixBlock(const DOM::Document& doc, DOM::Element& block)  { -    // Get all the lists -    DOM::NodeList lists = doc.getElementsByTagName(kElList); -    if(lists != NULL) -    { -        // And all the list definitions -        DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); -        if(listdefs != NULL) -        { -            for(int i = 0; i < listdefs->getLength(); i++) -            { -                DOM::Element listdef = (const DOM::Element&)listdefs->item(i); +    // Okay now change blocks to whatever element they're supposed to be +    string fix; +    wstring val; -                if(listdef == NULL || !listdef.hasAttribute(kAtList)) -                    continue; +    DOM::Node parent = block.getParentNode(); -                for(int j = 0; j < lists->getLength(); j++) -                { -                    DOM::Element list = (const DOM::Element&)lists->item(j); -                    if(list != NULL) -                    { -                        if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) -                        { -                            // And copy all the attributes from the list definition to the list -                            DOMHelpers::copyAttributes(listdef, list, kHideList); -                            list.removeAttribute(kAtList); -                        } -                    } -                } -            } -        } -    } -} - -void XmlFixups::fixBlocks(const DOM::Document doc) -{ -    // Get all the blocks -    DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); -    if(blocks != NULL) +    if(parent != NULL)      { -        string fix; -        wstring val; +        // Figure out what kind of element they want block fixed to +        val = block.getAttribute(kAtFix); +       
 if(val.length() > 0) +            block.removeAttribute(kAtFix); -        for(int i = 0; i < blocks->getLength(); i++) +        // BUG: Sablotron bug work around +        if(val.length() > 0)          { -            DOM::Element block = (const DOM::Element&)blocks->item(i); -            DOM::Node parent = block.getParentNode(); - -            if(parent == NULL) -                continue; - -            fix.resize(0); -            val.resize(0); - -            // Figure out what kind of element they want block fixed to -            val = block.getAttribute(kAtFix); -            if(val.length() > 0) -                block.removeAttribute(kAtFix); - -            // BUG: Sablotron bug work around -            if(val.length() > 0) -            { -                val = block.getAttributeNS("", kAtFix); -                if(val.length() > 0) -                    block.removeAttributeNS("", kAtFix); -            } - +            val = block.getAttributeNS("", kAtFix);              if(val.length() > 0) -                DOM::transcode16to8(val, fix); - -            if(fix.length() == 0) -                fix = kElPara; - -            // Create duplicate of the 'fix' element -            DOM::Element el = doc.createElement(fix); -            DOMHelpers::copyAttributes(block, el, NULL); - -            // Replace block with the given 'fix' element -            while(block.hasChildNodes()) -                el.appendChild(block.removeChild(block.getFirstChild())); - -            parent.replaceChild(el, block); +                block.removeAttributeNS("", kAtFix);          } -    } -} -void XmlFixups::consolidateEndTags(DOM::Document& doc) -{ -    DOM::Element top = doc.getDocumentElement(); -    ASSERT(top != NULL); +        if(val.length() > 0) +            DOM::transcode16to8(val, fix); -    for(const char** t = kConsolidateEnd; *t != NULL; t++) -    { -        DOM::NodeList elements = doc.getElementsByTagName(*t); -        if(elements != NULL) -        { -            int x = 
elements->getLength(); -            for(int j = 0; j < x; j++) -            { -                // Make sure it's a valid element -                DOM::Element element = (const DOM::Element&)elements->item(j); -                if(element == NULL) -                    continue; - -                DOM::Element parent = (const DOM::Element&)element.getParentNode(); -                if(parent == NULL) -                    continue; - -                // Remove it from it's child -                parent.removeChild(element); - -                // And append it to the end of the document -                top.appendChild(element); -            } -        } -    } -} - -void XmlFixups::consolidateStartTags(DOM::Document& doc) -{ -    DOM::Element top = doc.getDocumentElement(); -    ASSERT(top != NULL); - -	DOM::Node first = top.getFirstChild(); +        if(fix.length() == 0) +            fix = kElPara; -    for(const char** t = kConsolidateStart; *t != NULL; t++) -    { -        DOM::NodeList elements = doc.getElementsByTagName(*t); -        if(elements != NULL) -        { -            int x = elements->getLength(); -            for(int j = 0; j < x; j++) -            { -                // Make sure it's a valid element -                DOM::Element element = (const DOM::Element&)elements->item(j); -                if(element == NULL || element == first) -                    continue; - -                DOM::Element parent = (const DOM::Element&)element.getParentNode(); -                if(parent == NULL) -                    continue; - -                // Remove it from it's child -                parent.removeChild(element); +        // Create duplicate of the 'fix' element +        DOM::Element el = doc.createElement(fix); +        DOMHelpers::copyAttributes(block, el, NULL); -                // And put at start of the document of the document -				ASSERT(first != NULL); -				top.insertBefore(element, first); -            } -        } -    } -} - -void 
XmlFixups::combineDuplicates(const DOM::Document& doc) -{ -    bool found; - -    do -    { -        found = false; - -        // Go through the list of nodes -        for(const char** t = kNoDuplicates; *t != NULL; t++) -        { -            DOM::NodeList elements = doc.getElementsByTagName(*t); -            if(elements != NULL) -            { -                int x = elements->getLength(); -                for(int j = 0; j < x; j++) -                { -                    // Make sure it's a valid element -                    DOM::Element element = (const DOM::Element&)elements->item(j); -                    if(element == NULL) -                        continue; - -                    // Get neighbors -                    DOM::Node previous = element.getPreviousSibling(); -                    DOM::Node next = element.getNextSibling(); - -                    // Make sure it's still in the document, as we may have -                    // removed it on a previous loop -                    DOM::Node parent = element.getParentNode(); -                    if(parent == NULL) -                        continue; - -                    // Combine previous if valid -                    if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE && -                        DOMHelpers::isEqualElement((DOM::Element&)previous, element)) -                    { -                        while(previous.hasChildNodes()) -                        { -                            DOM::Node child = previous.removeChild(previous.getLastChild()); -                            if(child != NULL) -                            { -                                if(element.hasChildNodes()) -                                    element.insertBefore(child, element.getFirstChild()); -                                else -                                    element.appendChild(child); -                            } -                        } - -                        // Remove duplicate 
node -                        parent.removeChild(previous); -                        found = true; -                    } - -                    // Combine next if valid -                    if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE && -                        DOMHelpers::isEqualElement((DOM::Element&)next, element)) -                    { -                        while(next.hasChildNodes()) -                        { -                            DOM::Node child = next.removeChild(next.getFirstChild()); -                            if(child != NULL) -                                element.appendChild(child); -                        } - -                        // Remove duplicate node -                        parent.removeChild(next); -                        found = true; -                    } -                } -            } -        } +        // Replace block with the given 'fix' element +        while(block.hasChildNodes()) +            el.appendChild(block.removeChild(block.getFirstChild())); -        // Keep looping until no more duplicates found +        parent.replaceChild(el, block);      } -    while(found);  } diff --git a/src/xmlfixups.h b/src/xmlfixups.h index f00bb66..01c2d67 100644 --- a/src/xmlfixups.h +++ b/src/xmlfixups.h @@ -40,6 +40,7 @@  #define __XMLFIXUPS_H__  #include "sablo.h" +#include "domhelpers.h"  /*   * XmlFixups @@ -55,14 +56,7 @@  class XmlFixups  {  public: -    // Replace blocks with 'fix' elements like paragraphs -    static void fixBlocks(DOM::Document doc); - -    // Pass 2 list fixups -    static void fixLists(const DOM::Document doc); - -    // Pass 2 style fixups -    static void fixStyles(const DOM::Document doc); +    XmlFixups();      /*       * Breaks a paragraph up through a previous level. Calls itself @@ -83,19 +77,19 @@ public:       *   <b>test of </b> your concentration.       
* </dest>       */ -    static bool breakElement(const DOM::Element& el, const string& contain); +    bool breakElement(const DOM::Element& el, const string& contain);      // Break all tags of a given type to a previous level (see above) -    static void breakBreak(DOM::Document& doc, const string& contain, const string& tag); +    void breakBreak(DOM::Document& doc, const string& contain, const string& tag);      // Used to break tables cells and rows into blocks (but more complicated) -    static void breakTags(DOM::Document& doc, const string& parentName, const string& tagName); +    void breakTags(DOM::Document& doc, const string& parentName, const string& tagName);      // Fixes and combines list elements with the same id -    static void breakLists(DOM::Document& document); +    void breakLists(DOM::Document& document);      // Used to find and create tables and perform initial break out -    static void breakTables(DOM::Document& document); +    void breakTables(DOM::Document& document);      /* @@ -115,22 +109,45 @@ public:       * <para style="10"> This is <b> a </b></para>       * <para><b>test of </b> your concentration.</para>       */ -    static void breakBlocks(DOM::Document& document); +    void breakBlocks(DOM::Document& document);      // Wrap certain tags in a wrapper tag of given name -    static void wrapTags(DOM::Document& document, const string& tagName, const string& wrapName); +    void wrapTags(DOM::Document& document, const string& tagName, const string& wrapName);      // Remove certain tags from document -    static void removeTags(const DOM::Document& doc); +    void removeTags(const DOM::Document& doc);      // Combines certain adjacent duplicate tags -    static void combineDuplicates(const DOM::Document& doc); +    void combineDuplicates(const DOM::Document& doc);      // Consolidates a certain tag types at the beginning of the document -    static void consolidateStartTags(DOM::Document& doc); +    void 
consolidateStartTags(DOM::Document& doc);      // Consolidates a certain tag types at the end of the document -    static void consolidateEndTags(DOM::Document& doc); +    void consolidateEndTags(DOM::Document& doc); + + +    // The main pass 2 function +    void runPassTwo(const DOM::Document& doc); + +    // Replace blocks with 'fix' elements like paragraphs +    void fixBlock(const DOM::Document& doc, DOM::Element& block); + + +protected: + +    enum +    { +        PASS_0, +        PASS_1, +        PASS_TWO +    }; + +    // Our tables cached for efficiency +    StringSet m_duplicates; +    StringSet m_removes; +    StringSet m_consolidateStart; +    StringSet m_consolidateEnd;  };  #endif // __XMLFIXUPS_H__ | 
