diff options
| author | Stef Walter <stef@memberwebs.com> | 2004-07-24 19:06:51 +0000 | 
|---|---|---|
| committer | Stef Walter <stef@memberwebs.com> | 2004-07-24 19:06:51 +0000 | 
| commit | 8335fdb6b7e7afb57d096e0f3a453b662f7a23c0 (patch) | |
| tree | ef3c3079f58b44cb9f1b2953f05e1628d6846e9b /src/xmlfixups.cpp | |
| parent | ff4568d01651afd615751f9fc683dbe30f2ced9b (diff) | |
- Post processing code cleanup.
Diffstat (limited to 'src/xmlfixups.cpp')
| -rw-r--r-- | src/xmlfixups.cpp | 566 | 
1 files changed, 271 insertions, 295 deletions
| diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp index 707294d..b8c84f4 100644 --- a/src/xmlfixups.cpp +++ b/src/xmlfixups.cpp @@ -45,7 +45,7 @@ static const char* kNoDuplicates[] =      { kElB, kElU, kElI, kElColor, kElHide, kElColor, kElSuper, kElSub, NULL };  static const char* kRemoveTags[] = - { kElDest, kElListdef, kElListtable, NULL }; + { kElDest, kElListdef, kElListtable, kElFontTable, NULL };  static const char* kBlockTags[] =    { kElTable, kElPara, NULL }; @@ -59,19 +59,18 @@ static const char* kConsolidateEnd[] =  static const char* kConsolidateStart[] =      { kElStylesheet, kElInfo, NULL }; +void loadStringSet(StringSet& set, const char** strings) +{ +    while(*strings) +        set.insert(string(*strings)); +} -void XmlFixups::breakBreak(DOM::Document& doc, const string& contain, -                              const string& tag) +XmlFixups::XmlFixups()  { -    DOM::NodeList els = doc.getElementsByTagName(tag); -    if(els != NULL) -    { -        for(int i = 0; i < els->getLength(); i++) -        { -            DOM::Element el = (const DOM::Element&)els->item(i); -            breakElement(el, contain); -        } -    } +    loadStringSet(m_duplicates, kNoDuplicates); +    loadStringSet(m_removes, kRemoveTags); +    loadStringSet(m_consolidateStart, kConsolidateStart); +    loadStringSet(m_consolidateEnd, kConsolidateEnd);  }  bool XmlFixups::breakElement(const DOM::Element& el, const string& contain) @@ -315,19 +314,6 @@ void XmlFixups::breakTags(DOM::Document& doc, const string& parentName,              if(parent != NULL && !DOMHelpers::isElement(parent, parentName))                  parent.removeChild(tag); -#if 0 -            else if(tag.hasChildNodes()) -            { -                DOM::NodeList children = tag.getChildNodes(); -                if(children != NULL && children->getLength() == 1) -                { -                    DOM::Node child = children->item(0); -                    if(child != NULL && !child.hasChildNodes() && -                       DOMHelpers::isElement(child, kElBlock)) -                        parent.removeChild(tag); -                } -            } -#endif          }      }  } @@ -394,51 +380,255 @@ void XmlFixups::breakLists(DOM::Document& doc)      }  } -void XmlFixups::fixStyles(const DOM::Document doc) +void XmlFixups::runPassTwo(const DOM::Document& doc)  { -    // Get all stylesheet styles -    DOM::NodeList styles = doc.getElementsByTagName(kElStyle); -    if(styles != NULL) +    /* +     * Okay, this function is complicated and long. It was all broken up into +     * shorter functions previously but that sucked for efficiency. Basically +     * we want to iterate over the document as few times as possible and because +     * of that we combine all of that here. +     * +     * In this pass: +     * o Fix: +     *   - font names +     *   - style names +     *   - list attributes +     *   - block elements +     * o Consolidate certain tags to end of doc +     * o Consolidate certain tags to start of doc +     * o Combine duplicates of certain tags +     * o Remove certain tags +     * o Break out pages and sections +     */ + +    bool haveStyles = false; +    ElementTable styles; + +    bool haveFonts = false; +    ElementTable fonts; + +    bool haveLists = false; +    ElementTable lists; + +    DOM::Element top = doc.getDocumentElement(); + +    // Get stylesheet block +    DOM::Element el = DOMHelpers::getChildElement(top, kElStylesheet); +    if(el != NULL) +    { +        // Load the styles into a id mapped table +        styles.load(el, kElStyle); + +        if(!styles.empty()) +        { +            styles.removeIds(); +            haveStyles = true; +        } +    } + +    // Get the font block +    el = DOMHelpers::getChildElement(top, kElFontTable); +    if(el != NULL) +    { +        // Load the fonts into an id mapped table +        fonts.load(el, kElFont); + +        if(!fonts.empty()) +        { +            fonts.removeIds(); +            haveFonts = true; +        } +    } + +    // Get the list definition block +    el = DOMHelpers::getChildElement(top, kElListtable); +    if(el != NULL)      { -        // Get list of blocks in the document -        DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); -        if(blocks != NULL) +        // Load the lists into an id mapped table +        lists.load(el, kElListdef); + +        if(!lists.empty())          { -            for(int i = 0; i < blocks->getLength(); i++) +            lists.removeIds(); +            haveLists = true; +        } +    } + +    NodeStack toStart;      // Nodes that get moved to beginning of document +    NodeStack toEnd;        // Nodes that get moved to the end of the document + +    ElementIterator it(top); +    ElementIterator end; + +    for( ; it != end; ++it) +    { +        el = *it; + +        // Mark each node as we've seen it so we don't +        // do a given element twice +        if((int)el.getUserData() == PASS_TWO) +            continue; + +        el.setUserData((void*)PASS_TWO); +        string name = el.getNodeName(); + +        if(name == kElBlock) +        { +            // Change style attribute on blocks to name +            if(haveStyles && el.hasAttribute(kElStyle))              { -                DOM::Element block = (const DOM::Element&)blocks->item(i); +                DOM::Element style = styles.get(el.getAttribute(kElStyle)); +                if(style != NULL) +                    el.setAttribute(kElStyle, style.getAttribute(kAtName)); +            } -                if(block == NULL || !block.hasAttribute(kElStyle)) -                    continue; +            /* +             * The below function call replaces the current element with another +             * new element. The new element still needs to be processed, so we +             * just backup one, and then short circuit the loop below. +             */ -                // Lookup block styles -                for(int j = 0; j < styles->getLength(); j++) +            // Now fix the block itself +            fixBlock(doc, el); + +            continue;   // Current element no longer valid +        } + +        // Change id attribute on fonts to name +        else if(haveFonts && name == kElFont) +        { +            if(el.hasAttribute(kAtId)) +            { +                DOM::Element font = fonts.get(el.getAttribute(kAtId)); +                if(font != NULL) +                    el.setAttribute(kAtName, font.getAttribute(kAtName)); +            } +        } + +        // Copy list attributes onto the lists +        else if(haveLists && name == kElList) +        { +            if(el.hasAttribute(kAtList)) +            { +                DOM::Element list = lists.get(el.getAttribute(kAtList)); +                if(list != NULL)                  { -                    DOM::Element style = (const DOM::Element&)styles->item(j); -                    if(style != NULL) -                    { -                        if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) -                        { -                            // And change to the name -                            wstring name = style.getAttribute(kAtName); -                            if(name.length() > 0) -                                block.setAttribute(kElStyle, name); -                        } -                    } +                    // And copy all the attributes from the list definition to the list +                    DOMHelpers::copyAttributes(list, el, kHideList); +                    el.removeAttribute(kAtList);                  }              }          } -        // A little cleanup of the stylesheet styles -        for(int i = 0; i < styles->getLength(); i++) +        // Break out pages and sections all the way to document +        if(name == kElPage || name == kElSect)          { -            DOM::Element style = (const DOM::Element&)styles->item(i); -            if(style != NULL) -                style.removeAttribute(kAtId); +            breakElement(el, kElDoc); + +            /* +             * NOTE: The flow of the document is changed here. But the current +             * element is still in a valid place for iterating over the document +             * so we don't have to worry about it. +             */          } -   } -} +        // Tags that just plain get removed +        if(m_removes.find(name) != m_removes.end()) +        { +            DOM::Node parent = el->getParentNode(); + +            if(parent != NULL) +            { +                /* +                 * After the element is removed, the current element is no longer +                 * valid for iterating over the document. In addition we insert +                 * all the child nodes of the current element before it. We need +                 * to be sure to iterate over these elements, and to do so we +                 * decrement the iterator. +                 */ +                --it; + +                while(el.hasChildNodes()) +                    parent.insertBefore(el.removeChild(el.getFirstChild()), el); + +                parent.removeChild(el); +                continue;   /* Current element doesn't need any more processing */ +            } +        } + + +        // Tags that need to get consolidated to start +        if(m_consolidateStart.find(name) != m_consolidateStart.end()) +            toStart.push(el); + +        // Tags that need to get consolidated to end +        else if(m_consolidateEnd.find(name) != m_consolidateEnd.end()) +            toEnd.push(el); + + +        // Tags for which duplicates need to be combined +        if(m_duplicates.find(name) != m_duplicates.end()) +        { +            DOM::Element parent = (const DOM::Element&)el.getParentNode(); +            if(parent != NULL) +            { +                // Loop till we find no more of the same +                for(;;) +                { +                    DOM::Node next = el.getNextSibling(); + +                    // If it's the same type of element ... +                    if(!DOMHelpers::isElement(next, name)) +                        break; + +                    // NOTE: Notice we do nothing with attributes. Currently +                    // all elements in the duplicates list don't need that. + +                    while(next.hasChildNodes()) +                        el.appendChild(next.removeChild(next.getFirstChild())); + +                    // Remove duplicate node +                    parent.removeChild(next); +                } +            } +        } +    } + +    // Complete consolidation to front +    while(!toStart.empty()) +    { +        DOM::Node node = toStart.top(); +        DOM::Node parent = node.getParentNode(); +        if(parent != NULL && DOMHelpers::hasAncestor(top, node)) +        { +            // Remove it from it's child +            parent.removeChild(el); + +            // And put at start of the document of the document +            top.insertBefore(el, top.getFirstChild()); +        } + +        toStart.pop(); +    } + +    // Complete consolidation to end +    while(!toEnd.empty()) +    { +        DOM::Node node = toEnd.top(); +        DOM::Node parent = node.getParentNode(); +        if(parent != NULL && DOMHelpers::hasAncestor(top, node)) +        { +            // Remove it from it's child +            parent.removeChild(el); + +            // And put at end of the document of the document +            top.appendChild(el); +        } + +        toEnd.pop(); +    } + +}  void XmlFixups::breakTables(DOM::Document& doc)  { @@ -526,258 +716,44 @@ void XmlFixups::breakTables(DOM::Document& doc)      }  } -void XmlFixups::removeTags(const DOM::Document& doc) -{ -    // Go through the list of nodes -    for(const char** t = kRemoveTags; *t != NULL; t++) -    { -        DOM::NodeList elements = doc.getElementsByTagName(*t); -        if(elements != NULL) -        { -            for(int j = 0; j < elements->getLength(); j++) -            { -                DOM::Element el = (const DOM::Element&)elements->item(j); -                DOM::Node parent = el->getParentNode(); -                if(parent == NULL) -                    continue; - -                while(el.hasChildNodes()) -                    parent.insertBefore(el.removeChild(el.getFirstChild()), el); - -                parent.removeChild(el); -            } -        } -    } -} - -void XmlFixups::fixLists(const DOM::Document doc) +void XmlFixups::fixBlock(const DOM::Document& doc, DOM::Element& block)  { -    // Get all the lists -    DOM::NodeList lists = doc.getElementsByTagName(kElList); -    if(lists != NULL) -    { -        // And all the list definitions -        DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); -        if(listdefs != NULL) -        { -            for(int i = 0; i < listdefs->getLength(); i++) -            { -                DOM::Element listdef = (const DOM::Element&)listdefs->item(i); +    // Okay now change blocks to whatever element they're supposed to be +    string fix; +    wstring val; -                if(listdef == NULL || !listdef.hasAttribute(kAtList)) -                    continue; +    DOM::Node parent = block.getParentNode(); -                for(int j = 0; j < lists->getLength(); j++) -                { -                    DOM::Element list = (const DOM::Element&)lists->item(j); -                    if(list != NULL) -                    { -                        if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) -                        { -                            // And copy all the attributes from the list definition to the list -                            DOMHelpers::copyAttributes(listdef, list, kHideList); -                            list.removeAttribute(kAtList); -                        } -                    } -                } -            } -        } -    } -} - -void XmlFixups::fixBlocks(const DOM::Document doc) -{ -    // Get all the blocks -    DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); -    if(blocks != NULL) +    if(parent != NULL)      { -        string fix; -        wstring val; +        // Figure out what kind of element they want block fixed to +        val = block.getAttribute(kAtFix); +        if(val.length() > 0) +            block.removeAttribute(kAtFix); -        for(int i = 0; i < blocks->getLength(); i++) +        // BUG: Sablotron bug work around +        if(val.length() > 0)          { -            DOM::Element block = (const DOM::Element&)blocks->item(i); -            DOM::Node parent = block.getParentNode(); - -            if(parent == NULL) -                continue; - -            fix.resize(0); -            val.resize(0); - -            // Figure out what kind of element they want block fixed to -            val = block.getAttribute(kAtFix); -            if(val.length() > 0) -                block.removeAttribute(kAtFix); - -            // BUG: Sablotron bug work around -            if(val.length() > 0) -            { -                val = block.getAttributeNS("", kAtFix); -                if(val.length() > 0) -                    block.removeAttributeNS("", kAtFix); -            } - +            val = block.getAttributeNS("", kAtFix);              if(val.length() > 0) -                DOM::transcode16to8(val, fix); - -            if(fix.length() == 0) -                fix = kElPara; - -            // Create duplicate of the 'fix' element -            DOM::Element el = doc.createElement(fix); -            DOMHelpers::copyAttributes(block, el, NULL); - -            // Replace block with the given 'fix' element -            while(block.hasChildNodes()) -                el.appendChild(block.removeChild(block.getFirstChild())); - -            parent.replaceChild(el, block); +                block.removeAttributeNS("", kAtFix);          } -    } -} -void XmlFixups::consolidateEndTags(DOM::Document& doc) -{ -    DOM::Element top = doc.getDocumentElement(); -    ASSERT(top != NULL); +        if(val.length() > 0) +            DOM::transcode16to8(val, fix); -    for(const char** t = kConsolidateEnd; *t != NULL; t++) -    { -        DOM::NodeList elements = doc.getElementsByTagName(*t); -        if(elements != NULL) -        { -            int x = elements->getLength(); -            for(int j = 0; j < x; j++) -            { -                // Make sure it's a valid element -                DOM::Element element = (const DOM::Element&)elements->item(j); -                if(element == NULL) -                    continue; - -                DOM::Element parent = (const DOM::Element&)element.getParentNode(); -                if(parent == NULL) -                    continue; - -                // Remove it from it's child -                parent.removeChild(element); - -                // And append it to the end of the document -                top.appendChild(element); -            } -        } -    } -} - -void XmlFixups::consolidateStartTags(DOM::Document& doc) -{ -    DOM::Element top = doc.getDocumentElement(); -    ASSERT(top != NULL); - -	DOM::Node first = top.getFirstChild(); +        if(fix.length() == 0) +            fix = kElPara; -    for(const char** t = kConsolidateStart; *t != NULL; t++) -    { -        DOM::NodeList elements = doc.getElementsByTagName(*t); -        if(elements != NULL) -        { -            int x = elements->getLength(); -            for(int j = 0; j < x; j++) -            { -                // Make sure it's a valid element -                DOM::Element element = (const DOM::Element&)elements->item(j); -                if(element == NULL || element == first) -                    continue; - -                DOM::Element parent = (const DOM::Element&)element.getParentNode(); -                if(parent == NULL) -                    continue; - -                // Remove it from it's child -                parent.removeChild(element); +        // Create duplicate of the 'fix' element +        DOM::Element el = doc.createElement(fix); +        DOMHelpers::copyAttributes(block, el, NULL); -                // And put at start of the document of the document -				ASSERT(first != NULL); -				top.insertBefore(element, first); -            } -        } -    } -} - -void XmlFixups::combineDuplicates(const DOM::Document& doc) -{ -    bool found; - -    do -    { -        found = false; - -        // Go through the list of nodes -        for(const char** t = kNoDuplicates; *t != NULL; t++) -        { -            DOM::NodeList elements = doc.getElementsByTagName(*t); -            if(elements != NULL) -            { -                int x = elements->getLength(); -                for(int j = 0; j < x; j++) -                { -                    // Make sure it's a valid element -                    DOM::Element element = (const DOM::Element&)elements->item(j); -                    if(element == NULL) -                        continue; - -                    // Get neighbors -                    DOM::Node previous = element.getPreviousSibling(); -                    DOM::Node next = element.getNextSibling(); - -                    // Make sure it's still in the document, as we may have -                    // removed it on a previous loop -                    DOM::Node parent = element.getParentNode(); -                    if(parent == NULL) -                        continue; - -                    // Combine previous if valid -                    if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE && -                        DOMHelpers::isEqualElement((DOM::Element&)previous, element)) -                    { -                        while(previous.hasChildNodes()) -                        { -                            DOM::Node child = previous.removeChild(previous.getLastChild()); -                            if(child != NULL) -                            { -                                if(element.hasChildNodes()) -                                    element.insertBefore(child, element.getFirstChild()); -                                else -                                    element.appendChild(child); -                            } -                        } - -                        // Remove duplicate node -                        parent.removeChild(previous); -                        found = true; -                    } - -                    // Combine next if valid -                    if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE && -                        DOMHelpers::isEqualElement((DOM::Element&)next, element)) -                    { -                        while(next.hasChildNodes()) -                        { -                            DOM::Node child = next.removeChild(next.getFirstChild()); -                            if(child != NULL) -                                element.appendChild(child); -                        } - -                        // Remove duplicate node -                        parent.removeChild(next); -                        found = true; -                    } -                } -            } -        } +        // Replace block with the given 'fix' element +        while(block.hasChildNodes()) +            el.appendChild(block.removeChild(block.getFirstChild())); -        // Keep looping until no more duplicates found +        parent.replaceChild(el, block);      } -    while(found);  } | 
