summaryrefslogtreecommitdiff
path: root/src/xmlfixups.h
blob: 99fe8765aad2cc39b299cb80962f7fe984479792 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
 * Copyright (c) 2004, Nate Nielsen
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above
 *       copyright notice, this list of conditions and the
 *       following disclaimer.
 *     * Redistributions in binary form must reproduce the
 *       above copyright notice, this list of conditions and
 *       the following disclaimer in the documentation and/or
 *       other materials provided with the distribution.
 *     * The names of contributors to this software may not be
 *       used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 *
 * CONTRIBUTORS
 *  Nate Nielsen <nielsen@memberwebs.com>
 *
 */

#ifndef __XMLFIXUPS_H__
#define __XMLFIXUPS_H__

#include "sablo.h"
#include "domhelpers.h"

/*
 * XmlFixups
 *
 * Because RTF is so 'different' (read: brain dead) we need to do all sorts
 * of antics to get it into a nice XML format. Some of the XML composition
 * is done in XmlComposer while parsing, but whatever can't be done there
 * gets done here after the fact.
 *
 * These functions are called from XmlComposer::endDocument and massage the
 * resulting XML DOM into shape.
 */
class XmlFixups
{
public:
    // Constructor. Only declared here; the definition (like that of every
    // other member function of this class) is outside this header.
    XmlFixups();

    /*
     * Breaks a paragraph marker out of the element that encloses it, one
     * level at a time. Calls itself recursively until the paragraph is
     * totally free, up to the containing destination.
     *
     * For example:
     *
     * <dest>
     *     This is <b> a <block fix="para"/>
     *     test of </b> your concentration.
     * </dest>
     *
     * Becomes:
     *
     * <dest>
     *     This is <b> a </b><block fix="para"/>
     *     <b>test of </b> your concentration.
     * </dest>
     *
     * el:      the element to break out (the <block/> marker in the example).
     * contain: presumably the name of the containing element at which the
     *          breaking stops (<dest> in the example) -- TODO confirm
     *          against the definition.
     *
     * NOTE(review): the meaning of the bool return value is not visible
     * from this header; check the definition before relying on it.
     */
    bool breakElement(const DOM::Element& el, const string& contain);

    // Breaks all tags of a given type out to a previous level (see
    // breakElement above).
    //
    // doc:     the document to process.
    // contain: presumably the containing element name, as for breakElement
    //          -- TODO confirm.
    // tag:     presumably the name of the tags to break out -- TODO confirm.
    void breakBreak(DOM::Document& doc, const string& contain, const string& tag);

    // Used to break table cells and rows into blocks (but more complicated
    // than breakBreak).
    //
    // NOTE(review): the exact roles of parentName and tagName are not
    // established by this header; verify against the definition.
    void breakTags(DOM::Document& doc, const string& parentName, const string& tagName);

    // Fixes and combines list elements that have the same id.
    void breakLists(DOM::Document& document);

    // Used to find and create tables and perform the initial break out.
    void breakTables(DOM::Document& document);


    /*
     * Changes from a marker based paragraph system to a contained
     * paragraph system. Also applies paragraph attributes to the
     * appropriate paragraph.
     *
     * For example:
     *
     * <dest>
     *     This <blockattr style="10"> is <b> a <block fix="para"/>
     *     test of </b> your concentration.
     * </dest>
     *
     * Becomes:
     *
     * <para style="10"> This is <b> a </b></para>
     * <para><b>test of </b> your concentration.</para>
     */
    void breakBlocks(DOM::Document& document);

    // Wraps certain tags in a wrapper tag of the given name.
    //
    // tagName:  presumably the name of the tags to be wrapped -- TODO confirm.
    // wrapName: name of the wrapper tag.
    void wrapTags(DOM::Document& document, const string& tagName, const string& wrapName);

    // Removes certain tags from the document.
    //
    // NOTE(review): takes the document by const reference, unlike most of
    // the sibling passes; whether the underlying DOM is still modified
    // through it cannot be determined from this header.
    void removeTags(const DOM::Document& doc);

    // Combines certain adjacent duplicate tags.
    //
    // NOTE(review): const reference parameter, same caveat as removeTags.
    void combineDuplicates(const DOM::Document& doc);

    // Consolidates certain tag types at the beginning of the document.
    void consolidateStartTags(DOM::Document& doc);

    // Consolidates certain tag types at the end of the document.
    void consolidateEndTags(DOM::Document& doc);


    // The main pass 2 function.
    //
    // NOTE(review): which of the other member functions this drives is not
    // visible from the header.
    void runPassTwo(const DOM::Document& doc);

    // Replaces blocks with 'fix' elements like paragraphs.
    //
    // doc:   the document that contains the block.
    // block: the block element to replace; passed by non-const reference.
    void fixBlock(const DOM::Document& doc, DOM::Element& block);


protected:

    // Pass identifiers. As an unnamed enum without initializers the values
    // are 0, 1 and 2 in declaration order. Where they are used is not
    // visible from this header.
    //
    // NOTE(review): PASS_TWO breaks the PASS_<digit> naming pattern of the
    // other two enumerators; left unchanged because code outside this
    // header may refer to it by name.
    enum
    {
        PASS_0,
        PASS_1,
        PASS_TWO
    };

    // Our tables, cached for efficiency. The member names suggest one set
    // per fix-up category (duplicates to combine, tags to remove, tags to
    // remove when empty, tags to consolidate at start/end, tags requiring
    // attributes) -- TODO confirm contents against the constructor.
    StringSet m_duplicates;
    StringSet m_removes;
    StringSet m_removeEmpty;
    StringSet m_consolidateStart;
    StringSet m_consolidateEnd;
    StringSet m_requireAttrs;
};

#endif // __XMLFIXUPS_H__