From d2105fb3fb2082bb32875eaab6db6a2c04c7eae5 Mon Sep 17 00:00:00 2001 From: Stef Walter Date: Tue, 27 Jul 2004 21:22:58 +0000 Subject: - Preserve mode - Document options - Font fixes --- src/domhelpers.cpp | 14 ------------ src/rtfformatting.h | 2 +- src/rtfx.cpp | 50 +++++++++++++++++++++++++++++++++-------- src/tags.h | 17 +++++++++----- src/xmlcomposer.cpp | 64 ++++++++++++++++++++++++++++++++++------------------- src/xmlcomposer.h | 5 ++++- src/xmlfixups.cpp | 49 ++++++++++++++++++++++++++++++++++------ src/xmlfixups.h | 2 ++ 8 files changed, 143 insertions(+), 60 deletions(-) diff --git a/src/domhelpers.cpp b/src/domhelpers.cpp index 038e2df..4836d17 100644 --- a/src/domhelpers.cpp +++ b/src/domhelpers.cpp @@ -191,20 +191,6 @@ DOM::Element DOMHelpers::findChildElement(const DOM::Node& parent, const string& return DOM::Element(); } -DOM::Element DOMHelpers::findDescendantElement(const DOM::Node& parent, const string& name) -{ - DOM::Node child = parent.getFirstChild(); - while(child != NULL) - { - if(isElement(child, name)) - return (DOM::Element&)child; - - child = child.getNextSibling(); - } - - return DOM::Element(); -} - bool DOMHelpers::hasAncestor(const DOM::Node& ancestor, const DOM::Node& node) { DOM::Node n = node; diff --git a/src/rtfformatting.h b/src/rtfformatting.h index f2d0581..aa5ac58 100644 --- a/src/rtfformatting.h +++ b/src/rtfformatting.h @@ -155,7 +155,7 @@ public: void textSetFont(int font) { m_font = font; } void textSetFontSize(int fsize) - { m_fsize = fsize; } + { m_fsize = fsize == 24 ? 
-1 : fsize; } // default font size is always 24 void paraSetStyle(int style) { m_style = style; } void paraSetList(int list) diff --git a/src/rtfx.cpp b/src/rtfx.cpp index cbecdb9..541432e 100644 --- a/src/rtfx.cpp +++ b/src/rtfx.cpp @@ -46,23 +46,56 @@ int usage() { - fprintf(stderr, "usage: rtfx \n"); - return 2; + fprintf(stderr, "usage: rtfx [-p] \n"); + exit(2); } int main(int argc, char* argv[]) { - if(argc < 3) - return usage(); + XmlComposerOptions options; + + while(argc > 1) + { + argc--; + argv++; + + char* arg = argv[0]; + if(*arg != '-') + break; + + while(arg && *(++arg)) + { + switch(*arg) + { + case 'p': + options.extras = true; + break; + + case '-': + argc--; argv++; arg = NULL; /* "--" ends the options: consume it so argv[0] is the input file, not "--" */ + break; + + case '?': + default: + usage(); + } + } + + if(arg == NULL) + break; + } + + if(argc < 2) + usage(); try { // The input file - FILE* file = fopen(argv[1], "rb"); + FILE* file = fopen(argv[0], "rb"); if(!file) { - fprintf(stderr, "rtfx: couldn't open rtf file: %s: %s\n", argv[1], strerror(errno)); + fprintf(stderr, "rtfx: couldn't open rtf file: %s: %s\n", argv[0], strerror(errno)); return 1; } @@ -70,7 +103,6 @@ int main(int argc, char* argv[]) RtfParser rtf; // Interprets tags and blocks from RTFParser - XmlComposerOptions options; XmlComposer composer(options); rtf.setHandler(&composer); @@ -88,10 +120,10 @@ int main(int argc, char* argv[]) DOM::Document doc = composer.getDocument(); string xml = doc.serialize(); - FILE* out = fopen(argv[2], "wb"); + FILE* out = fopen(argv[1], "wb"); if(!out) { - fprintf(stderr, "rtfx: couldn't open file: %s: %s\n", argv[2], strerror(errno)); + fprintf(stderr, "rtfx: couldn't open file: %s: %s\n", argv[1], strerror(errno)); return 1; } diff --git a/src/tags.h b/src/tags.h index cd7b211..98787d9 100644 --- a/src/tags.h +++ b/src/tags.h @@ -39,17 +39,24 @@ #ifndef __TAGS_H__ #define __TAGS_H__ +/* + * IMPORTANT: When adding tags, attributes or values here make sure to update + * the XSL Schema for the output formats. 
+ */ + static const char* kNSPrefix = "xmlns"; +static const wchar_t* kNSRtfx = L"http://memberwebs.com/ns/rtfx/"; // Tags used internally static const char* kElDest = "i_dest"; static const char* kElBlock = "i_block"; -static const char* kAtFix = "i_fix"; -static const char* kAtCell = "i_cell"; static const char* kElListtable = "i_listtable"; static const char* kElListdef = "i_listdef"; static const char* kElFontTable = "i_fonttable"; static const char* kElFontDef = "i_fontdef"; +static const char* kAtFix = "i_fix"; +static const char* kAtCell = "i_cell"; +static const char* kAtList = "i_list"; // All the main tags static const char* kElPara = "para"; @@ -70,7 +77,6 @@ static const char* kElHide = "hide"; static const char* kElI = "i"; static const char* kElStrike = "strike"; static const char* kElU = "u"; -static const char* kElColor = "color"; static const char* kElSuper = "super"; static const char* kElSub = "sub"; static const char* kElCell = "cell"; @@ -79,9 +85,9 @@ static const char* kElTable = "table"; static const char* kElFootNote = "footnote"; static const char* kElRef = "ref"; static const char* kElFont = "font"; +static const char* kElOptions = "options"; // Attributes -static const char* kAtList = "list"; static const char* kAtName = "name"; static const char* kAtBold = "bold"; static const char* kAtHidden = "hide"; @@ -93,7 +99,6 @@ static const char* kAtType = "type"; static const char* kAtOrdered = "ordered"; static const char* kAtStart = "start"; static const char* kAtId = "id"; -static const char* kAtIndex = "id"; static const char* kAtTo = "to"; static const char* kAtSize = "size"; @@ -109,5 +114,7 @@ static const wchar_t* kValFootNote = L"footnote"; static const wchar_t* kValList = L"list"; static const wchar_t* kValPara = L"para"; static const wchar_t* kValTable = L"table"; +static const wchar_t* kValTrue = L"true"; +static const wchar_t* kValZero = L"0"; #endif // __TAGS_H__ diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp index 
398cb55..a90c25a 100644 --- a/src/xmlcomposer.cpp +++ b/src/xmlcomposer.cpp @@ -111,13 +111,18 @@ void XmlComposer::startDocument(RtfParser* reader) ASSERT(m_document != NULL); // Hook up the top level element - m_curLevel->setElement(m_document.getDocumentElement(), true); + DOM::Element top = m_document.getDocumentElement(); + m_curLevel->setElement(top, true); // Set the attributes on the top level setAnalyser(AnalyserPtr(new Root)); setDestination(DestinationPtr(new Content)); getTextFormatting().resetPara(); getTextFormatting().resetText(); + + // Create the options element + m_docOptions = m_document.createElement(kElOptions); + top.appendChild(m_docOptions); } void XmlComposer::endDocument() @@ -151,6 +156,11 @@ void XmlComposer::endDocument() // Pass 3: Final cleanup // XmlFixups::combineDuplicates(m_document); + + DOM::Element top = m_document.getDocumentElement(); + ASSERT(top != NULL); + top.setAttribute(kNSPrefix, kNSRtfx); + return; } @@ -323,6 +333,12 @@ void XmlComposer::incrementAutoCount(int type) m_autocount[type]++; } +void XmlComposer::addDocumentOption(DOM::Element& option) +{ + ASSERT(m_docOptions != NULL); + m_docOptions.appendChild(option); +} + /* ---------------------------------------------------------------------------------- * CONVENIENCE MACROS USED BELOW @@ -694,23 +710,17 @@ ON_GROUPEND(Style) // Dig out all the formatting attributes if(props.textIsBold()) - AN_ATTRIBUTE(kAtBold, L"1"); + AN_ATTRIBUTE(kAtBold, kValTrue); if(props.textIsHidden()) - AN_ATTRIBUTE(kAtHidden, L"1"); + AN_ATTRIBUTE(kAtHidden, kValTrue); if(props.textIsItalic()) - AN_ATTRIBUTE(kAtItalic, L"1"); + AN_ATTRIBUTE(kAtItalic, kValTrue); if(props.textIsStrike()) - AN_ATTRIBUTE(kAtStrike, L"1"); + AN_ATTRIBUTE(kAtStrike, kValTrue); if(props.textIsUnderline()) - AN_ATTRIBUTE(kAtUnderline, L"1"); + AN_ATTRIBUTE(kAtUnderline, kValTrue); - if(DO_EXTRAS()) - { - // TODO: Do fonts here - - if(props.textColor() != -1) - AN_ATTRIBUTE(kAtColor, props.textColor()); - } + // 
TODO: Do fonts and colors here } @@ -728,6 +738,21 @@ ON_GROUPSTART(FontTable) AN_ANALYSER(Font); } +ON_DONE(FontTable) +{ + DOM::Element deffont = m_composer->createElement(kElFont); + + // Default font is always the first in the list + deffont->setAttribute(kAtId, kValZero); + + // Default size is always 12 pt + deffont->setAttribute(kAtSize, L"12"); + + // TODO: Is this correct? + deffont->setAttribute(kAtColor, kValZero); + + m_composer->addDocumentOption(deffont); +} // Font Analyser -------------------------------------------------------------------- @@ -1072,6 +1097,9 @@ ON_CHARDATA(Content) if(fontsize != -1) AN_ATTRIBUTE(kAtSize, fontsize); + if(format.textColor() != -1) + AN_ATTRIBUTE(kAtColor, format.textColor()); + elements++; } } @@ -1108,16 +1136,6 @@ ON_CHARDATA(Content) elements++; } - if(DO_EXTRAS()) - { - if(format.textColor() != -1) - { - AN_ELEMENT(kElColor); - AN_ATTRIBUTE(kAtIndex, format.textColor()); - elements++; - } - } - if(format.textSuScript() == RtfFormatting::SUPERSCRIPT) { AN_ELEMENT(kElSuper); diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h index 83ea4cd..41aad0e 100644 --- a/src/xmlcomposer.h +++ b/src/xmlcomposer.h @@ -117,6 +117,8 @@ public: int getAutoCount(int type); void incrementAutoCount(int type); + // Add a document option to the option block + void addDocumentOption(DOM::Element& option); // Get the current formatting options RtfFormatting& getTextFormatting(); @@ -139,7 +141,7 @@ protected: DOM::Document m_document; // The current document XmlComposerOptions m_options; // Configurable options for parsing int m_autocount[AUTOCOUNT_MAX]; // Auto counters for the document - + DOM::Element m_docOptions; // For storing document options // Sub classes protected: @@ -250,6 +252,7 @@ protected: ANALYSER(FontTable) INITIALIZE GROUPSTART + DONE END_ANALYSER // Handle a Font in the Table diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp index 10b35f1..45fa59a 100644 --- a/src/xmlfixups.cpp +++ b/src/xmlfixups.cpp @@ -42,16 
+42,22 @@ #include "tags.h" static const char* kNoDuplicates[] = - { kElB, kElU, kElI, kElColor, kElHide, kElSuper, kElSub, NULL }; + { kElB, kElU, kElI, kElFont, kElHide, kElSuper, kElSub, NULL }; + +static const char* kRequireAttrs[] = + { kElFont, NULL }; static const char* kRemoveTags[] = - { kElDest, kElListdef, kElListtable, kElFontTable, NULL }; + { kElDest, kElListdef, kElListtable, kElFontTable, kElFontDef, NULL }; + +static const char* kRemoveEmpty[] = + { kElOptions, kElList, NULL }; static const char* kBlockTags[] = - { kElTable, kElPara, NULL }; + { kElTable, kElPara, NULL }; static const char* kHideList[] = - { kAtId, kAtList, NULL }; + { kAtId, kAtList, NULL }; static const char* kConsolidateEnd[] = { kElFootNote, NULL }; @@ -69,6 +75,8 @@ XmlFixups::XmlFixups() { loadStringSet(m_duplicates, kNoDuplicates); loadStringSet(m_removes, kRemoveTags); + loadStringSet(m_removeEmpty, kRemoveEmpty); + loadStringSet(m_requireAttrs, kRequireAttrs); loadStringSet(m_consolidateStart, kConsolidateStart); loadStringSet(m_consolidateEnd, kConsolidateEnd); } @@ -502,6 +510,8 @@ void XmlFixups::runPassTwo(const DOM::Document& doc) DOM::Element font = fonts.get(el.getAttribute(kAtId)); if(font != NULL) el.setAttribute(kAtName, font.getAttribute(kAtName)); + + if(font != NULL) font.removeAttribute(kAtId); /* font is NULL when the id has no font definition */ } } @@ -532,8 +542,10 @@ void XmlFixups::runPassTwo(const DOM::Document& doc) */ } - // Tags that just plain get removed - if(m_removes.find(name) != m_removes.end()) + // Tags that get removed but contents preserved. 
Also here are + // tags that get removed if they have no attributes + if(m_removes.find(name) != m_removes.end() || + (m_requireAttrs.find(name) != m_requireAttrs.end() && !el.hasAttributes())) { DOM::Node parent = el->getParentNode(); @@ -556,6 +568,26 @@ void XmlFixups::runPassTwo(const DOM::Document& doc) } } + // Tags that get removed when no child nodes exist + if(m_removeEmpty.find(name) != m_removeEmpty.end() && !el.hasChildNodes()) + { + DOM::Node parent = el->getParentNode(); + + if(parent != NULL) + { + /* + * After the element is removed, the current element is no longer + * valid for iterating over the document, so we step the iterator + * back to the previous position before removing it. Nothing is + * moved out of the element here: it is only removed when it has + * no child nodes, so there is no content to preserve. + */ + --it; + + parent.removeChild(el); + continue; /* Current element doesn't need any more processing */ + } + } // Tags that need to get consolidated to start if(m_consolidateStart.find(name) != m_consolidateStart.end()) @@ -577,8 +609,11 @@ void XmlFixups::runPassTwo(const DOM::Document& doc) { DOM::Node next = el.getNextSibling(); + if(next == NULL || next.getNodeType() != DOM::Node::ELEMENT_NODE) + break; + // If it's the same type of element ... - if(!DOMHelpers::isElement(next, name)) + if(!DOMHelpers::isEqualElement((DOM::Element&)next, el)) break; // NOTE: Notice we do nothing with attributes. Currently diff --git a/src/xmlfixups.h b/src/xmlfixups.h index 01c2d67..99fe876 100644 --- a/src/xmlfixups.h +++ b/src/xmlfixups.h @@ -146,8 +146,10 @@ protected: // Our tables cached for efficiency StringSet m_duplicates; StringSet m_removes; + StringSet m_removeEmpty; StringSet m_consolidateStart; StringSet m_consolidateEnd; + StringSet m_requireAttrs; }; #endif // __XMLFIXUPS_H__ -- cgit v1.2.3