From 099b3f5ad0a70070fd183b85aeee7d2420960f33 Mon Sep 17 00:00:00 2001 From: Stef Walter Date: Wed, 8 Jun 2005 16:14:54 +0000 Subject: Added option for pretty printing whitespace in XML. --- ChangeLog | 1 + src/rtfx.1 | 4 +++- src/rtfx.cpp | 6 +++++- src/xmlcomposer.cpp | 4 ++++ src/xmlcomposer.h | 1 + src/xmlfixups.cpp | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/xmlfixups.h | 8 ++++++++ 7 files changed, 79 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index a82f430..05500be 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Version 0.9.6 - Add tag for hyperlink fields - Add 'align' attribute to tags for paragraph alignment - Add 'indent' attriute to tags for paragraph indent + - Now has option for pretty printing output XML Version 0.9.5 - Allow conversions on stdin and stdout diff --git a/src/rtfx.1 b/src/rtfx.1 index 64f8547..3d234c6 100644 --- a/src/rtfx.1 +++ b/src/rtfx.1 @@ -42,7 +42,7 @@ .Nd converts RTF to a generic XML format .Sh SYNOPSIS .Nm -.Op Fl p +.Op Fl pw .Op Ar inrtf .Op Ar outxml .Sh DESCRIPTION @@ -62,6 +62,8 @@ The options are as follows: Puts .Nm into 'presentation' output mode. +.It Fl w +Output XML is written with indents for easy reading. .It Ar inrtf The RTF file to convert. If not specified reads from stdin. 
.It Ar outxml
diff --git a/src/rtfx.cpp b/src/rtfx.cpp
index 8239b49..e305959 100644
--- a/src/rtfx.cpp
+++ b/src/rtfx.cpp
@@ -46,7 +46,7 @@
 
 int usage()
 {
-    fprintf(stderr, "usage: rtfx [-p] [inrtf] [outxml]\n");
+    fprintf(stderr, "usage: rtfx [-pw] [inrtf] [outxml]\n");
     exit(2);
 }
 
@@ -71,6 +71,10 @@ int main(int argc, char* argv[])
                 options.extras = true;
                 break;
 
+            case 'w': // -w: write the output XML with indents (see rtfx.1)
+                options.pretty = true;
+                break;
+
             case '-':
                 arg = NULL;
                 break;
diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp
index f10ce0c..6b1b725 100644
--- a/src/xmlcomposer.cpp
+++ b/src/xmlcomposer.cpp
@@ -131,6 +131,10 @@ void XmlComposer::endDocument()
     ASSERT(top != NULL);
     top.setAttribute(kNSPrefix, kNSRtfx);
 
+    // Pass 3: Pretty printing, performed only when the 'pretty' option is set
+    if(m_options.pretty)
+        fix.prettyXml(m_document);
+
     return;
 }
 
diff --git a/src/xmlcomposer.h b/src/xmlcomposer.h
index 26839d7..c9c321c 100644
--- a/src/xmlcomposer.h
+++ b/src/xmlcomposer.h
@@ -47,6 +47,7 @@ struct XmlComposerOptions
         { memset(this, 0, sizeof(*this)); }
 
     bool extras;
+    bool pretty; // when set, XmlComposer::endDocument() calls prettyXml() on the document
 };
 
 /*
diff --git a/src/xmlfixups.cpp b/src/xmlfixups.cpp
index 458798a..2c05e3a 100644
--- a/src/xmlfixups.cpp
+++ b/src/xmlfixups.cpp
@@ -65,6 +65,9 @@ static const char* kConsolidateEnd[] =
 static const char* kConsolidateStart[] =
     { kElStylesheet, kElInfo, NULL };
 
+static const char* kNoPretty[] = // element names that internalPrettyXml() never re-indents
+    { kElPara, NULL };
+
 void loadStringSet(StringSet& set, const char** strings)
 {
     while(*strings)
@@ -74,6 +77,7 @@ void loadStringSet(StringSet& set, const char** strings)
 XmlFixups::XmlFixups()
 {
     loadStringSet(m_duplicates, kNoDuplicates);
+    loadStringSet(m_nopretty, kNoPretty); // cache the no-indent element names as a set
     loadStringSet(m_removes, kRemoveTags);
     loadStringSet(m_removeEmpty, kRemoveEmpty);
     loadStringSet(m_requireAttrs, kRequireAttrs);
@@ -791,3 +795,56 @@ void XmlFixups::fixBlock(const DOM::Document& doc, DOM::Element& block)
         parent.replaceChild(el, block);
     }
 }
+
+void XmlFixups::prettyXml(DOM::Document& doc) // entry point: adds indent text nodes to doc in place
+{
+    DOM::Element el = doc.getDocumentElement();
+    internalPrettyXml(doc, el, 0); // recursion starts at the document element, nesting level 0
+}
+
+void XmlFixups::internalPrettyXml(const DOM::Document& doc, DOM::Element& el,
+                                  int level) // level: nesting depth of el; prettyXml() starts the recursion at 0
+{
+    DOM::Node child; // cursor shared by both loops over el's children
+
+    // Elements without any children are left exactly as they are
+    if(!el.hasChildNodes())
+        return;
+
+    // Elements named in m_nopretty are skipped, along with everything below them
+    if(m_nopretty.find(el.getNodeName()) != m_nopretty.end())
+        return;
+
+    // Pass 1: look for text content directly inside this element
+    for(child = el.getFirstChild(); child != NULL; child = child.getNextSibling())
+    {
+        // A single non-empty text child means el and its subtree are left untouched
+        if(child.getNodeType() == DOM::Node::TEXT_NODE &&
+           !child.getNodeValue().empty())
+            return;
+    }
+
+    // Indent for the children: CRLF plus four spaces per level, one level deeper than el
+    string indent("\r\n");
+    indent.append((level + 1) * 4, ' ');
+
+    for(child = el.getFirstChild(); child != NULL; child = child.getNextSibling()) // Pass 2: indent each non-text child
+    {
+        if(child.getNodeType() == DOM::Node::TEXT_NODE)
+            continue; // pass 1 returned on non-empty text, so only empty text nodes get here
+
+        DOM::Text text = doc.createTextNode(indent);
+        el.insertBefore(text, child); // presumably standard DOM semantics: text lands ahead of child, so the loop step never visits it
+
+        if(child.getNodeType() == DOM::Node::ELEMENT_NODE)
+            internalPrettyXml(doc, (DOM::Element&)child, level + 1); // recurse one level deeper
+    }
+
+    // Closing indent: CRLF plus el's own depth, the same width el's parent put in front of el
+    indent.assign("\r\n");
+    indent.append(level * 4, ' ');
+
+    DOM::Text text = doc.createTextNode(indent);
+    el.appendChild(text);
+}
+
diff --git a/src/xmlfixups.h b/src/xmlfixups.h
index f51d4a8..85d7c7e 100644
--- a/src/xmlfixups.h
+++ b/src/xmlfixups.h
@@ -133,7 +133,14 @@ public:
     // Replace blocks with 'fix' elements like paragraphs
     void fixBlock(const DOM::Document& doc, DOM::Element& block);
 
+    // Insert line-break and indent text nodes into doc so the XML is written with indents
+    void prettyXml(DOM::Document& doc);
 
+// Internal Helpers
+protected:
+    void internalPrettyXml(const DOM::Document& doc, DOM::Element& el, int level); // recursive worker behind prettyXml()
+
+// Data
 protected:
 
     enum
@@ -145,6 +152,7 @@ protected:
 
     // Our tables cached for efficiency
     StringSet m_duplicates;
+    StringSet m_nopretty; // element names that internalPrettyXml() leaves alone
     StringSet m_removes;
     StringSet m_removeEmpty;
     StringSet m_consolidateStart;
-- 
cgit v1.2.3