From 53914f770f1e1dc1ab4342c64846fd995825b7e6 Mon Sep 17 00:00:00 2001 From: Stef Date: Wed, 17 Sep 2003 18:34:42 +0000 Subject: Initial Import --- src/xmlcomposer.cpp | 1811 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1811 insertions(+) create mode 100644 src/xmlcomposer.cpp (limited to 'src/xmlcomposer.cpp') diff --git a/src/xmlcomposer.cpp b/src/xmlcomposer.cpp new file mode 100644 index 0000000..1bfeb30 --- /dev/null +++ b/src/xmlcomposer.cpp @@ -0,0 +1,1811 @@ +// RtfParser.cpp: implementation of the RtfParser class. +// +////////////////////////////////////////////////////////////////////// + +#include "stdafx.h" +#include "RtfAnalyser.h" + +const char* kElDest = "i_dest"; +const char* kElBlock = "i_block"; +const char* kAtFix = "i_fix"; +const char* kAtCell = "i_cell"; +const char* kElListtable = "i_listtable"; +const char* kElListdef = "i_listdef"; + +const char* kElPara = "para"; +const char* kElDoc = "document"; +const char* kElTab = "tab"; +const char* kElSect = "sect"; +const char* kElPage = "page"; +const char* kElStyle = "style"; +const char* kElLine = "line"; +const char* kElList = "list"; +const char* kElStylesheet = "stylesheet"; +const char* kElInfo = "info"; +const char* kElTitle = "title"; +const char* kElAuthor = "author"; +const char* kElOperator = "operator"; +const char* kElB = "b"; +const char* kElHide = "hide"; +const char* kElI = "i"; +const char* kElStrike = "strike"; +const char* kElU = "u"; +const char* kElColor = "color"; +const char* kElCell = "cell"; +const char* kElRow = "row"; +const char* kElTable = "table"; + +const char* kAtList = "list"; +const char* kAtName = "name"; +const char* kAtBold = "bold"; +const char* kAtHidden = "hide"; +const char* kAtItalic = "italic"; +const char* kAtStrike = "strike"; +const char* kAtUnderline = "underline"; +const char* kAtColor = "color"; +const char* kAtType = "type"; +const char* kAtOrdered = "ordered"; +const char* kAtStart = "start"; +const char* kAtId = "id"; +const char* kAtIndex = "id"; + +const wchar_t* kValDisc = L"disc"; +const wchar_t* kValLowerAlpha = L"lower-alpha"; +const wchar_t* kValUpperAlpha = L"upper-alpha"; +const wchar_t* kValLowerRoman = L"lower-roman"; +const wchar_t* kValUpperRoman = L"upper-roman"; +const wchar_t* kValArabic = L"arabic"; +const wchar_t* kValNull = L""; + +const wchar_t* kValList = L"list"; +const wchar_t* kValPara = L"para"; +const wchar_t* kValTable = L"table"; + +const char* kNoDuplicates[] = + { kElB, kElU, kElI, kElColor, kElHide, kElColor, NULL }; + +const char* kRemoveTags[] = + { kElDest, kElListdef, kElListtable, NULL }; + +const char* kBlockTags[] = + { kElTable, kElPara, NULL }; + +const char* kHideList[] = + { kAtId, kAtList, NULL }; + +const char* kNSPrefix = "xmlns"; + +////////////////////////////////////////////////////////////////////// +// Construction/Destruction +////////////////////////////////////////////////////////////////////// + +RtfParser::RtfParser(const RtfParserOptions& options) +{ + m_document = NULL; + memcpy(&m_options, &options, sizeof(options)); +} + +RtfParser::~RtfParser() +{ + clear(); + + if(m_impl != NULL) + m_impl.release(); +} + +void RtfParser::clear() +{ + if(m_document != NULL) + { + try + { + m_document.release(); + } + catch(...) { } + + m_document = NULL; + } + LevelHandler::clear(); +} + +void RtfParser::startDocument(RtfReader* reader) +{ + LevelHandler::startDocument(reader); + + // Create a new document + m_document = m_impl.createDocument("", kElDoc, DOM::DocumentType()); + + // TODO: Throw error if document is null + ASSERT(m_document != NULL); + + ASSERT(m_curLevel != NULL); + m_curLevel->setElement(m_document.getDocumentElement(), true); + + // Set the attributes on the top level + setAnalyser(AnalyserPtr(new Root)); + setDestination(DestinationPtr(new Content)); + getTextFormatting().resetPara(); + getTextFormatting().resetText(); +} + +void RtfParser::endDocument() +{ + LevelHandler::endDocument(); + + // Cleanup the tree + removeDuplicates(m_document); + breakTables(m_document); + breakTags(m_document, kElTable, kElRow); + breakTags(m_document, kElRow, kElCell); + wrapTags(m_document, kElCell, kElDest); + breakBlocks(m_document); + breakLists(m_document); + fixLists(m_document); + fixStyles(m_document); + fixBlocks(m_document); + removeTags(m_document); + breakBreak(m_document, kElDoc, kElPage); + breakBreak(m_document, kElDoc, kElSect); + return; +} + + + + + +// ----------------------------------------------------------------------- +// Helper functions + +DOM::Element RtfParser::createElement(const string& name) +{ + ASSERT(name.length() > 0); + return m_document.createElement(name); + + // TODO: Throw exception here if necessary +} + +void RtfParser::replaceElement(const DOM::Element& element) +{ + ASSERT(m_curLevel != NULL); + m_curLevel->setElement(element, true); +} + +void RtfParser::pushElement(const DOM::Element& element) +{ + ASSERT(m_curLevel != NULL); + getElement().appendChild(element); + m_curLevel->setElement(element); +} + +DOM::Element RtfParser::popElement() +{ + DOM::Element element = getElement(); + ASSERT(m_curLevel != NULL); + + DOM::Node parent = element.getParentNode(); + ASSERT(parent.getNodeType() == DOM::Node::ELEMENT_NODE); + + // Set it deep so it replaces the current element + m_curLevel->setElement((DOM::Element&)parent, true); + return element; +} + +void RtfParser::setAttribute(const string& name, const wstring& value, DOM::Element el) +{ + ASSERT(name.length() > 0); + if(el == NULL) + el = getElement(); + el.setAttribute(name, value); +} + +void RtfParser::setDestination(DestinationPtr dest) +{ + ASSERT(m_curLevel); + + m_curLevel->setDestination(dest); + dest->m_parser = this; + dest->initialize(); +} + +DestinationPtr RtfParser::replaceDestination(DestinationPtr dest) +{ + ASSERT(m_curLevel); + + DestinationPtr old = m_curLevel->getDestination(); + m_curLevel->setDestination(dest, true); + dest->m_parser = this; + dest->initialize(); + + return old; +} + + +void RtfParser::setAnalyser(AnalyserPtr analy) +{ + ASSERT(m_curLevel); + ASSERT(analy != NULL); + + analy->m_parser = this; + m_curLevel->setAnalyser(analy); + analy->initialize(); +} + +AnalyserPtr RtfParser::getAnalyser() +{ + ASSERT(m_curLevel); + return m_curLevel->getAnalyser(); +} + +DestinationPtr RtfParser::getDestination() +{ + ASSERT(m_curLevel); + return m_curLevel->getDestination(); +} + +RtfFormatting& RtfParser::getTextFormatting() +{ + ASSERT(m_curLevel); + return m_curLevel->getFormatting(); +} + + +// --------------------------------------------------------------------------------- +// Pass this stuff on through to the appropriate analysers etc... + +void RtfParser::charData(wstring data) +{ + ASSERT(m_curLevel != NULL); + DestinationPtr destination = m_curLevel->getDestination(); + if(destination) + { + destination->charData(data); + } + else + { + destination = DestinationPtr(new Content); + setDestination(destination); + } + +} + +void RtfParser::controlWord(const string& cw, int flags, int param) +{ + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->controlWord(cw, flags, param); +} + +void RtfParser::groupStart() +{ + LevelHandler::groupStart(); + + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->groupStart(); +} + +void RtfParser::groupEnd() +{ + ASSERT(m_curLevel != NULL); + AnalyserPtr analyser = m_curLevel->getAnalyser(); + if(analyser) + analyser->groupEnd(); + + LevelHandler::groupEnd(); +} + +#define ON_INITIALIZE(cls) \ + void RtfParser::cls::initialize() +#define ON_CONTROLWORD(cls) \ + void RtfParser::cls::controlWord(const string& cw, int flags, int param) +#define ON_CHARDATA(cls) \ + void RtfParser::cls::charData(wstring data) +#define ON_GROUPSTART(cls) \ + void RtfParser::cls::groupStart() +#define ON_GROUPEND(cls) \ + void RtfParser::cls::groupEnd() +#define ON_DONE(cls) \ + void RtfParser::cls::done() +#define AN_ELEMENT(name) \ + m_parser->pushElement(m_parser->createElement(name)) +#define AN_POP_ELEMENT() \ + m_parser->popElement() +#define AN_ATTRIBUTE(name, value) \ + m_parser->setAttribute(name, value) +#define AN_DESTINATION_ATTR(name) \ + m_parser->setDestination(new Attribute(name)) +#define AN_DESTINATION(cls) \ + m_parser->setDestination(new cls) +#define AN_ANALYSER(cls) \ + m_parser->setAnalyser(AnalyserPtr(new cls)) +#define AN_SET_ANALYSER(cls) \ + m_parser->setAnalyser(AnalyserPtr(cls)) +#define HAS_PARAM (flags & kHasParam) +#define DEFAULT_CONTROLWORD processDefault(cw, flags, param) +#define DUMMY 1 == 1 +#define NUM_ATTR(n) m_parser->formatInt(n) + +bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int param) +{ + if(cw == "upr") + { + AnalyserPtr analy = m_parser->getAnalyser(); + ASSERT(analy != NULL); + AN_SET_ANALYSER(new Upr(analy)); + return true; + } + + return false; +} + +void RtfParser::ParseAnalyser::applyParaFormatting(RtfFormatting* format, + DOM::Element& el) +{ + if(format == NULL) + format = &(m_parser->getTextFormatting()); + + wstring fix = kValPara; + + int list = format->paraList(); + if(list != -1) + { + el.setAttribute(kAtList, NUM_ATTR(list)); + } + else + { + el.removeAttribute(kAtList); + } + + if(format->paraInTable()) + el.setAttribute(kAtCell, L"1"); + else + el.removeAttribute(kAtCell); + + int style = format->paraStyle(); + if(style != -1) + el.setAttribute(kElStyle, NUM_ATTR(style)); + else + el.removeAttribute(kElStyle); + + el.setAttribute(kAtFix, fix); +} + +DOM::Element RtfParser::ParseAnalyser::getCurrentBlock() +{ + DOM::Node node = m_parser->getElement(); + + if(node.hasChildNodes()) + node = node.getLastChild(); + + return m_parser->getPriorElement(node, kElBlock); + +} + +bool RtfParser::ParseAnalyser::processTextContent(const string& cw, int flags, int param) +{ + DOM::Element el; + bool process = false; + + RtfFormatting& format = m_parser->getTextFormatting(); + + if(cw == "par") + { + el = getCurrentBlock(); + if(el != NULL) + applyParaFormatting(&format, el); + + el = m_parser->createElement(kElBlock); + applyParaFormatting(&format, el); + } + + else if(cw == "intbl") + format.paraSetTable(true); + + else if(cw == "cell") + { + el = getCurrentBlock(); + if(el != NULL) + applyParaFormatting(&format, el); + + el = m_parser->createElement(kElCell); + m_parser->pushElement(el); + m_parser->popElement(); + el = m_parser->createElement(kElBlock); + applyParaFormatting(&format, el); + } + + else if(cw == "trowd") + el = m_parser->createElement(kElRow); + + else if(cw == "tab") + el = m_parser->createElement(kElTab); + + else if(cw == "sect") + el = m_parser->createElement(kElSect); + + else if(cw == "page") + el = m_parser->createElement(kElPage); + + else if(cw == "s" && HAS_PARAM) + format.paraSetStyle(param); + + else if(cw == "line") + el = m_parser->createElement(kElLine); + + else if(cw == "header") + AN_ANALYSER(Skip); + else if(cw == "footer") + AN_ANALYSER(Skip); + else if(cw == "bkmkstart") + AN_ANALYSER(Skip); + else if(cw == "listtext") + AN_ANALYSER(Skip); + + else if(cw == "ls" && HAS_PARAM) + format.paraSetList(param); + + if(el != NULL) + { + // This ensures that our content destination is open and ready + DestinationPtr dest = m_parser->getDestination(); + ASSERT(dest != NULL); + dest->charData(kValNull); + + m_parser->pushElement(el); + m_parser->popElement(); + } + + return (el != NULL) || process; + + /* TODO: cell, row, intbl, cellx, trowd*/ +} + +bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, + int param, RtfFormatting& format) +{ + bool on = true; + if(flags & HAS_PARAM && param == 0) + on = false; + + if(cw == "pard") + { + format.resetPara(); +// applyParaFormatting(); + } + else if(cw == "plain") + format.resetText(); + else if(cw == "b") + format.textSetBold(on); + else if(cw == "i") + format.textSetItalic(on); + else if(cw == "v") + format.textSetHidden(on); + else if(cw == "ul") + format.textSetUnderline(on); + else if(cw == "cf" && HAS_PARAM) + format.textSetColor(param); + else + return false; + + return true; +} + +bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, int param) +{ + return processTextFormatting(cw, flags, param, m_parser->getTextFormatting()); +} + + +ON_INITIALIZE(Skip) + { AN_DESTINATION(Null); } +ON_GROUPSTART(Skip) + { AN_ANALYSER(Skip); } + + +RtfParser::Upr::Upr(AnalyserPtr prv) +{ + ASSERT(prv); + prev = prv; +} +ON_GROUPSTART(Upr) + { AN_ANALYSER(Skip); } +ON_GROUPEND(Upr) +{ + ASSERT(prev); + m_parser->setAnalyser(prev); + prev = NULL; +} + + +ON_INITIALIZE(Stylesheet) +{ + AN_ELEMENT(kElStylesheet); +} +ON_GROUPSTART(Stylesheet) +{ + AN_ANALYSER(Style); + AN_DESTINATION(Null); +} + + + +ON_INITIALIZE(Style) +{ + // Were not sure if this element is really something + // so we can't always create + haveStyle = false; +} +ON_CONTROLWORD(Style) +{ + // Get the style id + if(flags & kAsterisk) + { + AN_ANALYSER(Skip); + return; + } + + if(!haveStyle) + { + AN_ELEMENT(kElStyle); + AN_DESTINATION_ATTR(kAtName); + haveStyle = true; + } + + if(cw == "s" && flags & kHasParam) + { + AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); + } + + // Otherwise get as much formatting out of the tag as possible + else if(processTextFormatting(cw, flags, param)) + DUMMY; + + else + DEFAULT_CONTROLWORD; +} +ON_GROUPSTART(Style) +{ + AN_ANALYSER(Skip); +} +ON_GROUPEND(Style) +{ + RtfFormatting& props = m_parser->getTextFormatting(); + if(props.textIsBold()) + AN_ATTRIBUTE(kAtBold, L"1"); + if(props.textIsHidden()) + AN_ATTRIBUTE(kAtHidden, L"1"); + if(props.textIsItalic()) + AN_ATTRIBUTE(kAtItalic, L"1"); + if(props.textIsStrike()) + AN_ATTRIBUTE(kAtStrike, L"1"); + if(props.textIsUnderline()) + AN_ATTRIBUTE(kAtUnderline, L"1"); + if(props.textColor() != -1 && m_parser->getOptions().doColors) + AN_ATTRIBUTE(kAtColor, NUM_ATTR(props.textColor())); +} + + + +ON_INITIALIZE(ListTable) +{ + AN_ELEMENT(kElListtable); +} +ON_GROUPSTART(ListTable) +{ + AN_ANALYSER(List); + AN_DESTINATION(Null); +} + + + +ON_INITIALIZE(List) +{ + AN_ELEMENT(kElListdef); + AN_ATTRIBUTE(kAtType, kValDisc); + AN_ATTRIBUTE(kAtOrdered, L"0"); + levelsSeen = 0; +} +ON_CONTROLWORD(List) +{ + if(cw == "listname") + AN_DESTINATION_ATTR(kAtName); + else if(cw == "listid" && HAS_PARAM) + AN_ATTRIBUTE(kAtId, NUM_ATTR(param)); + + // We let listlevel in here too + else if(cw == "levelstartat" && HAS_PARAM) + AN_ATTRIBUTE(kAtStart, NUM_ATTR(param)); + + else if(cw == "levelnfc" && HAS_PARAM) + { + switch(param) + { + case 0: // 1, 2, 3 + case 5: // 1st, 2nd, 3rd + case 6: // One, Two, Three + case 7: // First, Second, Third + case 22: // 01, 02, 03 + AN_ATTRIBUTE(kAtType, kValArabic); + break; + case 1: // I, II, III + AN_ATTRIBUTE(kAtType, kValUpperRoman); + break; + case 2: // i, ii, iii + AN_ATTRIBUTE(kAtType, kValLowerRoman); + break; + case 3: // A, B, C + AN_ATTRIBUTE(kAtType, kValUpperAlpha); + break; + case 4: // a, b, c + AN_ATTRIBUTE(kAtType, kValLowerAlpha); + break; + default: + AN_ATTRIBUTE(kAtType, kValDisc); + break; + } + + switch(param) + { + case 0: case 5: case 6: case 7: case 22: + case 1: case 2: case 3: case 4: + AN_ATTRIBUTE(kAtOrdered, L"1"); + break; + default: + AN_ATTRIBUTE(kAtOrdered, L"0"); + } + } + + else + DEFAULT_CONTROLWORD; +} +ON_GROUPSTART(List) +{ + if(levelsSeen > 0) + AN_ANALYSER(Skip); + levelsSeen++; +} + + + + +ON_INITIALIZE(ListOverrideTable) +{ + DOM::Document document = m_parser->getDocument(); + lists = document.getElementsByTagName(kElListdef); + curList = NULL; + lsId = -1; +} +ON_GROUPSTART(ListOverrideTable) +{ + AN_DESTINATION(Null); +} +ON_CONTROLWORD(ListOverrideTable) +{ + // New list override clear + if(cw == "listoverride") + curList = NULL; + + // List id for current listoverride + else if(cw == "listid" && HAS_PARAM) + { + wstring id = NUM_ATTR(param); + + if(lists != NULL) + { + for(int i = 0; i < lists->getLength(); i++) + { + DOM::Node node = lists->item(i); + if(node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE) + { + DOM::Element element = (DOM::Element&)node; + if(element.getAttribute(kAtId) == id) + { + curList = element; + break; + } + } + } + } + } + + // The actual list code + else if(cw == "ls" && HAS_PARAM) + lsId = param; + + // Override the starting level for the node + else if(cw == "levelstartat" && HAS_PARAM) + { + if(curList != NULL) + curList.setAttribute(kAtStart, NUM_ATTR(param)); + } + + else + DEFAULT_CONTROLWORD; + + + // Okay before any overrides take effect we need to duplicate + // the list node for overriding, using the 'listid' and 'ls' we gathered + if(curList != NULL && lsId != -1) + { + DOM::Element parent = (DOM::Element&)curList.getParentNode(); + if(parent != NULL) + { + curList = (DOM::Element&)curList.cloneNode(true); + if(curList != NULL) + { + parent.appendChild(curList); + curList.setAttribute(kAtList, NUM_ATTR(lsId)); + } + } + + lsId = -1; + } + +} +ON_GROUPEND(ListOverrideTable) +{ + +} + + + + +ON_INITIALIZE(Info) +{ + // Create a new element + AN_ELEMENT(kElInfo); + AN_DESTINATION(Null); +} +ON_CONTROLWORD(Info) +{ + // The title + if(cw == "title") + { + AN_ELEMENT(kElTitle); + AN_DESTINATION(Raw); + } + else if(cw == "author") + { + AN_ELEMENT(kElAuthor); + AN_DESTINATION(Raw); + } + else if(cw == "operator") + { + AN_ELEMENT(kElOperator); + AN_DESTINATION(Raw); + } + else if(flags & kAsterisk) + AN_ANALYSER(Skip); + else + DEFAULT_CONTROLWORD; +} + + + + +ON_INITIALIZE(Root) +{ + +} +ON_CONTROLWORD(Root) +{ + if(cw == "stylesheet") + AN_ANALYSER(Stylesheet); + else if(cw == "listtable") + AN_ANALYSER(ListTable); + else if(cw == "listoverridetable") + AN_ANALYSER(ListOverrideTable); + else if(cw == "info") + AN_ANALYSER(Info); + else if(cw == "fonttbl") + AN_ANALYSER(Skip); + else if(cw == "colortbl") + AN_ANALYSER(Skip); + else if(cw == "pict") + { + AN_ANALYSER(Skip); + AN_DESTINATION(Null); + } + else if(flags & kAsterisk) + AN_ANALYSER(Skip); + else if(processTextContent(cw, flags, param)) + DUMMY; + else if(processTextFormatting(cw, flags, param)) + DUMMY; + else + DEFAULT_CONTROLWORD; +} + + +ON_INITIALIZE(Content) +{ + parent = m_parser->getElement(); + created = false; +} +ON_CHARDATA(Content) +{ + // Create the first time we get content + if(!created) + { + DOM::Element dest = m_parser->createElement(kElDest); + parent.appendChild(dest); + m_parser->replaceElement(dest); + + DOM::Element el = m_parser->createElement(kElBlock); + m_parser->pushElement(el); + m_parser->popElement(); + + created = true; + } + + if(data.length() == 0) + return; + + int elements = 0; + RtfFormatting& format = m_parser->getTextFormatting(); + + // Now do text Properties if necessary + if(format.textIsBold()) + { + AN_ELEMENT(kElB); + elements++; + } + if(format.textIsHidden()) + { + AN_ELEMENT(kElHide); + elements++; + } + if(format.textIsItalic()) + { + AN_ELEMENT(kElI); + elements++; + } + if(format.textIsStrike()) + { + AN_ELEMENT(kElStrike); + elements++; + } + if(format.textIsUnderline()) + { + AN_ELEMENT(kElU); + elements++; + } + if(format.textColor() != -1 && m_parser->getOptions().doColors) + { + AN_ELEMENT(kElColor); + AN_ATTRIBUTE(kAtIndex, NUM_ATTR(format.textColor())); + elements++; + } + + // Write the data to the element + m_parser->getElement().appendChild( + m_parser->getDocument().createTextNode(data)); + + // Now drop out of all the above formatting + while(elements-- > 0) + AN_POP_ELEMENT(); +} + +#if 0 +ON_INITIALIZE(Table) +{ + stack = 0; + level = m_parser->getLevel(); + AN_ELEMENT(kElTable); + AN_DESTINATION(Content); +} + +ON_CONTROLWORD(Table) +{ + ASSERT(stack >= 0); + ASSERT(level != NULL); + + if(cw == "trowd") + { + stack++; + } + else if(cw == "row") + { + stack--; + if(stack <= 0) + m_parser->rewindLevel(level); + } + + else if(processTextContent(cw, flags, param)) + DUMMY; + else if(processTextFormatting(cw, flags, param)) + DUMMY; + else + DEFAULT_CONTROLWORD; + + if(!m_parser->getTextFormatting().paraInTable()) + { + m_parser->rewindLevel(level); + } + +} +#endif + + + + +ON_CHARDATA(Raw) +{ + // Write the data to the element + m_parser->getElement().appendChild( + m_parser->getDocument().createTextNode(data)); +} + + + + +ON_INITIALIZE(Attribute) +{ + element = m_parser->getElement(); + ASSERT(element != NULL); +} + +ON_CHARDATA(Attribute) +{ + // Get the current value + wstring cur = element.getAttribute(name); + + if(data.at(data.size() - 1) == L';') + data.resize(data.size() - 1); + + // Append data + cur.append(data); + + // Write it back + element.setAttribute(name, cur); +} + + + + + +/** + * A quick check to see if a node is an element of a certain + * name + */ +bool RtfParser::isElement(const DOM::Node& node, const string& name) +{ + return node != NULL && node.getNodeType() == DOM::Node::ELEMENT_NODE && + node.getNodeName() == name; +} + +bool RtfParser::isEqualElement(const DOM::Element& el1, const DOM::Element& el2) +{ + if(el1.getNodeName() == el2.getNodeName()) + return false; + + DOM::NamedNodeMap at1 = el1.getAttributes(); + DOM::NamedNodeMap at2 = el2.getAttributes(); + + if(at1 == NULL && at2 == NULL) + return true; + + if(at1 == NULL || at2 == NULL || + at1->getLength() != at2->getLength()) + return false; + + for(int i = 0; i < at1->getLength(); i++) + { + DOM::Attr attr1 = (DOM::Attr&)at1->item(0); + if(attr1 != NULL) + return false; + + DOM::Attr attr2 = (DOM::Attr&)at2->getNamedItem(attr1.getNodeName()); + if(attr2 != NULL) + return false; + + if(attr1.getNodeValue() == attr2.getNodeValue()) + return false; + } + + return true; +} + +wstring RtfParser::formatInt(int num) +{ + wstring n; + n.format(L"%d", num); + return n; +} + +/** + * Gets the pertinent ancestor of this node, or returns null + * if not found. + */ +DOM::Element RtfParser::getContainingElement(const DOM::Node& node, const string& name) +{ + DOM::Node n = node; + + while(true) + { + n = n.getParentNode(); + if(n == NULL) + break; + + if(isElement(n, name)) + return (DOM::Element&)n; + } + + return DOM::Element(); +} + +DOM::Element RtfParser::getPriorElement(const DOM::Node& node, const string& name) +{ + DOM::Node n = node; + + while(n != NULL) + { + if(isElement(n, name)) + return (DOM::Element&)n; + + n = n.getPreviousSibling(); + } + + DOM::Node parent = node.getParentNode(); + + if(parent == NULL) + return DOM::Element(); + else + return getPriorElement(parent, name); +} + +bool isNsAttr(const string& name) +{ + return strncmp(name.c_str(), kNSPrefix, strlen(kNSPrefix)) ? false : true; +} + +void RtfParser::copyAttributes(const DOM::Element& src, DOM::Element& dest, + const char** hideList) +{ + // Now get both sets of attributes + DOM::NamedNodeMap srcMap = src.getAttributes(); + DOM::NamedNodeMap destMap = dest.getAttributes(); + + if(srcMap == NULL || destMap == NULL) + return; + + // And copy them from one to the other + for(int j = 0; j < srcMap->getLength(); j++) + { + DOM::Node attr = srcMap->item(j); + if(attr != NULL) + { + // BUG: Sablotron seems to have a bug in it's + // setAttributeNode implementation. It always + // adds a blank namespace + // attr = attr.cloneNode(false); + // if(attr != NULL) + // destMap.setNamedItem(attr); + + string name = attr.getNodeName(); + + if(hideList) + { + + for(const char** t = hideList; *t != NULL; t++) + { + if(name == *t) + name.clear(); + } + } + + if(name.length() > 0 && !isNsAttr(name)) + dest.setAttribute(attr.getNodeName(), attr.getNodeValue()); + } + } +} + + +void RtfParser::breakBreak(DOM::Document& doc, const string& contain, + const string& tag) +{ + DOM::NodeList els = doc.getElementsByTagName(tag); + if(els != NULL) + { + for(int i = 0; i < els->getLength(); i++) + { + DOM::Element el = (DOM::Element&)els->item(i); +#if 0 + // See if parent node only has this break tag + // in it. If so then replace parent with this + + DOM::Node parent = el.getParentNode(); + + if(parent != NULL) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && + el.getPreviousSibling() == NULL && + el.getNextSibling() == NULL) + { + grandparent.replaceChild(parent.removeChild(el), parent); + } + } +#endif + + breakElement(el, contain); + } + } +} + +/** + * Breaks a paragraph up through a previous level. Calls itself + * recursively to break paragraphs totally free up to containing + * destination. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * + * This is a + * test of your concentration. + * + */ +bool RtfParser::breakElement(DOM::Element& el, const string& contain) +{ + ASSERT(el != NULL); + + DOM::Element parent = (DOM::Element&)el.getParentNode(); + DOM::Element grandparent; + + string s = el.getNodeName(); + s = parent.getNodeName(); + + // Get the parent node + if(parent != NULL) + grandparent = (DOM::Element&)parent.getParentNode(); + + // Make sure we have something to work with before continuing + if(grandparent == NULL || parent == NULL || + isElement(parent, contain)) + return true; + + DOM::Node e; + + // Check to see if this is the first node in the parent. + // If so then just move out to before + if(el.getPreviousSibling() == NULL) + { + e = grandparent.insertBefore(parent.removeChild(el), parent); + } + + + // Check to see if this is the last node in the parent. + // If so then just move out to after the parent + else if(el.getNextSibling() == NULL) + { + DOM::Node next = parent.getNextSibling(); + if(next == NULL) + e = grandparent.appendChild(parent.removeChild(el)); + else + e = grandparent.insertBefore(parent.removeChild(el), next); + } + + + // Otherwise it's in the middle so split the parent + // element etc... + else + { + // Clone it but not deep + DOM::Element parent2 = (DOM::Element&)parent.cloneNode(false); + + if(parent2 == NULL) + return false; + + // Flag that tells us whether we moved anything up to parent + bool moved = false; + + // Now move all nodes after this one to the second parent. + while((e = el.getNextSibling()) != NULL) + { + parent2.appendChild(parent.removeChild(e)); + moved = true; + } + + // Remove the element from it's parent + e = parent.removeChild(el); + + // Okay now we move the paragraph up to the parent + insertAfter(grandparent, e, parent); + if(moved) + insertAfter(grandparent, parent2, e); + } + + // Now call it again with the paragraph in the new position + // untill everything's cut through! + return breakElement((DOM::Element&)e, contain); +} + +/** + * Changes from a marker based paragraph system to a contained + * paragraph system. Also applies paragraph attributes to the + * appropriate paragraph. + * + * For example: + * + * + * This is a + * test of your concentration. + * + * + * Becomes: + * + * This is a + * test of your concentration. + */ +void RtfParser::breakBlocks(DOM::Document& document) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = document.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (DOM::Element&)blocks->item(i); + + // If it's the single closed style para then break it + if(block != NULL && !block.hasChildNodes()) + breakElement(block, kElDest); + } + } + + + // Now group stuff in destinations into paras or other blocks + DOM::NodeList destinations = document.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL || !dest.hasChildNodes()) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element block; + + while(child != NULL) + { + // If it's a block + if(isElement(child, kElBlock)) + { + block = (DOM::Element&)child; + child = child.getNextSibling(); + continue; + } + + // If it's already a real block element + for(const char** t = kBlockTags; *t != NULL; t++) + { + if(isElement(child, *t)) + { + block = NULL; + break; + } + } + + // If there's a block then add to it + if(block != NULL) + { + block.appendChild(dest.removeChild(child)); + child = block; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfParser::wrapTags(DOM::Document& doc, const string& tagName, + const string& wrapName) +{ + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (DOM::Element&)tags->item(i); + + DOM::Element wrap = doc.createElement(wrapName); + while(tag.hasChildNodes()) + wrap.appendChild(tag.removeChild(tag.getFirstChild())); + + tag.appendChild(wrap); + } + } +} + +void RtfParser::breakTags(DOM::Document& doc, const string& parentName, + const string& tagName) +{ + DOM::NodeList parents = doc.getElementsByTagName(parentName); + if(parents != NULL) + { + for(int i = 0; i < parents->getLength(); i++) + { + DOM::Element parent = (DOM::Element&)parents->item(i); + + if(!parent.hasChildNodes()) + continue; + + DOM::NodeList tags = parent.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + breakElement((DOM::Element&)tags->item(i), parentName); + } + + DOM::Node tag = doc.createElement(tagName); + parent.insertBefore(tag, parent.getFirstChild()); + + DOM::Node child = tag; + + while(child != NULL && (child = child.getNextSibling()) != NULL) + { + if(isElement(child, kElBlock)) + { + DOM::Node next = child.getNextSibling(); + if(next == NULL) + { + parent.removeChild(child); + continue; + } + + if(isElement(next, tagName)) + { + DOM::Node twodown = next.getNextSibling(); + if(!isElement(twodown, kElBlock)) + { + child = parent.insertBefore(parent.removeChild(next), child); + } + else + { + parent.removeChild(child); + child = next; + } + } + } + + if(isElement(child, tagName)) + { + if(!tag.hasChildNodes()) + parent.removeChild(tag); + tag = child; + } + else + { + tag.appendChild(parent.removeChild(child)); + child = tag; + } + } + + if(!tag.hasChildNodes()) + parent.removeChild(tag); + } + } + + DOM::NodeList tags = doc.getElementsByTagName(tagName); + if(tags != NULL) + { + for(int i = 0; i < tags->getLength(); i++) + { + DOM::Element tag = (DOM::Element&)tags->item(i); + DOM::Node parent = tag.getParentNode(); + + if(parent != NULL && !isElement(parent, parentName)) + parent.removeChild(tag); + +#if 0 + else if(tag.hasChildNodes()) + { + DOM::NodeList children = tag.getChildNodes(); + if(children != NULL && children->getLength() == 1) + { + DOM::Node child = children->item(0); + if(child != NULL && !child.hasChildNodes() && + isElement(child, kElBlock)) + parent.removeChild(tag); + } + } +#endif + + } + } +} + +void RtfParser::breakLists(DOM::Document& doc) +{ + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element list; + DOM::Element e; + + wstring previd; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + wstring listid = e.getAttribute(kAtList); + if(listid.length() > 0) + { + e.removeAttribute(kAtList); + + if(list == NULL || previd != listid) + { + list = doc.createElement(kElList); + list.setAttribute(kAtList, listid); + dest.insertBefore(list, child); + previd = listid; + } + } + else + { + list = NULL; + previd.clear(); + } + } + + // It's not a block + if(list != NULL) + { + list.appendChild(dest.removeChild(child)); + child = list; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfParser::fixStyles(const DOM::Document doc) +{ + DOM::NodeList styles = doc.getElementsByTagName(kElStyle); + if(styles != NULL) + { + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (DOM::Element&)blocks->item(i); + + if(block == NULL || !block.hasAttribute(kElStyle)) + continue; + + for(int j = 0; j < styles->getLength(); j++) + { + DOM::Element style = (DOM::Element&)styles->item(j); + if(style != NULL) + { + if(style.getAttribute(kAtId) == block.getAttribute(kElStyle)) + { + wstring name = style.getAttribute(kAtName); + if(name.length() > 0) + block.setAttribute(kElStyle, name); + } + } + } + } + } + + for(int i = 0; i < styles->getLength(); i++) + { + DOM::Element style = (DOM::Element&)styles->item(i); + if(style != NULL) + style.removeAttribute(kAtId); + } + } + + +} + + +void RtfParser::breakTables(DOM::Document& doc) +{ + DOM::NodeList rows = doc.getElementsByTagName(kElRow); + if(rows != NULL) + { + for(int i = 0; i < rows->getLength(); i++) + { + DOM::Element row = (DOM::Element&)rows->item(i); + DOM::Node parent = row.getParentNode(); + + if(parent == NULL) + continue; + + if(isElement(parent, kElBlock)) + { + DOM::Node grandparent = parent.getParentNode(); + + if(grandparent != NULL && !row.hasChildNodes()) + { + if(row.getPreviousSibling() == NULL) + grandparent.insertBefore(parent.removeChild(row), parent); + else if(row.getNextSibling() == NULL) + insertAfter(grandparent, parent.removeChild(row), parent); + } + } + + breakElement(row, kElDest); + } + } + + + + // Now group stuff in destinations into tables + DOM::NodeList destinations = doc.getElementsByTagName(kElDest); + if(destinations != NULL) + { + for(int i = 0; i < destinations->getLength(); i++) + { + DOM::Element dest = (DOM::Element&)destinations->item(i); + + // Sanity Check + if(dest == NULL) + continue; + + // Go through the children of this destination + DOM::Node child = dest.getFirstChild(); + + DOM::Element table; + DOM::Element e; + + while(child != NULL) + { + // If it's a block and has a cell attribute + if(isElement(child, kElBlock)) + { + e = (DOM::Element&)child; + + // if it has a cell attribute + if(e.getAttribute(kAtCell).length() > 0) + { + e.removeAttribute(kAtCell); + + if(table == NULL) + { + table = doc.createElement(kElTable); + dest.insertBefore(table, child); + } + } + else + { + table = NULL; + } + } + + // It's not a block + if(table != NULL) + { + table.appendChild(dest.removeChild(child)); + child = table; + } + + child = child.getNextSibling(); + } + } + } +} + +void RtfParser::insertAfter(DOM::Node& parent, const DOM::Node& node, + const DOM::Node& ref) +{ + DOM::Node sibling = ref.getNextSibling(); + if(sibling == NULL) + parent.appendChild(node); + else + parent.insertBefore(node, sibling); +} + +void RtfParser::removeTags(const DOM::Document& doc) +{ + // Go through the list of nodes + for(const char** t = kRemoveTags; *t != NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + for(int j = 0; j < elements->getLength(); j++) + { + DOM::Element el = (DOM::Element&)elements->item(j); + DOM::Node parent = el->getParentNode(); + + if(parent == NULL) + continue; + + while(el.hasChildNodes()) + parent.insertBefore(el.removeChild(el.getFirstChild()), el); + + parent.removeChild(el); + } + } + } +} + +void RtfParser::fixLists(const DOM::Document doc) +{ + DOM::NodeList lists = doc.getElementsByTagName(kElList); + if(lists != NULL) + { + DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef); + if(listdefs != NULL) + { + for(int i = 0; i < listdefs->getLength(); i++) + { + DOM::Element listdef = (DOM::Element&)listdefs->item(i); + + if(listdef == NULL || !listdef.hasAttribute(kAtList)) + continue; + + for(int j = 0; j < lists->getLength(); j++) + { + DOM::Element list = (DOM::Element&)lists->item(j); + if(list != NULL) + { + if(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)) + { + copyAttributes(listdef, list, kHideList); + list.removeAttribute(kAtList); + } + } + } + } + } + } +} + +void RtfParser::fixBlocks(const DOM::Document doc) +{ + // First break out all the paragraphs to the destination level + DOM::NodeList blocks = doc.getElementsByTagName(kElBlock); + if(blocks != NULL) + { + string fix; + wstring val; + + for(int i = 0; i < blocks->getLength(); i++) + { + DOM::Element block = (DOM::Element&)blocks->item(i); + DOM::Node parent = block.getParentNode(); + + if(parent == NULL) + continue; + + fix.resize(0); + val.resize(0); + + val = block.getAttribute(kAtFix); + if(val.length() > 0) + block.removeAttribute(kAtFix); + + + if(val.length() > 0) + { + val = block.getAttributeNS("", kAtFix); + if(val.length() > 0) + block.removeAttributeNS("", kAtFix); + } + + if(val.length() > 0) + DOM::transcode16to8(val, fix); + + if(fix.length() == 0) + fix = kElPara; + + DOM::Element el = doc.createElement(fix); + copyAttributes(block, el, NULL); + + while(block.hasChildNodes()) + el.appendChild(block.removeChild(block.getFirstChild())); + + parent.replaceChild(el, block); + } + } +} + + +/** + * Removes adjacent duplicate nodes of certain names + */ +void RtfParser::removeDuplicates(const DOM::Document& doc) +{ + // Go through the list of nodes + for(const char** t = kNoDuplicates; *t = NULL; t++) + { + DOM::NodeList elements = doc.getElementsByTagName(*t); + if(elements != NULL) + { + int x = elements->getLength(); + for(int j = 0; j < elements->getLength(); j++) + { + + // Make sure it's a valid element + DOM::Element element = (DOM::Element&)elements->item(j); + if(element == NULL) + continue; + + // Get neighbors + DOM::Node previous = element.getPreviousSibling(); + DOM::Node next = element.getNextSibling(); + + // Make sure it's still in the document, as we may have + // removed it on a previous loop + DOM::Node parent = element.getParentNode(); + if(parent == NULL) + continue; + + // Combine previous if valid + if(previous != NULL && previous.getNodeType() == DOM::Node::ELEMENT_NODE && + isEqualElement((DOM::Element&)previous, element)) + { + while(previous.hasChildNodes()) + { + DOM::Node child = previous.removeChild(previous.getLastChild()); + if(child != NULL) + { + if(element.hasChildNodes()) + element.insertBefore(child, element.getFirstChild()); + else + element.appendChild(child); + } + } + + // Remove duplicate node + parent.removeChild(previous); + } + + // Combine next if valid + if(next != NULL && next.getNodeType() == DOM::Node::ELEMENT_NODE && + isEqualElement((DOM::Element&)next, element)) + { + while(next.hasChildNodes()) + { + DOM::Node child = next.removeChild(next.getFirstChild()); + if(child != NULL) + element.appendChild(child); + } + + // Remove duplicate node + parent.removeChild(next); + } + } + } + } +} \ No newline at end of file -- cgit v1.2.3