#include "usuals.h"
#include "rtfanalyser.h"
// ---------------------------------------------------------------------------
// Intermediate names. Elements listed in kRemoveTags are unwrapped by
// removeTags(), "i_block" elements are replaced by fixBlocks(), and the
// "i_fix"/"i_cell" attributes are consumed by fixBlocks()/breakTables().
const char* kElDest = "i_dest";
const char* kElBlock = "i_block";
const char* kAtFix = "i_fix";
const char* kAtCell = "i_cell";
const char* kElListtable = "i_listtable";
const char* kElListdef = "i_listdef";

// Element names
const char* kElPara = "para";
const char* kElDoc = "document";
const char* kElTab = "tab";
const char* kElSect = "sect";
const char* kElPage = "page";
const char* kElStyle = "style";
const char* kElLine = "line";
const char* kElList = "list";
const char* kElStylesheet = "stylesheet";
const char* kElInfo = "info";
const char* kElTitle = "title";
const char* kElAuthor = "author";
const char* kElOperator = "operator";
const char* kElB = "b";
const char* kElHide = "hide";
const char* kElI = "i";
const char* kElStrike = "strike";
const char* kElU = "u";
const char* kElColor = "color";
const char* kElCell = "cell";
const char* kElRow = "row";
const char* kElTable = "table";

// Attribute names
const char* kAtList = "list";
const char* kAtName = "name";
const char* kAtBold = "bold";
const char* kAtHidden = "hide";
const char* kAtItalic = "italic";
const char* kAtStrike = "strike";
const char* kAtUnderline = "underline";
const char* kAtColor = "color";
const char* kAtType = "type";
const char* kAtOrdered = "ordered";
const char* kAtStart = "start";
const char* kAtId = "id";
// NOTE(review): same value as kAtId -- confirm that is intended
const char* kAtIndex = "id";

// Attribute values
const wchar_t* kValDisc = L"disc";
const wchar_t* kValLowerAlpha = L"lower-alpha";
const wchar_t* kValUpperAlpha = L"upper-alpha";
const wchar_t* kValLowerRoman = L"lower-roman";
const wchar_t* kValUpperRoman = L"upper-roman";
const wchar_t* kValArabic = L"arabic";
const wchar_t* kValNull = L"";
const wchar_t* kValList = L"list";
const wchar_t* kValPara = L"para";
const wchar_t* kValTable = L"table";

// NULL-terminated tables driving the clean-up passes.

// Inline formatting elements whose adjacent equal siblings get merged by
// removeDuplicates(). One entry per formatting element emitted by the
// Content destination (the list used to name kElColor twice and omit
// kElStrike).
const char* kNoDuplicates[] =
    { kElB, kElU, kElI, kElColor, kElHide, kElStrike, NULL };

// Elements that removeTags() replaces with their own children
const char* kRemoveTags[] =
    { kElDest, kElListdef, kElListtable, NULL };

// Elements that breakBlocks() treats as already being real blocks
const char* kBlockTags[] =
    { kElTable, kElPara, NULL };

// Attributes fixLists() does not copy from a list definition onto a list
const char* kHideList[] =
    { kAtId, kAtList, NULL };

// Attribute names starting with this prefix are skipped by copyAttributes()
const char* kNSPrefix = "xmlns";
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
/**
 * Creates a parser that keeps its own copy of the given options.
 * No document exists until startDocument() creates one.
 */
RtfParser::RtfParser(const RtfParserOptions& options)
{
    m_document = NULL;

    // Copy through the type's own assignment instead of a raw byte copy,
    // which is only valid for trivially copyable types.
    m_options = options;
}
/**
 * Clears the parser state and then releases the DOM implementation
 * handle if one is held.
 */
RtfParser::~RtfParser()
{
    // Document and level state go first
    clear();

    if(m_impl != NULL)
    {
        m_impl.release();
    }
}
/**
 * Releases the current document (if any) and clears the level handler.
 */
void RtfParser::clear()
{
    if(m_document != NULL)
    {
        // Releasing is best effort: anything thrown here is deliberately
        // ignored so that clearing always completes.
        try
        {
            m_document.release();
        }
        catch(...)
        {
        }

        m_document = NULL;
    }

    LevelHandler::clear();
}
/**
 * Starts a new parse: creates the output document, makes its root element
 * the current element and installs the Root analyser with a Content
 * destination on the top level.
 */
void RtfParser::startDocument(RtfReader* reader)
{
    LevelHandler::startDocument(reader);

    // Fresh document whose root element is named by kElDoc
    m_document = m_impl.createDocument("", kElDoc, DOM::DocumentType());

    // TODO: Throw error if document is null
    ASSERT(m_document != NULL);
    ASSERT(m_curLevel != NULL);

    // The document element becomes the top level's element
    m_curLevel->setElement(m_document.getDocumentElement(), true);

    // Handlers for the top level
    setAnalyser(AnalyserPtr(new Root));
    setDestination(DestinationPtr(new Content));

    // Reset both paragraph and text formatting
    RtfFormatting& formatting = getTextFormatting();
    formatting.resetPara();
    formatting.resetText();
}
/**
 * Finishes the parse and runs the clean-up passes over the document tree.
 * The passes run in a fixed order; each one works on the output of the
 * previous ones.
 */
void RtfParser::endDocument()
{
    LevelHandler::endDocument();

    // Merge adjacent equal formatting elements
    removeDuplicates(m_document);

    // Tables, rows and cells
    breakTables(m_document);
    breakTags(m_document, kElTable, kElRow);
    breakTags(m_document, kElRow, kElCell);
    wrapTags(m_document, kElCell, kElDest);

    // Blocks and lists
    breakBlocks(m_document);
    breakLists(m_document);

    // Resolve lists, styles and block names
    fixLists(m_document);
    fixStyles(m_document);
    fixBlocks(m_document);

    // Unwrap the intermediate elements
    removeTags(m_document);

    // Lift page and section breaks up to the document element
    breakBreak(m_document, kElDoc, kElPage);
    breakBreak(m_document, kElDoc, kElSect);
}
// -----------------------------------------------------------------------
// Helper functions
/**
 * Creates a new, unattached element with the given name in the
 * current document.
 */
DOM::Element RtfParser::createElement(const string& name)
{
    ASSERT(name.length() > 0);

    // TODO: Throw exception here if necessary
    DOM::Element created = m_document.createElement(name);
    return created;
}
/**
 * Makes the given element the current level's element, passing `true`
 * as the second argument of setElement (the "deep" variant).
 */
void RtfParser::replaceElement(const DOM::Element& element)
{
    ASSERT(m_curLevel != NULL);

    const bool deep = true;
    m_curLevel->setElement(element, deep);
}
/**
 * Appends the element as a child of the current element and then makes
 * it the current element of this level.
 */
void RtfParser::pushElement(const DOM::Element& element)
{
    ASSERT(m_curLevel != NULL);

    // Attach below whatever is current right now ...
    DOM::Element current = getElement();
    current.appendChild(element);

    // ... and descend into it
    m_curLevel->setElement(element);
}
/**
 * Steps back out of the current element: its parent becomes the current
 * element of this level.
 *
 * @return the element that was current before the call
 */
DOM::Element RtfParser::popElement()
{
    // Check the precondition before the level is used, not after
    ASSERT(m_curLevel != NULL);

    DOM::Element element = getElement();
    DOM::Node parent = element.getParentNode();

    // The current element must be attached below another element
    ASSERT(parent != NULL && parent.getNodeType() == DOM::Node::ELEMENT_NODE);

    // Set it deep so it replaces the current element
    m_curLevel->setElement((DOM::Element&)parent, true);
    return element;
}
/**
 * Sets an attribute on the given element, or on the current element
 * when the element passed in is null.
 */
void RtfParser::setAttribute(const string& name, const wstring& value, DOM::Element el)
{
    ASSERT(name.length() > 0);

    if(el == NULL)
    {
        // No explicit target: use the current element
        el = getElement();
    }

    el.setAttribute(name, value);
}
/**
 * Installs a destination on the current level, points it back at this
 * parser and initializes it.
 *
 * @param dest the destination to install; must not be null
 */
void RtfParser::setDestination(DestinationPtr dest)
{
    ASSERT(m_curLevel);
    // Same precondition that setAnalyser() checks for analysers
    ASSERT(dest != NULL);

    m_curLevel->setDestination(dest);
    dest->m_parser = this;
    dest->initialize();
}
/**
 * Replaces the current level's destination (passing `true` as the second
 * argument of the level's setDestination), points the new destination
 * back at this parser and initializes it.
 *
 * @param dest the destination to install; must not be null
 * @return the destination that was in effect before the call
 */
DestinationPtr RtfParser::replaceDestination(DestinationPtr dest)
{
    ASSERT(m_curLevel);
    // Same precondition that setAnalyser() checks for analysers
    ASSERT(dest != NULL);

    // Remember the old destination so the caller can restore it
    DestinationPtr old = m_curLevel->getDestination();

    m_curLevel->setDestination(dest, true);
    dest->m_parser = this;
    dest->initialize();

    return old;
}
/**
 * Installs an analyser on the current level. The analyser is pointed
 * back at this parser before it is installed, and initialized afterwards.
 */
void RtfParser::setAnalyser(AnalyserPtr analy)
{
    ASSERT(m_curLevel);
    ASSERT(analy != NULL);

    // Back pointer first, so it is in place when the level takes it
    analy->m_parser = this;

    m_curLevel->setAnalyser(analy);

    analy->initialize();
}
/**
 * Returns the analyser of the current level.
 */
AnalyserPtr RtfParser::getAnalyser()
{
    ASSERT(m_curLevel);

    AnalyserPtr current = m_curLevel->getAnalyser();
    return current;
}
/**
 * Returns the destination of the current level.
 */
DestinationPtr RtfParser::getDestination()
{
    ASSERT(m_curLevel);

    DestinationPtr current = m_curLevel->getDestination();
    return current;
}
/**
 * Returns a reference to the formatting state held by the current level.
 */
RtfFormatting& RtfParser::getTextFormatting()
{
    ASSERT(m_curLevel);

    RtfFormatting& formatting = m_curLevel->getFormatting();
    return formatting;
}
// ---------------------------------------------------------------------------------
// Pass this stuff on through to the appropriate analysers etc...
/**
 * Passes character data to the current level's destination. When the
 * level has no destination yet, a Content destination is installed first
 * and then receives the data, so that no text is lost.
 */
void RtfParser::charData(wstring data)
{
    ASSERT(m_curLevel != NULL);

    DestinationPtr destination = m_curLevel->getDestination();
    if(!destination)
    {
        // Nothing to write into yet: fall back to a content destination
        destination = DestinationPtr(new Content);
        setDestination(destination);
    }

    // Previously the data was dropped whenever the destination had to be
    // created here.
    destination->charData(data);
}
void RtfParser::controlWord(const string& cw, int flags, int param)
{
ASSERT(m_curLevel != NULL);
AnalyserPtr analyser = m_curLevel->getAnalyser();
if(analyser)
analyser->controlWord(cw, flags, param);
}
void RtfParser::groupStart()
{
LevelHandler::groupStart();
ASSERT(m_curLevel != NULL);
AnalyserPtr analyser = m_curLevel->getAnalyser();
if(analyser)
analyser->groupStart();
}
void RtfParser::groupEnd()
{
ASSERT(m_curLevel != NULL);
AnalyserPtr analyser = m_curLevel->getAnalyser();
if(analyser)
analyser->groupEnd();
LevelHandler::groupEnd();
}
// ---------------------------------------------------------------------------
// Helper macros for the analyser and destination implementations below.
//
// ON_*(cls) expand to the definition header of the matching handler of the
// nested class RtfParser::cls; the handler body follows the macro in braces.
#define ON_INITIALIZE(cls) \
void RtfParser::cls::initialize()
#define ON_CONTROLWORD(cls) \
void RtfParser::cls::controlWord(const string& cw, int flags, int param)
#define ON_CHARDATA(cls) \
void RtfParser::cls::charData(wstring data)
#define ON_GROUPSTART(cls) \
void RtfParser::cls::groupStart()
#define ON_GROUPEND(cls) \
void RtfParser::cls::groupEnd()
#define ON_DONE(cls) \
void RtfParser::cls::done()
// AN_* are shorthands for calls on m_parser, so they can only be used where
// a member or variable named m_parser is in scope.
// Create an element and push it as the new current element
#define AN_ELEMENT(name) \
m_parser->pushElement(m_parser->createElement(name))
// Step back to the parent of the current element
#define AN_POP_ELEMENT() \
m_parser->popElement()
// Set an attribute on the current element
#define AN_ATTRIBUTE(name, value) \
m_parser->setAttribute(name, value)
// Install an Attribute destination for the named attribute
#define AN_DESTINATION_ATTR(name) \
m_parser->setDestination(new Attribute(name))
// Install a newly created destination of class cls
#define AN_DESTINATION(cls) \
m_parser->setDestination(new cls)
// Install a newly created analyser of class cls
#define AN_ANALYSER(cls) \
m_parser->setAnalyser(AnalyserPtr(new cls))
// Install an analyser from an existing pointer expression
#define AN_SET_ANALYSER(cls) \
m_parser->setAnalyser(AnalyserPtr(cls))
// Non-zero when the kHasParam bit is set; needs a variable `flags` in scope
#define HAS_PARAM (flags & kHasParam)
// Fallback handling; needs `cw`, `flags` and `param` in scope
#define DEFAULT_CONTROLWORD processDefault(cw, flags, param)
// Expression without effect, used as the body of "already handled" branches
#define DUMMY 1 == 1
// Formats an int as a wide string through the parser's formatInt()
#define NUM_ATTR(n) m_parser->formatInt(n)
bool RtfParser::ParseAnalyser::processDefault(const string& cw, int flags, int param)
{
if(cw == "upr")
{
AnalyserPtr analy = m_parser->getAnalyser();
ASSERT(analy != NULL);
AN_SET_ANALYSER(new Upr(analy));
return true;
}
return false;
}
/**
 * Writes paragraph formatting onto a block element as attributes: the
 * list id, the in-table marker and the style id are each set when present
 * and removed otherwise; the fix attribute is always set to kValPara.
 *
 * @param format formatting to apply, or NULL to use the parser's current
 *               formatting
 * @param el     the element receiving the attributes
 */
void RtfParser::ParseAnalyser::applyParaFormatting(RtfFormatting* format,
                                                   DOM::Element& el)
{
    RtfFormatting* fmt = format;
    if(fmt == NULL)
        fmt = &(m_parser->getTextFormatting());

    // List membership (-1 means none)
    const int listId = fmt->paraList();
    if(listId == -1)
        el.removeAttribute(kAtList);
    else
        el.setAttribute(kAtList, NUM_ATTR(listId));

    // Table membership
    if(fmt->paraInTable())
        el.setAttribute(kAtCell, L"1");
    else
        el.removeAttribute(kAtCell);

    // Paragraph style (-1 means none)
    const int styleId = fmt->paraStyle();
    if(styleId == -1)
        el.removeAttribute(kElStyle);
    else
        el.setAttribute(kElStyle, NUM_ATTR(styleId));

    // Name the block is to be given later on
    el.setAttribute(kAtFix, wstring(kValPara));
}
/**
 * Finds the most recent block element, searching backwards (through the
 * parser's getPriorElement) from the last child of the current element,
 * or from the current element itself when it has no children.
 */
DOM::Element RtfParser::ParseAnalyser::getCurrentBlock()
{
    DOM::Node start = m_parser->getElement();

    if(start.hasChildNodes())
    {
        start = start.getLastChild();
    }

    return m_parser->getPriorElement(start, kElBlock);
}
bool RtfParser::ParseAnalyser::processTextContent(const string& cw, int flags, int param)
{
DOM::Element el;
bool process = false;
RtfFormatting& format = m_parser->getTextFormatting();
if(cw == "par")
{
el = getCurrentBlock();
if(el != NULL)
applyParaFormatting(&format, el);
el = m_parser->createElement(kElBlock);
applyParaFormatting(&format, el);
}
else if(cw == "intbl")
format.paraSetTable(true);
else if(cw == "cell")
{
el = getCurrentBlock();
if(el != NULL)
applyParaFormatting(&format, el);
el = m_parser->createElement(kElCell);
m_parser->pushElement(el);
m_parser->popElement();
el = m_parser->createElement(kElBlock);
applyParaFormatting(&format, el);
}
else if(cw == "trowd")
el = m_parser->createElement(kElRow);
else if(cw == "tab")
el = m_parser->createElement(kElTab);
else if(cw == "sect")
el = m_parser->createElement(kElSect);
else if(cw == "page")
el = m_parser->createElement(kElPage);
else if(cw == "s" && HAS_PARAM)
format.paraSetStyle(param);
else if(cw == "line")
el = m_parser->createElement(kElLine);
else if(cw == "header")
AN_ANALYSER(Skip);
else if(cw == "footer")
AN_ANALYSER(Skip);
else if(cw == "bkmkstart")
AN_ANALYSER(Skip);
else if(cw == "listtext")
AN_ANALYSER(Skip);
else if(cw == "ls" && HAS_PARAM)
format.paraSetList(param);
if(el != NULL)
{
// This ensures that our content destination is open and ready
DestinationPtr dest = m_parser->getDestination();
ASSERT(dest != NULL);
dest->charData(kValNull);
m_parser->pushElement(el);
m_parser->popElement();
}
return (el != NULL) || process;
/* TODO: cell, row, intbl, cellx, trowd*/
}
/**
 * Applies a character/paragraph formatting control word to the given
 * formatting state. Toggle words switch the property off when they carry
 * a parameter of 0 and on otherwise.
 *
 * NOTE(review): no control word here sets the strike property, although
 * other code in this file reads it -- confirm whether that is intended.
 *
 * @return true if the control word was recognised
 */
bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags,
                                                     int param, RtfFormatting& format)
{
    // An explicit zero parameter turns a toggle off
    const bool on = !(HAS_PARAM && param == 0);

    if(cw == "pard")
    {
        format.resetPara();
        // applyParaFormatting();
        return true;
    }

    if(cw == "plain")
    {
        format.resetText();
        return true;
    }

    if(cw == "b")
    {
        format.textSetBold(on);
        return true;
    }

    if(cw == "i")
    {
        format.textSetItalic(on);
        return true;
    }

    if(cw == "v")
    {
        format.textSetHidden(on);
        return true;
    }

    if(cw == "ul")
    {
        format.textSetUnderline(on);
        return true;
    }

    if(cw == "cf" && HAS_PARAM)
    {
        format.textSetColor(param);
        return true;
    }

    return false;
}
bool RtfParser::ParseAnalyser::processTextFormatting(const string& cw, int flags, int param)
{
return processTextFormatting(cw, flags, param, m_parser->getTextFormatting());
}
// Skip: installs a Null destination when it takes over
ON_INITIALIZE(Skip)
{
    AN_DESTINATION(Null);
}
// Skip: nested groups get a Skip analyser of their own
ON_GROUPSTART(Skip)
{
    AN_ANALYSER(Skip);
}
/**
 * Upr remembers the analyser that was active before it, so that
 * groupEnd() can put it back.
 */
RtfParser::Upr::Upr(AnalyserPtr prv)
{
    ASSERT(prv);

    prev = prv;
}
// Upr: groups opened while Upr is active get a Skip analyser
ON_GROUPSTART(Upr)
{
    AN_ANALYSER(Skip);
}
// Upr: on group end, reinstall the remembered analyser and drop our
// reference to it
ON_GROUPEND(Upr)
{
    ASSERT(prev);

    m_parser->setAnalyser(prev);

    prev = NULL;
}
// Stylesheet: opens the <stylesheet> element that collects the styles
ON_INITIALIZE(Stylesheet)
{
    AN_ELEMENT(kElStylesheet);
}
// Stylesheet: each group gets a Style analyser, starting out with a
// Null destination
ON_GROUPSTART(Stylesheet)
{
    AN_ANALYSER(Style);

    AN_DESTINATION(Null);
}
// Style: the <style> element is created lazily by controlWord(), because
// at this point we're not sure the group really describes a style
ON_INITIALIZE(Style)
{
    haveStyle = false;
}
// Style: control words seen inside one style group. Words flagged with
// kAsterisk switch to a Skip analyser. The first other word creates the
// <style> element and directs text into its name attribute. "s" with a
// parameter gives the style id; remaining words go to the formatting
// handler and then to the default handler.
ON_CONTROLWORD(Style)
{
    if(flags & kAsterisk)
    {
        AN_ANALYSER(Skip);
        return;
    }

    // Create the element on first use
    if(!haveStyle)
    {
        AN_ELEMENT(kElStyle);
        AN_DESTINATION_ATTR(kAtName);
        haveStyle = true;
    }

    // Get the style id
    if(cw == "s" && HAS_PARAM)
    {
        AN_ATTRIBUTE(kAtId, NUM_ATTR(param));
        return;
    }

    // Otherwise get as much formatting out of the tag as possible
    if(!processTextFormatting(cw, flags, param))
    {
        DEFAULT_CONTROLWORD;
    }
}
// Style: groups nested inside a style get a Skip analyser
ON_GROUPSTART(Style)
{
    AN_ANALYSER(Skip);
}
// Style: when the style group closes, write the text formatting in
// effect as attributes on the current element. Color is written only
// when the doColors option is set.
ON_GROUPEND(Style)
{
    RtfFormatting& fmt = m_parser->getTextFormatting();

    if(fmt.textIsBold())
    {
        AN_ATTRIBUTE(kAtBold, L"1");
    }

    if(fmt.textIsHidden())
    {
        AN_ATTRIBUTE(kAtHidden, L"1");
    }

    if(fmt.textIsItalic())
    {
        AN_ATTRIBUTE(kAtItalic, L"1");
    }

    if(fmt.textIsStrike())
    {
        AN_ATTRIBUTE(kAtStrike, L"1");
    }

    if(fmt.textIsUnderline())
    {
        AN_ATTRIBUTE(kAtUnderline, L"1");
    }

    // -1 means no color is set
    if(fmt.textColor() != -1 && m_parser->getOptions().doColors)
    {
        AN_ATTRIBUTE(kAtColor, NUM_ATTR(fmt.textColor()));
    }
}
// ListTable: opens the intermediate element holding the list definitions
ON_INITIALIZE(ListTable)
{
    AN_ELEMENT(kElListtable);
}
// ListTable: each group gets a List analyser, starting out with a
// Null destination
ON_GROUPSTART(ListTable)
{
    AN_ANALYSER(List);

    AN_DESTINATION(Null);
}
// List: opens a list definition element that starts out as an
// unordered "disc" list, and resets the nested group counter
ON_INITIALIZE(List)
{
    levelsSeen = 0;

    AN_ELEMENT(kElListdef);
    AN_ATTRIBUTE(kAtType, kValDisc);
    AN_ATTRIBUTE(kAtOrdered, L"0");
}
// List: control words of one list definition. "listname" directs text
// into the name attribute, "listid" and "levelstartat" become the id and
// start attributes, and "levelnfc" selects the numbering type together
// with the ordered flag. Anything else goes to the default handler.
ON_CONTROLWORD(List)
{
    if(cw == "listname")
    {
        AN_DESTINATION_ATTR(kAtName);
    }
    else if(cw == "listid" && HAS_PARAM)
    {
        AN_ATTRIBUTE(kAtId, NUM_ATTR(param));
    }
    // We let listlevel in here too
    else if(cw == "levelstartat" && HAS_PARAM)
    {
        AN_ATTRIBUTE(kAtStart, NUM_ATTR(param));
    }
    else if(cw == "levelnfc" && HAS_PARAM)
    {
        // Every recognised numbering code is an ordered list; all other
        // codes fall back to an unordered disc list.
        const wchar_t* type = kValDisc;
        bool ordered = true;

        switch(param)
        {
        case 0:     // 1, 2, 3
        case 5:     // 1st, 2nd, 3rd
        case 6:     // One, Two, Three
        case 7:     // First, Second, Third
        case 22:    // 01, 02, 03
            type = kValArabic;
            break;
        case 1:     // I, II, III
            type = kValUpperRoman;
            break;
        case 2:     // i, ii, iii
            type = kValLowerRoman;
            break;
        case 3:     // A, B, C
            type = kValUpperAlpha;
            break;
        case 4:     // a, b, c
            type = kValLowerAlpha;
            break;
        default:
            ordered = false;
            break;
        }

        AN_ATTRIBUTE(kAtType, type);

        if(ordered)
            AN_ATTRIBUTE(kAtOrdered, L"1");
        else
            AN_ATTRIBUTE(kAtOrdered, L"0");
    }
    else
    {
        DEFAULT_CONTROLWORD;
    }
}
// List: the first nested group is analysed by this List analyser; every
// later nested group gets a Skip analyser
ON_GROUPSTART(List)
{
    const bool firstGroup = (levelsSeen <= 0);

    if(!firstGroup)
    {
        AN_ANALYSER(Skip);
    }

    levelsSeen++;
}
// ListOverrideTable: collects the list definitions created so far and
// starts with no list matched and no "ls" number pending
ON_INITIALIZE(ListOverrideTable)
{
    curList = NULL;
    lsId = -1;

    DOM::Document document = m_parser->getDocument();
    lists = document.getElementsByTagName(kElListdef);
}
// ListOverrideTable: nested groups get a Null destination
ON_GROUPSTART(ListOverrideTable)
{
    AN_DESTINATION(Null);
}
// ListOverrideTable: ties "ls" numbers to list definitions. "listid"
// selects the definition with the matching id attribute and "ls" supplies
// the number used in the text. Once both are known, the definition is
// cloned, the clone is appended next to it and tagged with the "ls"
// number in its list attribute. "levelstartat" sets the start attribute
// on whichever definition is current.
ON_CONTROLWORD(ListOverrideTable)
{
    if(cw == "listoverride")
    {
        // New list override clear
        curList = NULL;
    }
    else if(cw == "listid" && HAS_PARAM)
    {
        // List id for current listoverride
        const wstring wanted = NUM_ATTR(param);

        if(lists != NULL)
        {
            for(int idx = 0; idx < lists->getLength(); idx++)
            {
                DOM::Node candidate = lists->item(idx);
                if(candidate == NULL ||
                   candidate.getNodeType() != DOM::Node::ELEMENT_NODE)
                    continue;

                DOM::Element def = (DOM::Element&)candidate;
                if(def.getAttribute(kAtId) == wanted)
                {
                    curList = def;
                    break;
                }
            }
        }
    }
    else if(cw == "ls" && HAS_PARAM)
    {
        // The actual list code
        lsId = param;
    }
    else if(cw == "levelstartat" && HAS_PARAM)
    {
        // Override the starting level for the node
        if(curList != NULL)
            curList.setAttribute(kAtStart, NUM_ATTR(param));
    }
    else
    {
        DEFAULT_CONTROLWORD;
    }

    // Okay before any overrides take effect we need to duplicate
    // the list node for overriding, using the 'listid' and 'ls' we gathered
    if(curList != NULL && lsId != -1)
    {
        DOM::Element holder = (DOM::Element&)curList.getParentNode();
        if(holder != NULL)
        {
            // From here on curList refers to the copy
            curList = (DOM::Element&)curList.cloneNode(true);
            if(curList != NULL)
            {
                holder.appendChild(curList);
                curList.setAttribute(kAtList, NUM_ATTR(lsId));
            }
        }

        // The number has been used up
        lsId = -1;
    }
}
// ListOverrideTable: nothing to do when a group closes
ON_GROUPEND(ListOverrideTable)
{
    // Intentionally empty
}
// Info: opens the <info> element; a Null destination is in place until
// a recognised field installs another one
ON_INITIALIZE(Info)
{
    AN_ELEMENT(kElInfo);

    AN_DESTINATION(Null);
}
// Info: "title", "author" and "operator" each open an element of the
// same name that receives the raw text. Other words flagged with
// kAsterisk switch to a Skip analyser; the rest go to the default handler.
ON_CONTROLWORD(Info)
{
    // Element to open for this field, if it is one we keep
    const char* field = NULL;

    if(cw == "title")
        field = kElTitle;
    else if(cw == "author")
        field = kElAuthor;
    else if(cw == "operator")
        field = kElOperator;

    if(field != NULL)
    {
        AN_ELEMENT(field);
        AN_DESTINATION(Raw);
    }
    else if(flags & kAsterisk)
    {
        AN_ANALYSER(Skip);
    }
    else
    {
        DEFAULT_CONTROLWORD;
    }
}
// Root: no set-up needed
ON_INITIALIZE(Root)
{
    // Intentionally empty
}
// Root: top-level dispatch. The document tables and the info group get
// analysers of their own, the font and color tables and pictures are
// skipped, and so are other words flagged with kAsterisk. Everything else
// is tried as content, then as formatting, then by the default handler.
ON_CONTROLWORD(Root)
{
    if(cw == "stylesheet")
    {
        AN_ANALYSER(Stylesheet);
    }
    else if(cw == "listtable")
    {
        AN_ANALYSER(ListTable);
    }
    else if(cw == "listoverridetable")
    {
        AN_ANALYSER(ListOverrideTable);
    }
    else if(cw == "info")
    {
        AN_ANALYSER(Info);
    }
    else if(cw == "fonttbl" || cw == "colortbl")
    {
        AN_ANALYSER(Skip);
    }
    else if(cw == "pict")
    {
        AN_ANALYSER(Skip);
        AN_DESTINATION(Null);
    }
    else if(flags & kAsterisk)
    {
        AN_ANALYSER(Skip);
    }
    else if(!processTextContent(cw, flags, param))
    {
        if(!processTextFormatting(cw, flags, param))
        {
            DEFAULT_CONTROLWORD;
        }
    }
}
// Content: remembers the element that is current at install time; the
// wrapper element is only created when the first data arrives
ON_INITIALIZE(Content)
{
    created = false;
    parent = m_parser->getElement();
}
// Content: writes text into the document. The first call (even with
// empty data) creates the destination wrapper below the remembered parent
// plus an initial empty block. Non-empty text is then wrapped in one
// element per active formatting property and appended as a text node.
ON_CHARDATA(Content)
{
    // Create the first time we get content
    if(!created)
    {
        DOM::Element wrapper = m_parser->createElement(kElDest);
        parent.appendChild(wrapper);
        m_parser->replaceElement(wrapper);

        // Initial block marker: pushed and immediately left again
        DOM::Element firstBlock = m_parser->createElement(kElBlock);
        m_parser->pushElement(firstBlock);
        m_parser->popElement();

        created = true;
    }

    // Empty data only serves to open the destination
    if(data.length() == 0)
        return;

    RtfFormatting& format = m_parser->getTextFormatting();

    // How many formatting elements were opened for this run of text
    int depth = 0;

    // Now do text Properties if necessary
    if(format.textIsBold())
    {
        AN_ELEMENT(kElB);
        ++depth;
    }

    if(format.textIsHidden())
    {
        AN_ELEMENT(kElHide);
        ++depth;
    }

    if(format.textIsItalic())
    {
        AN_ELEMENT(kElI);
        ++depth;
    }

    if(format.textIsStrike())
    {
        AN_ELEMENT(kElStrike);
        ++depth;
    }

    if(format.textIsUnderline())
    {
        AN_ELEMENT(kElU);
        ++depth;
    }

    if(format.textColor() != -1 && m_parser->getOptions().doColors)
    {
        AN_ELEMENT(kElColor);
        AN_ATTRIBUTE(kAtIndex, NUM_ATTR(format.textColor()));
        ++depth;
    }

    // Write the data to the element
    DOM::Element innermost = m_parser->getElement();
    innermost.appendChild(m_parser->getDocument().createTextNode(data));

    // Now drop out of all the above formatting
    for(; depth > 0; --depth)
    {
        AN_POP_ELEMENT();
    }
}
// Disabled: a dedicated Table analyser. The code below is excluded from
// compilation by the preprocessor and is kept for reference only.
#if 0
ON_INITIALIZE(Table)
{
stack = 0;
level = m_parser->getLevel();
AN_ELEMENT(kElTable);
AN_DESTINATION(Content);
}
ON_CONTROLWORD(Table)
{
ASSERT(stack >= 0);
ASSERT(level != NULL);
if(cw == "trowd")
{
stack++;
}
else if(cw == "row")
{
stack--;
if(stack <= 0)
m_parser->rewindLevel(level);
}
else if(processTextContent(cw, flags, param))
DUMMY;
else if(processTextFormatting(cw, flags, param))
DUMMY;
else
DEFAULT_CONTROLWORD;
if(!m_parser->getTextFormatting().paraInTable())
{
m_parser->rewindLevel(level);
}
}
#endif
// Raw: appends the data as a text node to the current element, without
// any formatting elements
ON_CHARDATA(Raw)
{
    DOM::Element target = m_parser->getElement();

    // Write the data to the element
    target.appendChild(m_parser->getDocument().createTextNode(data));
}
// Attribute: binds to the element that is current at install time; the
// text received later is written to an attribute of that element
ON_INITIALIZE(Attribute)
{
    element = m_parser->getElement();

    ASSERT(element != NULL);
}
// Attribute: appends the data to the bound element's attribute, dropping
// one trailing ';' from the data first. Empty data is ignored; it used to
// make at() throw because size() - 1 wrapped around.
ON_CHARDATA(Attribute)
{
    // Nothing to append
    if(data.empty())
        return;

    // Strip the trailing terminator, if there is one
    if(data.at(data.size() - 1) == L';')
        data.resize(data.size() - 1);

    // Get the current value
    wstring cur = element.getAttribute(name);

    // Append data
    cur.append(data);

    // Write it back
    element.setAttribute(name, cur);
}
/**
 * Tells whether a node is a non-null element node with the given name.
 */
bool RtfParser::isElement(const DOM::Node& node, const string& name)
{
    if(!(node != NULL))
        return false;

    if(!(node.getNodeType() == DOM::Node::ELEMENT_NODE))
        return false;

    return node.getNodeName() == name;
}
/**
 * Tells whether two elements are equal for the purpose of merging: they
 * must have the same name and the same set of attributes with the same
 * values. Children are not compared.
 *
 * Every comparison in the earlier version was inverted (equal names,
 * present attributes and equal values were all rejected) and the loop
 * always looked at the first attribute only.
 */
bool RtfParser::isEqualElement(const DOM::Element& el1, const DOM::Element& el2)
{
    // Different names can never be equal
    if(!(el1.getNodeName() == el2.getNodeName()))
        return false;

    DOM::NamedNodeMap at1 = el1.getAttributes();
    DOM::NamedNodeMap at2 = el2.getAttributes();

    // Neither has an attribute map: nothing left to compare
    if(at1 == NULL && at2 == NULL)
        return true;

    // Only one has a map, or the attribute counts differ
    if(at1 == NULL || at2 == NULL ||
       at1->getLength() != at2->getLength())
        return false;

    // The counts match, so it is enough that every attribute of the first
    // element exists with the same value on the second
    for(int i = 0; i < at1->getLength(); i++)
    {
        DOM::Attr attr1 = (DOM::Attr&)at1->item(i);
        if(attr1 == NULL)
            return false;

        DOM::Attr attr2 = (DOM::Attr&)at2->getNamedItem(attr1.getNodeName());
        if(attr2 == NULL)
            return false;

        if(!(attr1.getNodeValue() == attr2.getNodeValue()))
            return false;
    }

    return true;
}
/**
 * Formats an integer as a decimal wide string, with a leading '-' for
 * negative values.
 *
 * Done by hand because the two-argument swprintf(buffer, format, ...)
 * used before is not the standard signature (the standard one takes the
 * buffer size) and performs no bounds checking.
 */
wstring RtfParser::formatInt(int num)
{
    // Room for the digits of a 64-bit value, a sign and the terminator
    wchar_t buff[24];
    wchar_t* pos = buff + (sizeof(buff) / sizeof(buff[0])) - 1;
    *pos = 0;

    // Work on the magnitude as unsigned so the most negative value is safe
    unsigned int value = (num < 0) ? (0u - (unsigned int)num) : (unsigned int)num;

    // Fill in the digits from the right
    do
    {
        *--pos = (wchar_t)(L'0' + (value % 10));
        value /= 10;
    }
    while(value != 0);

    if(num < 0)
        *--pos = L'-';

    return wstring(pos);
}
/**
 * Returns the nearest ancestor of the node that is an element with the
 * given name, or a null element when there is none. The node itself is
 * not considered.
 */
DOM::Element RtfParser::getContainingElement(const DOM::Node& node, const string& name)
{
    for(DOM::Node up = node.getParentNode(); up != NULL; up = up.getParentNode())
    {
        if(isElement(up, name))
            return (DOM::Element&)up;
    }

    // Ran off the top of the tree
    return DOM::Element();
}
/**
 * Searches backwards for an element with the given name: first the node
 * itself and its previous siblings, then -- recursively -- the parent and
 * the parent's previous siblings, and so on upwards.
 *
 * @return the first match, or a null element when nothing matches
 */
DOM::Element RtfParser::getPriorElement(const DOM::Node& node, const string& name)
{
    // This node and everything before it on the same level
    for(DOM::Node cur = node; cur != NULL; cur = cur.getPreviousSibling())
    {
        if(isElement(cur, name))
            return (DOM::Element&)cur;
    }

    // Not on this level: continue one level up
    DOM::Node above = node.getParentNode();
    if(above != NULL)
        return getPriorElement(above, name);

    return DOM::Element();
}
/**
 * Tells whether an attribute name starts with kNSPrefix.
 */
bool isNsAttr(const string& name)
{
    const size_t prefixLen = strlen(kNSPrefix);
    return strncmp(name.c_str(), kNSPrefix, prefixLen) == 0;
}
/**
 * Copies attributes from one element to another. Attributes named in the
 * optional NULL-terminated hide list, attributes with an empty name and
 * attributes for which isNsAttr() is true are not copied.
 *
 * @param src      element to read attributes from
 * @param dest     element to set attributes on
 * @param hideList names to leave out, or NULL to hide nothing
 */
void RtfParser::copyAttributes(const DOM::Element& src, DOM::Element& dest,
                               const char** hideList)
{
    // Now get both sets of attributes
    DOM::NamedNodeMap srcMap = src.getAttributes();
    DOM::NamedNodeMap destMap = dest.getAttributes();

    if(srcMap == NULL || destMap == NULL)
        return;

    // And copy them from one to the other
    for(int idx = 0; idx < srcMap->getLength(); idx++)
    {
        DOM::Node attr = srcMap->item(idx);
        if(attr == NULL)
            continue;

        // BUG: Sablotron seems to have a bug in it's
        // setAttributeNode implementation. It always
        // adds a blank namespace
        //      attr = attr.cloneNode(false);
        //      if(attr != NULL)
        //          destMap.setNamedItem(attr);

        const string name = attr.getNodeName();

        // Is this one of the attributes the caller wants left out?
        bool hidden = false;
        if(hideList)
        {
            for(const char** entry = hideList; *entry != NULL; entry++)
            {
                if(name == *entry)
                {
                    hidden = true;
                    break;
                }
            }
        }

        if(hidden || name.length() == 0 || isNsAttr(name))
            continue;

        dest.setAttribute(attr.getNodeName(), attr.getNodeValue());
    }
}
/**
 * Breaks every element named `tag` out of its ancestors (through
 * breakElement) until its parent is an element named `contain`.
 */
void RtfParser::breakBreak(DOM::Document& doc, const string& contain,
                           const string& tag)
{
    DOM::NodeList found = doc.getElementsByTagName(tag);
    if(found == NULL)
        return;

    for(int idx = 0; idx < found->getLength(); idx++)
    {
        DOM::Element el = (DOM::Element&)found->item(idx);

#if 0
        // See if parent node only has this break tag
        // in it. If so then replace parent with this
        DOM::Node parent = el.getParentNode();
        if(parent != NULL)
        {
            DOM::Node grandparent = parent.getParentNode();
            if(grandparent != NULL &&
               el.getPreviousSibling() == NULL &&
               el.getNextSibling() == NULL)
            {
                grandparent.replaceChild(parent.removeChild(el), parent);
            }
        }
#endif

        breakElement(el, contain);
    }
}
/**
 * Breaks an element out of its parent and calls itself recursively until
 * the element's parent is an element named `contain` (or until there is
 * no parent or grandparent left to work with).
 *
 * At each step the element moves up one level: to just before the parent
 * when it is the parent's first child, to just after it when it is the
 * last child, and otherwise the parent is split in two around it.
 *
 * For example, breaking <x/> out to <c>:
 *
 *   <c>This is <b>a <x/>test</b> of your concentration.</c>
 *
 * Becomes:
 *
 *   <c>This is <b>a </b><x/><b>test</b> of your concentration.</c>
 *
 * @param el      the element to break out; must not be null
 * @param contain name of the element at which breaking stops
 * @return false if the parent could not be cloned for a split,
 *         true otherwise
 */
bool RtfParser::breakElement(DOM::Element& el, const string& contain)
{
    ASSERT(el != NULL);

    DOM::Element parent = (DOM::Element&)el.getParentNode();
    DOM::Element grandparent;

    // Get the parent node
    // (Leftover debugging code used to ask the parent for its name before
    // this null check; it has been removed.)
    if(parent != NULL)
        grandparent = (DOM::Element&)parent.getParentNode();

    // Make sure we have something to work with before continuing
    if(grandparent == NULL || parent == NULL ||
       isElement(parent, contain))
        return true;

    // The element in its new position
    DOM::Node e;

    // Check to see if this is the first node in the parent.
    // If so then just move out to before
    if(el.getPreviousSibling() == NULL)
    {
        e = grandparent.insertBefore(parent.removeChild(el), parent);
    }

    // Check to see if this is the last node in the parent.
    // If so then just move out to after the parent
    else if(el.getNextSibling() == NULL)
    {
        DOM::Node next = parent.getNextSibling();
        if(next == NULL)
            e = grandparent.appendChild(parent.removeChild(el));
        else
            e = grandparent.insertBefore(parent.removeChild(el), next);
    }

    // Otherwise it's in the middle so split the parent
    // element etc...
    else
    {
        // Clone it but not deep
        DOM::Element parent2 = (DOM::Element&)parent.cloneNode(false);
        if(parent2 == NULL)
            return false;

        // Flag that tells us whether we moved anything up to parent
        bool moved = false;

        // Now move all nodes after this one to the second parent.
        while((e = el.getNextSibling()) != NULL)
        {
            parent2.appendChild(parent.removeChild(e));
            moved = true;
        }

        // Remove the element from its parent
        e = parent.removeChild(el);

        // Okay now we move the element up to after the parent
        insertAfter(grandparent, e, parent);

        // The second half follows the element, if it received anything
        if(moved)
            insertAfter(grandparent, parent2, e);
    }

    // Now call it again with the element in the new position
    // until everything's cut through!
    return breakElement((DOM::Element&)e, contain);
}
/**
 * Changes from a marker based block system to a contained block system.
 *
 * First every empty block marker is broken out to its destination
 * element. Then, inside each destination, nodes that follow a block
 * marker are moved into that marker. Nodes before the first marker stay
 * where they are, and an element listed in kBlockTags ends the current
 * block and is left in place.
 *
 * For example:
 *
 *   <i_dest><i_block/>This is a<i_block/>test of your concentration.</i_dest>
 *
 * Becomes:
 *
 *   <i_dest><i_block>This is a</i_block><i_block>test of your
 *   concentration.</i_block></i_dest>
 */
void RtfParser::breakBlocks(DOM::Document& document)
{
    // First break out all the paragraphs to the destination level
    DOM::NodeList markers = document.getElementsByTagName(kElBlock);
    if(markers != NULL)
    {
        for(int idx = 0; idx < markers->getLength(); idx++)
        {
            DOM::Element marker = (DOM::Element&)markers->item(idx);

            // Only the single closed (empty) style of block gets broken
            if(marker == NULL || marker.hasChildNodes())
                continue;

            breakElement(marker, kElDest);
        }
    }

    // Now group stuff in destinations into paras or other blocks
    DOM::NodeList destinations = document.getElementsByTagName(kElDest);
    if(destinations == NULL)
        return;

    for(int idx = 0; idx < destinations->getLength(); idx++)
    {
        DOM::Element dest = (DOM::Element&)destinations->item(idx);

        // Sanity Check
        if(dest == NULL || !dest.hasChildNodes())
            continue;

        // Block currently collecting nodes, if any
        DOM::Element block;

        // Go through the children of this destination
        DOM::Node child = dest.getFirstChild();
        while(child != NULL)
        {
            // A block marker starts collecting
            if(isElement(child, kElBlock))
            {
                block = (DOM::Element&)child;
                child = child.getNextSibling();
                continue;
            }

            // If it's already a real block element, stop collecting
            for(const char** known = kBlockTags; *known != NULL; known++)
            {
                if(isElement(child, *known))
                {
                    block = NULL;
                    break;
                }
            }

            // If there's a block then add to it, and carry on from the
            // block itself
            if(block != NULL)
            {
                block.appendChild(dest.removeChild(child));
                child = block;
            }

            child = child.getNextSibling();
        }
    }
}
/**
 * Gives every element named `tagName` a single child element named
 * `wrapName` that takes over all of its former children.
 */
void RtfParser::wrapTags(DOM::Document& doc, const string& tagName,
                         const string& wrapName)
{
    DOM::NodeList found = doc.getElementsByTagName(tagName);
    if(found == NULL)
        return;

    for(int idx = 0; idx < found->getLength(); idx++)
    {
        DOM::Element outer = (DOM::Element&)found->item(idx);
        DOM::Element inner = doc.createElement(wrapName);

        // Move the children across one at a time, keeping their order
        while(outer.hasChildNodes())
        {
            inner.appendChild(outer.removeChild(outer.getFirstChild()));
        }

        outer.appendChild(inner);
    }
}
/**
 * Turns marker elements named tagName into containers inside every
 * element named parentName.
 *
 * For each parent: all tagName descendants are first broken out until
 * they are direct children of the parent. Then the parent's children are
 * walked in order and every node that is not a tagName element is moved
 * into the most recent tagName element (an extra one is inserted at the
 * front to collect leading nodes). Block markers next to a tagName
 * element get special treatment, see below.
 *
 * Finally any tagName element in the document whose parent is not a
 * parentName element is removed.
 */
void RtfParser::breakTags(DOM::Document& doc, const string& parentName,
const string& tagName)
{
DOM::NodeList parents = doc.getElementsByTagName(parentName);
if(parents != NULL)
{
for(int i = 0; i < parents->getLength(); i++)
{
DOM::Element parent = (DOM::Element&)parents->item(i);
if(!parent.hasChildNodes())
continue;
// Bring every tagName descendant up to be a direct child of parent
DOM::NodeList tags = parent.getElementsByTagName(tagName);
if(tags != NULL)
{
// Note: this inner index shadows the outer loop's i
for(int i = 0; i < tags->getLength(); i++)
breakElement((DOM::Element&)tags->item(i), parentName);
}
// Start with a fresh container in front of everything, to collect
// the nodes that come before the first existing tagName element
DOM::Node tag = doc.createElement(tagName);
parent.insertBefore(tag, parent.getFirstChild());
DOM::Node child = tag;
// Walk the siblings that follow the container being filled
while(child != NULL && (child = child.getNextSibling()) != NULL)
{
if(isElement(child, kElBlock))
{
DOM::Node next = child.getNextSibling();
// A block marker that is the last child is dropped
if(next == NULL)
{
parent.removeChild(child);
continue;
}
if(isElement(next, tagName))
{
DOM::Node twodown = next.getNextSibling();
// Block followed by a tagName element: unless another block
// follows that, move the tagName element in front of the
// block and continue from it ...
if(!isElement(twodown, kElBlock))
{
child = parent.insertBefore(parent.removeChild(next), child);
}
// ... otherwise drop this block and continue from the
// tagName element
else
{
parent.removeChild(child);
child = next;
}
}
}
// A tagName element becomes the container for what follows; the
// previous container is removed if it collected nothing
if(isElement(child, tagName))
{
if(!tag.hasChildNodes())
parent.removeChild(tag);
tag = child;
}
// Anything else moves into the current container, and the walk
// carries on from the container
else
{
tag.appendChild(parent.removeChild(child));
child = tag;
}
}
// Drop the last container if it ended up empty
if(!tag.hasChildNodes())
parent.removeChild(tag);
}
}
// Remove tagName elements that did not end up directly below a
// parentName element
DOM::NodeList tags = doc.getElementsByTagName(tagName);
if(tags != NULL)
{
for(int i = 0; i < tags->getLength(); i++)
{
DOM::Element tag = (DOM::Element&)tags->item(i);
DOM::Node parent = tag.getParentNode();
if(parent != NULL && !isElement(parent, parentName))
parent.removeChild(tag);
#if 0
else if(tag.hasChildNodes())
{
DOM::NodeList children = tag.getChildNodes();
if(children != NULL && children->getLength() == 1)
{
DOM::Node child = children->item(0);
if(child != NULL && !child.hasChildNodes() &&
isElement(child, kElBlock))
parent.removeChild(tag);
}
}
#endif
}
}
}
/**
 * Groups the children of every destination element into list elements.
 *
 * A block carrying a list attribute starts a new list element (inserted
 * in front of it) unless the list currently open has the same id. A
 * block without the attribute closes the open list. While a list is
 * open, every child -- block or not -- is moved into it. The list
 * attribute is removed from the blocks and set on the list element.
 */
void RtfParser::breakLists(DOM::Document& doc)
{
    // Now group stuff in destinations into lists
    DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
    if(destinations == NULL)
        return;

    for(int idx = 0; idx < destinations->getLength(); idx++)
    {
        DOM::Element dest = (DOM::Element&)destinations->item(idx);

        // Sanity Check
        if(dest == NULL)
            continue;

        // The list currently open, and its id
        DOM::Element list;
        wstring openId;

        // Go through the children of this destination
        DOM::Node child = dest.getFirstChild();
        while(child != NULL)
        {
            // Only blocks can open or close a list
            if(isElement(child, kElBlock))
            {
                DOM::Element blockEl = (DOM::Element&)child;
                wstring listid = blockEl.getAttribute(kAtList);

                if(listid.length() > 0)
                {
                    // The id moves from the block to the list element
                    blockEl.removeAttribute(kAtList);

                    if(list == NULL || openId != listid)
                    {
                        list = doc.createElement(kElList);
                        list.setAttribute(kAtList, listid);
                        dest.insertBefore(list, child);
                        openId = listid;
                    }
                }
                else
                {
                    // A block outside any list ends the open one
                    list = NULL;
                    openId.erase();
                }
            }

            // Whatever it is, it belongs to the open list (if any);
            // carry on from the list itself
            if(list != NULL)
            {
                list.appendChild(dest.removeChild(child));
                child = list;
            }

            child = child.getNextSibling();
        }
    }
}
/**
 * Replaces style ids by style names: every block whose style attribute
 * equals the id of a style element gets that style's name instead (when
 * the name is not empty). Afterwards the id attribute is removed from all
 * style elements.
 */
void RtfParser::fixStyles(const DOM::Document doc)
{
    DOM::NodeList styles = doc.getElementsByTagName(kElStyle);
    if(styles == NULL)
        return;

    DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
    if(blocks != NULL)
    {
        for(int b = 0; b < blocks->getLength(); b++)
        {
            DOM::Element block = (DOM::Element&)blocks->item(b);
            if(block == NULL || !block.hasAttribute(kElStyle))
                continue;

            // Compare against every style; the attribute is re-read each
            // time because a match rewrites it
            for(int s = 0; s < styles->getLength(); s++)
            {
                DOM::Element style = (DOM::Element&)styles->item(s);
                if(style == NULL)
                    continue;

                if(!(style.getAttribute(kAtId) == block.getAttribute(kElStyle)))
                    continue;

                wstring name = style.getAttribute(kAtName);
                if(name.length() > 0)
                    block.setAttribute(kElStyle, name);
            }
        }
    }

    // The ids have served their purpose
    for(int s = 0; s < styles->getLength(); s++)
    {
        DOM::Element style = (DOM::Element&)styles->item(s);
        if(style != NULL)
            style.removeAttribute(kAtId);
    }
}
/**
 * Builds table elements.
 *
 * First every row marker is broken out to its destination element; an
 * empty row that is the first or last child of a block is moved directly
 * before or after that block beforehand. Then, inside each destination, a
 * block carrying the cell attribute opens a table (inserted in front of
 * it) if none is open, a block without the attribute closes it, and while
 * a table is open every child is moved into it. The cell attribute is
 * removed from the blocks.
 */
void RtfParser::breakTables(DOM::Document& doc)
{
    DOM::NodeList rows = doc.getElementsByTagName(kElRow);
    if(rows != NULL)
    {
        for(int idx = 0; idx < rows->getLength(); idx++)
        {
            DOM::Element row = (DOM::Element&)rows->item(idx);
            DOM::Node holder = row.getParentNode();

            if(holder == NULL)
                continue;

            if(isElement(holder, kElBlock))
            {
                // An empty row at the edge of a block just steps outside
                DOM::Node above = holder.getParentNode();
                if(above != NULL && !row.hasChildNodes())
                {
                    if(row.getPreviousSibling() == NULL)
                        above.insertBefore(holder.removeChild(row), holder);
                    else if(row.getNextSibling() == NULL)
                        insertAfter(above, holder.removeChild(row), holder);
                }
            }

            breakElement(row, kElDest);
        }
    }

    // Now group stuff in destinations into tables
    DOM::NodeList destinations = doc.getElementsByTagName(kElDest);
    if(destinations == NULL)
        return;

    for(int idx = 0; idx < destinations->getLength(); idx++)
    {
        DOM::Element dest = (DOM::Element&)destinations->item(idx);

        // Sanity Check
        if(dest == NULL)
            continue;

        // The table currently open, if any
        DOM::Element table;

        // Go through the children of this destination
        DOM::Node child = dest.getFirstChild();
        while(child != NULL)
        {
            // Only blocks can open or close a table
            if(isElement(child, kElBlock))
            {
                DOM::Element blockEl = (DOM::Element&)child;

                if(blockEl.getAttribute(kAtCell).length() > 0)
                {
                    blockEl.removeAttribute(kAtCell);

                    if(table == NULL)
                    {
                        table = doc.createElement(kElTable);
                        dest.insertBefore(table, child);
                    }
                }
                else
                {
                    // A block outside any table ends the open one
                    table = NULL;
                }
            }

            // Whatever it is, it belongs to the open table (if any);
            // carry on from the table itself
            if(table != NULL)
            {
                table.appendChild(dest.removeChild(child));
                child = table;
            }

            child = child.getNextSibling();
        }
    }
}
/**
 * Inserts `node` below `parent` directly after `ref`: before ref's next
 * sibling when there is one, otherwise by appending it to parent.
 */
void RtfParser::insertAfter(DOM::Node& parent, const DOM::Node& node,
                            const DOM::Node& ref)
{
    DOM::Node following = ref.getNextSibling();

    if(following != NULL)
    {
        parent.insertBefore(node, following);
        return;
    }

    // ref has nothing after it
    parent.appendChild(node);
}
/**
 * Unwraps every element named in kRemoveTags: its children are moved in
 * front of it, in order, and the emptied element is removed.
 */
void RtfParser::removeTags(const DOM::Document& doc)
{
    // Go through the list of nodes
    for(const char** name = kRemoveTags; *name != NULL; name++)
    {
        DOM::NodeList found = doc.getElementsByTagName(*name);
        if(found == NULL)
            continue;

        for(int idx = 0; idx < found->getLength(); idx++)
        {
            DOM::Element el = (DOM::Element&)found->item(idx);
            DOM::Node holder = el->getParentNode();

            if(holder == NULL)
                continue;

            // Hoist the children, first to last, to just before the element
            while(el.hasChildNodes())
            {
                holder.insertBefore(el.removeChild(el.getFirstChild()), el);
            }

            holder.removeChild(el);
        }
    }
}
/**
 * Applies list definitions to the lists that use them: for every list
 * definition carrying a list attribute, each list element with the same
 * list attribute value receives the definition's attributes (except those
 * in kHideList) and then loses its own list attribute.
 */
void RtfParser::fixLists(const DOM::Document doc)
{
    DOM::NodeList lists = doc.getElementsByTagName(kElList);
    if(lists == NULL)
        return;

    DOM::NodeList listdefs = doc.getElementsByTagName(kElListdef);
    if(listdefs == NULL)
        return;

    for(int d = 0; d < listdefs->getLength(); d++)
    {
        DOM::Element listdef = (DOM::Element&)listdefs->item(d);
        if(listdef == NULL || !listdef.hasAttribute(kAtList))
            continue;

        for(int l = 0; l < lists->getLength(); l++)
        {
            DOM::Element list = (DOM::Element&)lists->item(l);
            if(list == NULL)
                continue;

            if(!(list.getAttribute(kAtList) == listdef.getAttribute(kAtList)))
                continue;

            copyAttributes(listdef, list, kHideList);
            list.removeAttribute(kAtList);
        }
    }
}
/**
 * Replaces every intermediate block element by an element named after
 * the value of its fix attribute (kElPara when there is none). The new
 * element takes over the block's remaining attributes and all of its
 * children.
 */
void RtfParser::fixBlocks(const DOM::Document doc)
{
    DOM::NodeList blocks = doc.getElementsByTagName(kElBlock);
    if(blocks != NULL)
    {
        string fix;
        wstring val;

        for(int i = 0; i < blocks->getLength(); i++)
        {
            DOM::Element block = (DOM::Element&)blocks->item(i);
            DOM::Node parent = block.getParentNode();

            // Not attached anywhere: nothing to replace
            if(parent == NULL)
                continue;

            fix.resize(0);
            val.resize(0);

            // Take the fix attribute off the block. The namespace-aware
            // lookup is a fallback for when the plain lookup finds nothing;
            // it used to run only when the plain lookup HAD found the value,
            // overwriting it.
            val = block.getAttribute(kAtFix);
            if(val.length() > 0)
            {
                block.removeAttribute(kAtFix);
            }
            else
            {
                val = block.getAttributeNS("", kAtFix);
                if(val.length() > 0)
                    block.removeAttributeNS("", kAtFix);
            }

            // The attribute value is the new element name
            if(val.length() > 0)
                DOM::transcode16to8(val, fix);

            if(fix.length() == 0)
                fix = kElPara;

            // Build the replacement and move everything across
            DOM::Element el = doc.createElement(fix);
            copyAttributes(block, el, NULL);

            while(block.hasChildNodes())
                el.appendChild(block.removeChild(block.getFirstChild()));

            parent.replaceChild(el, block);
        }
    }
}
/**
 * Removes adjacent duplicate nodes of certain names.
 *
 * For every element named in kNoDuplicates, a directly preceding or
 * following sibling element with the same name that isEqualElement()
 * accepts is merged into it: the sibling's children are moved over in
 * document order and the emptied sibling is removed.
 *
 * The loop condition used to be the assignment `*t = NULL`, which ended
 * the loop at once (so nothing was ever merged) and overwrote the first
 * entry of kNoDuplicates.
 */
void RtfParser::removeDuplicates(const DOM::Document& doc)
{
    // Go through the list of nodes
    for(const char** t = kNoDuplicates; *t != NULL; t++)
    {
        DOM::NodeList elements = doc.getElementsByTagName(*t);
        if(elements != NULL)
        {
            for(int j = 0; j < elements->getLength(); j++)
            {
                // Make sure it's a valid element
                DOM::Element element = (DOM::Element&)elements->item(j);
                if(element == NULL)
                    continue;

                // Get neighbors
                DOM::Node previous = element.getPreviousSibling();
                DOM::Node next = element.getNextSibling();

                // Make sure it's still in the document, as we may have
                // removed it on a previous loop
                DOM::Node parent = element.getParentNode();
                if(parent == NULL)
                    continue;

                // Combine previous if valid. Only a sibling element of the
                // same name is ever a candidate.
                if(isElement(previous, *t) &&
                   isEqualElement((DOM::Element&)previous, element))
                {
                    // Move the children over back to front, each one to
                    // the front, so their order is kept
                    while(previous.hasChildNodes())
                    {
                        DOM::Node child = previous.removeChild(previous.getLastChild());
                        if(child != NULL)
                        {
                            if(element.hasChildNodes())
                                element.insertBefore(child, element.getFirstChild());
                            else
                                element.appendChild(child);
                        }
                    }

                    // Remove duplicate node
                    parent.removeChild(previous);
                }

                // Combine next if valid, under the same conditions
                if(isElement(next, *t) &&
                   isEqualElement((DOM::Element&)next, element))
                {
                    while(next.hasChildNodes())
                    {
                        DOM::Node child = next.removeChild(next.getFirstChild());
                        if(child != NULL)
                            element.appendChild(child);
                    }

                    // Remove duplicate node
                    parent.removeChild(next);
                }
            }
        }
    }
}