summary | refs | log | tree | commit | diff
path: root/src/rtfparser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/rtfparser.cpp')
-rw-r--r-- src/rtfparser.cpp 294
1 files changed, 151 insertions, 143 deletions
diff --git a/src/rtfparser.cpp b/src/rtfparser.cpp
index 6d07c80..78945b6 100644
--- a/src/rtfparser.cpp
+++ b/src/rtfparser.cpp
@@ -36,6 +36,8 @@
*
*/
+// TODO: Rename this file to RTFParser.cpp
+
#include "usuals.h"
#include <stdlib.h>
@@ -43,11 +45,11 @@
#include "rtfreader.h"
-const int RtfHandler::kAsterisk = 0x00000001;
-const int RtfHandler::kHasParam = 0x00000002;
-const int RtfHandler::kIsEncoded = 0x00000004;
+/* ----------------------------------------------------------------------------------
+ * CONSTRUCTION
+ */
-RtfReader::RtfReader()
+RTFParser::RTFParser()
{
m_handler = NULL;
m_depth = 0;
@@ -57,12 +59,17 @@ RtfReader::RtfReader()
m_uniEatStack.push(0);
}
-RtfReader::~RtfReader()
+RTFParser::~RTFParser()
{
}
-bool RtfReader::parse(string fileName)
+
+/* ----------------------------------------------------------------------------------
+ * PUBLIC METHODS
+ */
+
+bool RTFParser::parse(string fileName)
{
FILE* file = fopen(fileName.c_str(), "r");
if(!file)
@@ -75,26 +82,134 @@ bool RtfReader::parse(string fileName)
return ret;
}
-void RtfReader::emptyData(RtfContext& cx)
+bool RTFParser::parse(FILE* file)
+{
+ int ch = 0;
+
+ // The group depth
+ m_depth = 0;
+ m_parseErrors = "";
+
+ RtfContext cx;
+ cx.isData = false;
+ cx.file = file;
+ cx.data = L"";
+
+ if(m_handler)
+ m_handler->startDocument(this);
+
+ while(1)
+ {
+ ch = fgetc(file);
+ if(ch == EOF)
+ goto done;
+
+ // TODO: Do we need this isData check?
+ if(!cx.isData)
+ {
+ switch(ch)
+ {
+
+ // Starting a control word
+ case '\\':
+ if(!parseControlWord(cx))
+ goto done;
+ break;
+
+ // Starting an RTF group
+ case '{':
+ {
+ // Send all previous data
+ flushData(cx);
+
+ // Handle any unicode destinations properly
+ m_uniEatStack.push(m_uniEatStack.top());
+
+ if(m_handler)
+ m_handler->groupStart();
+
+ m_depth++;
+ }
+ break;
+
+ case '}':
+ {
+ // Send all previous data
+ flushData(cx);
+
+ if(m_handler)
+ m_handler->groupEnd();
+
+ // Handle any unicode destinations properly
+ if(!m_uniEatStack.empty())
+ m_uniEatStack.pop();
+
+ m_depth--;
+ }
+ break;
+
+ default:
+ cx.isData = true;
+ break;
+ }
+ }
+
+ if(cx.isData)
+ {
+ // We translate tabs into the appropriate control word
+ if(ch == '\t')
+ sendControlWord(cx, "tab", 0, -1);
+
+ // Line endings are not treated as data, so they are dropped
+ else if(!strchr("\r\n", ch))
+ sendData(cx, ch);
+
+ cx.isData = false;
+ }
+ }
+
+
+done:
+
+ if(m_depth != 0)
+ m_parseErrors.append("unmatched braces\n");
+
+ if(m_handler)
+ m_handler->endDocument();
+
+ // If any parse errors return failure
+ return m_parseErrors.empty();
+}
+
+
+/* ----------------------------------------------------------------------------------
+ * HANDLER CALLS
+ */
+
+void RTFParser::flushData(RtfContext& cx)
{
if(!cx.data.empty())
{
if(m_handler)
m_handler->charData(cx.data);
+
cx.data.resize(0);
}
}
-void RtfReader::sendData(RtfContext& cx, wchar_t ch)
+void RTFParser::sendData(RtfContext& cx, wchar_t ch)
{
+ // Skip unicode chars we've been asked to
if(m_uniEat > 0)
m_uniEat--;
+
else
cx.data.append(1, ch);
}
-void RtfReader::sendData(RtfContext& cx, wstring data)
+void RTFParser::sendData(RtfContext& cx, wstring data)
{
+ // Skip any unicode chars we've been asked to
if(m_uniEat > 0)
{
int len = data.size();
@@ -110,16 +225,24 @@ void RtfReader::sendData(RtfContext& cx, wstring data)
}
}
-void RtfReader::sendControlWord(RtfContext& cx, string cw, int flags, int param)
+void RTFParser::sendControlWord(RtfContext& cx, string cw, int flags, int param)
{
- emptyData(cx);
+ flushData(cx);
+
if(m_handler)
m_handler->controlWord(cw, flags, param);
}
-bool RtfReader::parseHexChar(RtfContext& cx, int num)
+
+/* ----------------------------------------------------------------------------------
+ * PARSE HELPERS
+ */
+
+bool RTFParser::parseHexChar(RtfContext& cx, int num)
{
string data;
+
+ // Get num chars and put them in the string
for(int i = 0; i < num; i++)
{
char ch = fgetc(cx.file);
@@ -139,6 +262,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)
}
}
+ // If parsing hex, then convert to appropriate unicode
if(m_parseHex)
{
char* end = NULL;
@@ -148,6 +272,9 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)
else
m_parseErrors.append("invalid hex char: " + data + "\n");
}
+
+ // TODO: Why would we ever want to do this?
+ // Otherwise just send as a hex control word
else
{
sendControlWord(cx, data, RtfHandler::kIsEncoded, -1);
@@ -156,7 +283,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)
return true;
}
-bool RtfReader::parseControlWord(RtfContext& cx)
+bool RTFParser::parseControlWord(RtfContext& cx)
{
bool isAsterisk = false;
string controlword;
@@ -171,7 +298,7 @@ bool RtfReader::parseControlWord(RtfContext& cx)
bool empty = controlword.empty();
// Part of the name of a control word
- // NOTE: Although the RTF specification prohibits upercase
+ // NOTE: Although the RTF specification prohibits uppercase
// control words, MS Word uses them :-/
if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z')
controlword.append(1, (char)ch);
@@ -211,16 +338,16 @@ bool RtfReader::parseControlWord(RtfContext& cx)
break;
}
- // Escaped braces
- else if(empty && ch == '{')
- {
- sendData(cx, L'{');
- }
+ // Escaped braces
+ else if(empty && ch == '{')
+ {
+ sendData(cx, L'{');
+ }
- else if(empty && ch == '}')
- {
- sendData(cx, L'}');
- }
+ else if(empty && ch == '}')
+ {
+ sendData(cx, L'}');
+ }
// Non breaking space
else if(empty && ch == '~')
@@ -242,18 +369,7 @@ bool RtfReader::parseControlWord(RtfContext& cx)
param.append(1, (char)ch);
}
- // TODO: This looks real hokey and acts that
- // way too
-#if 0
- // An enter as the first character of a control word
- // makes a paragraph
- else if(strchr("\n\r", ch))
- {
- controlword = "par";
- break;
- }
-#endif
- // Space end a rtf code (but get eaten)
+ // A space ends an RTF control word (and gets eaten)
else if(strchr(" ", ch))
break;
@@ -331,111 +447,3 @@ bool RtfReader::parseControlWord(RtfContext& cx)
return true;
}
-bool RtfReader::parse(FILE* file)
-{
- m_depth = 0;
- m_parseErrors = "";
-
- int ch = 0;
-
- RtfContext cx;
- cx.isData = false;
- cx.file = file;
- cx.data = L"";
-
- if(m_handler)
- m_handler->startDocument(this);
-
- while(1)
- {
- ch = fgetc(file);
- if(ch == EOF)
- goto done;
-
- // Type is undetermined so we figure it out here
- if(!cx.isData)
- {
- switch(ch)
- {
- case '\\':
- if(!parseControlWord(cx))
- goto done;
- break;
-
- case '{':
- {
- emptyData(cx);
-
- m_uniEatStack.push(m_uniEatStack.top());
-
- if(m_handler)
- m_handler->groupStart();
-
- m_depth++;
- }
- break;
-
- case '}':
- {
- emptyData(cx);
-
- if(m_handler)
- m_handler->groupEnd();
-
- if(!m_uniEatStack.empty())
- m_uniEatStack.pop();
-
- m_depth--;
- }
- break;
-
- default:
- cx.isData = true;
- break;
- }
- }
-
- if(cx.isData)
- {
- // We translate tabs into the appropriate control
- // word
- if(ch == '\t')
- sendControlWord(cx, "tab", 0, -1);
-
-// Don't need this code, the XML outputter
-// Takes care of it for us
-#if 0
- if(ch == '&')
- sendData(cx, L"&amp;");
-
- else if(ch == '\'')
- sendData(cx, L"&apos;");
-
- else if(ch == '"')
- sendData(cx, L"&quot;");
-
- else if(ch == '<')
- sendData(cx, L"&lt;");
-
- else if(ch == '>')
- sendData(cx, L"&gt;");
-#endif
-
- // enters a
- else if(!strchr("\r\n", ch))
- sendData(cx, ch);
-
- cx.isData = false;
- }
- }
-
-done:
-
- if(m_depth != 0)
- m_parseErrors.append("unmatched braces\n");
-
- if(m_handler)
- m_handler->endDocument();
-
- return m_parseErrors.empty();
-}