1 files changed, 151 insertions, 143 deletions
diff --git a/src/rtfparser.cpp b/src/rtfparser.cpp
index 6d07c80..78945b6 100644
--- a/src/rtfparser.cpp
+++ b/src/rtfparser.cpp
@@ -36,6 +36,8 @@
  *
  */
 
+// TODO: Rename this file to RTFParser.cpp
+
 #include "usuals.h"
 
 #include <stdlib.h>
@@ -43,11 +45,11 @@
 #include "rtfreader.h"
 
 
-const int RtfHandler::kAsterisk = 0x00000001;
-const int RtfHandler::kHasParam = 0x00000002;
-const int RtfHandler::kIsEncoded = 0x00000004;
+/* ----------------------------------------------------------------------------------
+ *  CONSTRUCTION
+ */
 
-RtfReader::RtfReader()
+RTFParser::RTFParser()
 {
 	m_handler = NULL;
 	m_depth = 0;
@@ -57,12 +59,17 @@ RtfReader::RtfReader()
 	m_uniEatStack.push(0);
 }
 
-RtfReader::~RtfReader()
+RTFParser::~RTFParser()
 {
 
 }
 
-bool RtfReader::parse(string fileName)
+
+/* ----------------------------------------------------------------------------------
+ *  PUBLIC METHODS
+ */
+
+bool RTFParser::parse(string fileName)
 {
 	FILE* file = fopen(fileName.c_str(), "r");
 	if(!file)
@@ -75,26 +82,134 @@ bool RtfReader::parse(string fileName)
 	return ret;
 }
 
-void RtfReader::emptyData(RtfContext& cx)
+bool RTFParser::parse(FILE* file)
+{
+    int ch = 0;
+    // Overview: read 'file' one char at a time, dispatching groups, control words and text.
+    // Start with no open groups and an empty error log
+    m_depth = 0;
+    m_parseErrors = "";
+
+    RtfContext cx;
+    cx.isData = false;
+    cx.file = file;
+    cx.data = L"";
+
+    if(m_handler)
+        m_handler->startDocument(this);
+
+    while(1)
+    {
+        ch = fgetc(file);
+        if(ch == EOF)
+            goto done;
+
+        // TODO: Do we need this check? isData looks to be false here every time (unless parseControlWord sets it) - confirm
+        if(!cx.isData)
+        {
+            switch(ch)
+            {
+
+            // A backslash starts a control word or escape; parsing stops if it cannot be read
+            case '\\':
+                if(!parseControlWord(cx))
+                    goto done;
+                break;
+
+            // An opening brace starts an RTF group
+            case '{':
+                {
+                    // Deliver any buffered text before the group starts
+                    flushData(cx);
+                    // The new group starts with a copy of the enclosing group's uniEat entry.
+                    // NOTE(review): top() assumes a non-empty stack; a stray '}' below can pop the last entry - confirm
+                    m_uniEatStack.push(m_uniEatStack.top());
+
+                    if(m_handler)
+                        m_handler->groupStart();
+
+                    m_depth++;
+                }
+                break;
+
+            case '}':
+                {
+                    // Deliver any buffered text before the group ends
+                    flushData(cx);
+
+                    if(m_handler)
+                        m_handler->groupEnd();
+
+                    // Drop this group's uniEat entry (guarded so an empty stack is never popped)
+                    if(!m_uniEatStack.empty())
+                        m_uniEatStack.pop();
+
+                    m_depth--;
+                }
+                break;
+
+            default:
+                cx.isData = true;
+                break;
+            }
+        }
+
+        if(cx.isData)
+        {
+            // Tabs are reported as the "tab" control word instead of as text
+            if(ch == '\t')
+                sendControlWord(cx, "tab", 0, -1);
+
+            // Raw CR/LF characters are not passed on as data
+            else if(!strchr("\r\n", ch))
+               sendData(cx, ch);
+
+            cx.isData = false;
+        }
+    }
+
+
+done:
+    // Reached at end of file or when parseControlWord() reports failure
+    if(m_depth != 0)
+        m_parseErrors.append("unmatched braces\n");
+
+    if(m_handler)
+        m_handler->endDocument();
+
+    // Succeed only if no parse errors were recorded
+    return m_parseErrors.empty();
+}
+
+
+/* ----------------------------------------------------------------------------------
+ *  HANDLER CALLS
+ */
+
+void RTFParser::flushData(RtfContext& cx)
 {
 	if(!cx.data.empty())
 	{
 		if(m_handler)
 			m_handler->charData(cx.data);
+		// The buffer is cleared even when no handler is attached
 		cx.data.resize(0);
 	}
 }
 
-void RtfReader::sendData(RtfContext& cx, wchar_t ch)
+void RTFParser::sendData(RtfContext& cx, wchar_t ch)
 {
+	// While m_uniEat is positive, swallow the char and count it off
 	if(m_uniEat > 0)
 		m_uniEat--;
+	// Otherwise buffer the char in cx.data until the next flushData()
 	else
 		cx.data.append(1, ch);
 }
 
-void RtfReader::sendData(RtfContext& cx, wstring data)
+void RTFParser::sendData(RtfContext& cx, wstring data)
 {
+    // Skip any unicode chars we've been asked to
 	if(m_uniEat > 0)
 	{
 		int len = data.size();
@@ -110,16 +225,24 @@ void RtfReader::sendData(RtfContext& cx, wstring data)
 	}
 }
 
-void RtfReader::sendControlWord(RtfContext& cx, string cw, int flags, int param)
+void RTFParser::sendControlWord(RtfContext& cx, string cw, int flags, int param)
 {
-	emptyData(cx);
+	flushData(cx);
+	// Buffered text has gone out first; now pass the control word to the handler, if any
 	if(m_handler)
 		m_handler->controlWord(cw, flags, param);
 }
 
-bool RtfReader::parseHexChar(RtfContext& cx, int num)
+
+/* ----------------------------------------------------------------------------------
+ *  PARSE HELPERS
+ */
+
+bool RTFParser::parseHexChar(RtfContext& cx, int num)
 {
 	string data;
+
+    // Get num chars and put them in the string
 	for(int i = 0; i < num; i++)
 	{
 		char ch = fgetc(cx.file);
@@ -139,6 +262,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)
 		}
 	}
 
+    // If parsing hex, then convert to appropriate unicode
 	if(m_parseHex)
 	{
 		char* end = NULL;
@@ -148,6 +272,9 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)
 		else
 			m_parseErrors.append("invalid hex char: " + data + "\n");
 	}
+
+    // TODO: Why would we ever want to do this?
+    // Otherwise just send as a hex control word
 	else
 	{
 		sendControlWord(cx, data, RtfHandler::kIsEncoded, -1);
@@ -156,7 +283,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num)
 	return true;
 }
 
-bool RtfReader::parseControlWord(RtfContext& cx)
+bool RTFParser::parseControlWord(RtfContext& cx)
 {
 	bool isAsterisk = false;
 	string controlword;
@@ -171,7 +298,7 @@ bool RtfReader::parseControlWord(RtfContext& cx)
 		bool empty = controlword.empty();
 
 		// Part of the name of a control word
-		// NOTE: Although the RTF specification prohibits upercase
+		// NOTE: Although the RTF specification prohibits uppercase
 		// control words, MS Word uses them :-/
 		if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z')
 			controlword.append(1, (char)ch);
@@ -211,16 +338,16 @@ bool RtfReader::parseControlWord(RtfContext& cx)
 			break;
 		}
 
-    // Escaped braces
-    else if(empty && ch == '{')
-    {
-      sendData(cx, L'{');
-    }
+        // Escaped braces
+        else if(empty && ch == '{')
+        {
+            sendData(cx, L'{');
+        }
 
-    else if(empty && ch == '}')
-    {
-      sendData(cx, L'}');
-    }
+        else if(empty && ch == '}')
+        {
+            sendData(cx, L'}');
+        }
 
 		// Non breaking space
 		else if(empty && ch == '~')
@@ -242,18 +369,7 @@ bool RtfReader::parseControlWord(RtfContext& cx)
 			param.append(1, (char)ch);
 		}
 
-		// TODO: This looks real hokey and acts that
-		// way too
-#if 0
-		// An enter as the first character of a control word
-		// makes a paragraph
-		else if(strchr("\n\r", ch))
-		{
-			controlword = "par";
-			break;
-		}
-#endif
-		// Space end a rtf code (but get eaten)
+		// A space ends an RTF control word (and gets eaten)
 		else if(strchr(" ", ch))
 			break;
 
@@ -331,111 +447,3 @@ bool RtfReader::parseControlWord(RtfContext& cx)
 	return true;
 }
 
-bool RtfReader::parse(FILE* file)
-{
-	m_depth = 0;
-	m_parseErrors = "";
-
-	int ch = 0;
-
-	RtfContext cx;
-	cx.isData = false;
-	cx.file = file;
-	cx.data = L"";
-
-	if(m_handler)
-		m_handler->startDocument(this);
-
-	while(1)
-	{
-		ch = fgetc(file);
-		if(ch == EOF)
-			goto done;
-
-		// Type is undetermined so we figure it out here
-		if(!cx.isData)
-		{
-			switch(ch)
-			{
-			case '\\':
-				if(!parseControlWord(cx))
-					goto done;
-				break;
-
-			case '{':
-				{
-					emptyData(cx);
-
-					m_uniEatStack.push(m_uniEatStack.top());
-
-					if(m_handler)
-						m_handler->groupStart();
-
-					m_depth++;
-				}
-				break;
-
-			case '}':
-				{
-					emptyData(cx);
-
-					if(m_handler)
-						m_handler->groupEnd();
-
-					if(!m_uniEatStack.empty())
-						m_uniEatStack.pop();
-
-					m_depth--;
-				}
-				break;
-
-			default:
-				cx.isData = true;
-				break;
-			}
-		}
-
-		if(cx.isData)
-		{
-			// We translate tabs into the appropriate control
-			// word
-			if(ch == '\t')
-				sendControlWord(cx, "tab", 0, -1);
-
-// Don't need this code, the XML outputter
-// Takes care of it for us
-#if 0
-			if(ch == '&')
-				sendData(cx, L"&amp;");
-
-			else if(ch == '\'')
-				sendData(cx, L"&apos;");
-
-			else if(ch == '"')
-				sendData(cx, L"&quot;");
-
-			else if(ch == '<')
-				sendData(cx, L"&lt;");
-
-			else if(ch == '>')
-				sendData(cx, L"&gt;");
-#endif
-
-			// enters a
-			else if(!strchr("\r\n", ch))
-				sendData(cx, ch);
-
-			cx.isData = false;
-		}
-	}
-
-done:
-
-	if(m_depth != 0)
-		m_parseErrors.append("unmatched braces\n");
-
-	if(m_handler)
-		m_handler->endDocument();
-
-	return m_parseErrors.empty();
-}