diff options
author | Stef Walter <stef@memberwebs.com> | 2004-07-22 22:30:48 +0000 |
---|---|---|
committer | Stef Walter <stef@memberwebs.com> | 2004-07-22 22:30:48 +0000 |
commit | 879f4addd4c94492c21c36c0be98122a879907bf (patch) | |
tree | 81dba160573131e476cad13e8421aff07ccb5d11 /src/rtfparser.cpp | |
parent | 7c93b2bab50b1ee28aee190a064b11daed247d83 (diff) |
- Comments and formatting changes.
Diffstat (limited to 'src/rtfparser.cpp')
-rw-r--r-- | src/rtfparser.cpp | 294 |
1 files changed, 151 insertions, 143 deletions
diff --git a/src/rtfparser.cpp b/src/rtfparser.cpp index 6d07c80..78945b6 100644 --- a/src/rtfparser.cpp +++ b/src/rtfparser.cpp @@ -36,6 +36,8 @@ * */ +// RENAME RTFParser.cpp + #include "usuals.h" #include <stdlib.h> @@ -43,11 +45,11 @@ #include "rtfreader.h" -const int RtfHandler::kAsterisk = 0x00000001; -const int RtfHandler::kHasParam = 0x00000002; -const int RtfHandler::kIsEncoded = 0x00000004; +/* ---------------------------------------------------------------------------------- + * CONSTRUCTION + */ -RtfReader::RtfReader() +RTFParser::RTFParser() { m_handler = NULL; m_depth = 0; @@ -57,12 +59,17 @@ RtfReader::RtfReader() m_uniEatStack.push(0); } -RtfReader::~RtfReader() +RTFParser::~RTFParser() { } -bool RtfReader::parse(string fileName) + +/* ---------------------------------------------------------------------------------- + * PUBLIC METHODS + */ + +bool RTFParser::parse(string fileName) { FILE* file = fopen(fileName.c_str(), "r"); if(!file) @@ -75,26 +82,134 @@ bool RtfReader::parse(string fileName) return ret; } -void RtfReader::emptyData(RtfContext& cx) +bool RTFParser::parse(FILE* file) +{ + int ch = 0; + + // The group depth + m_depth = 0; + m_parseErrors = ""; + + RtfContext cx; + cx.isData = false; + cx.file = file; + cx.data = L""; + + if(m_handler) + m_handler->startDocument(this); + + while(1) + { + ch = fgetc(file); + if(ch == EOF) + goto done; + + // TODO: Do we need this ? + if(!cx.isData) + { + switch(ch) + { + + // Starting a control word + case '\\': + if(!parseControlWord(cx)) + goto done; + break; + + // Starting an RTF group + case '{': + { + // Send all previous data + flushData(cx); + + // Handle any unicode destinations properly + m_uniEatStack.push(m_uniEatStack.top()); + + if(m_handler) + m_handler->groupStart(); + + m_depth++; + } + break; + + case '}': + { + // Send all previous data + flushData(cx); + + if(m_handler) + m_handler->groupEnd(); + + // Handle any unicode destinations properly + if(!m_uniEatStack.empty()) + m_uniEatStack.pop(); + + m_depth--; + } + break; + + default: + cx.isData = true; + break; + } + } + + if(cx.isData) + { + // We translate tabs into the appropriate control word + if(ch == '\t') + sendControlWord(cx, "tab", 0, -1); + + // line endings aren't used + else if(!strchr("\r\n", ch)) + sendData(cx, ch); + + cx.isData = false; + } + } + + +done: + + if(m_depth != 0) + m_parseErrors.append("unmatched braces\n"); + + if(m_handler) + m_handler->endDocument(); + + // If any parse errors return failure + return m_parseErrors.empty(); +} + + +/* ---------------------------------------------------------------------------------- + * HANDLER CALLS + */ + +void RTFParser::flushData(RtfContext& cx) { if(!cx.data.empty()) { if(m_handler) m_handler->charData(cx.data); + cx.data.resize(0); } } -void RtfReader::sendData(RtfContext& cx, wchar_t ch) +void RTFParser::sendData(RtfContext& cx, wchar_t ch) { + // Skip unicode chars we've been asked to if(m_uniEat > 0) m_uniEat--; + else cx.data.append(1, ch); } -void RtfReader::sendData(RtfContext& cx, wstring data) +void RTFParser::sendData(RtfContext& cx, wstring data) { + // Skip any unicode chars we've been asked to if(m_uniEat > 0) { int len = data.size(); @@ -110,16 +225,24 @@ void RtfReader::sendData(RtfContext& cx, wstring data) } } -void RtfReader::sendControlWord(RtfContext& cx, string cw, int flags, int param) +void RTFParser::sendControlWord(RtfContext& cx, string cw, int flags, int param) { - emptyData(cx); + flushData(cx); + if(m_handler) m_handler->controlWord(cw, flags, param); } -bool RtfReader::parseHexChar(RtfContext& cx, int num) + +/* ---------------------------------------------------------------------------------- + * PARSE HELPERS + */ + +bool RTFParser::parseHexChar(RtfContext& cx, int num) { string data; + + // Get num chars and put them in the string for(int i = 0; i < num; i++) { char ch = fgetc(cx.file); @@ -139,6 +262,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num) } } + // If parsing hex, then convert to appropriate unicode if(m_parseHex) { char* end = NULL; @@ -148,6 +272,9 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num) else m_parseErrors.append("invalid hex char: " + data + "\n"); } + + // TODO: Why would we ever want to do this? + // Otherwise just send as a hex control word else { sendControlWord(cx, data, RtfHandler::kIsEncoded, -1); @@ -156,7 +283,7 @@ bool RtfReader::parseHexChar(RtfContext& cx, int num) return true; } -bool RtfReader::parseControlWord(RtfContext& cx) +bool RTFParser::parseControlWord(RtfContext& cx) { bool isAsterisk = false; string controlword; @@ -171,7 +298,7 @@ bool RtfReader::parseControlWord(RtfContext& cx) bool empty = controlword.empty(); // Part of the name of a control word - // NOTE: Although the RTF specification prohibits upercase + // NOTE: Although the RTF specification prohibits uppercase // control words, MS Word uses them :-/ if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') controlword.append(1, (char)ch); @@ -211,16 +338,16 @@ bool RtfReader::parseControlWord(RtfContext& cx) break; } - // Escaped braces - else if(empty && ch == '{') - { - sendData(cx, L'{'); - } + // Escaped braces + else if(empty && ch == '{') + { + sendData(cx, L'{'); + } - else if(empty && ch == '}') - { - sendData(cx, L'}'); - } + else if(empty && ch == '}') + { + sendData(cx, L'}'); + } // Non breaking space else if(empty && ch == '~') @@ -242,18 +369,7 @@ bool RtfReader::parseControlWord(RtfContext& cx) param.append(1, (char)ch); } - // TODO: This looks real hokey and acts that - // way too -#if 0 - // An enter as the first character of a control word - // makes a paragraph - else if(strchr("\n\r", ch)) - { - controlword = "par"; - break; - } -#endif - // Space end a rtf code (but get eaten) + // Space at end a rtf code (it gets eaten) else if(strchr(" ", ch)) break; @@ -331,111 +447,3 @@ bool RtfReader::parseControlWord(RtfContext& cx) return true; } -bool RtfReader::parse(FILE* file) -{ - m_depth = 0; - m_parseErrors = ""; - - int ch = 0; - - RtfContext cx; - cx.isData = false; - cx.file = file; - cx.data = L""; - - if(m_handler) - m_handler->startDocument(this); - - while(1) - { - ch = fgetc(file); - if(ch == EOF) - goto done; - - // Type is undetermined so we figure it out here - if(!cx.isData) - { - switch(ch) - { - case '\\': - if(!parseControlWord(cx)) - goto done; - break; - - case '{': - { - emptyData(cx); - - m_uniEatStack.push(m_uniEatStack.top()); - - if(m_handler) - m_handler->groupStart(); - - m_depth++; - } - break; - - case '}': - { - emptyData(cx); - - if(m_handler) - m_handler->groupEnd(); - - if(!m_uniEatStack.empty()) - m_uniEatStack.pop(); - - m_depth--; - } - break; - - default: - cx.isData = true; - break; - } - } - - if(cx.isData) - { - // We translate tabs into the appropriate control - // word - if(ch == '\t') - sendControlWord(cx, "tab", 0, -1); - -// Don't need this code, the XML outputter -// Takes care of it for us -#if 0 - if(ch == '&') - sendData(cx, L"&"); - - else if(ch == '\'') - sendData(cx, L"'"); - - else if(ch == '"') - sendData(cx, L"""); - - else if(ch == '<') - sendData(cx, L"<"); - - else if(ch == '>') - sendData(cx, L">"); -#endif - - // enters a - else if(!strchr("\r\n", ch)) - sendData(cx, ch); - - cx.isData = false; - } - } - -done: - - if(m_depth != 0) - m_parseErrors.append("unmatched braces\n"); - - if(m_handler) - m_handler->endDocument(); - - return m_parseErrors.empty(); -} |