summaryrefslogtreecommitdiff
path: root/src/utf8.cpp
diff options
context:
space:
mode:
authorStef Walter <stef@memberwebs.com>2004-07-29 22:52:30 +0000
committerStef Walter <stef@memberwebs.com>2004-07-29 22:52:30 +0000
commit740d8a6bcb6521e188361befa7f5636c880bf63b (patch)
treef269a5d492d712be73d840fdefeb6ad247ee03db /src/utf8.cpp
parent0105af34f6977c51619bf1060e74e3c249932c68 (diff)
- Moved to DOMC
Diffstat (limited to 'src/utf8.cpp')
-rw-r--r--src/utf8.cpp153
1 files changed, 153 insertions, 0 deletions
diff --git a/src/utf8.cpp b/src/utf8.cpp
new file mode 100644
index 0000000..7c2e3ee
--- /dev/null
+++ b/src/utf8.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2004, Nate Nielsen
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * * Redistributions in binary form must reproduce the
+ * above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ * * The names of contributors to this software may not be
+ * used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ *
+ * CONTRIBUTORS
+ * Nate Nielsen <nielsen@memberwebs.com>
+ *
+ */
+
+#include <wchar.h>
+#include "usuals.h"
+
/*
 * Appends the UTF-8 encoding of a single wide character to 'ret'.
 *
 * ch:  the value to encode.
 * ret: receives the encoded bytes; they are appended, so existing
 *      contents are kept.
 *
 * Returns true on success. Returns false, appending nothing, when
 * 'ch' is outside 0 .. 0x10FFFF (which includes any negative value
 * on platforms where wchar_t is a signed type).
 *
 * Values in the surrogate range (0xD800 - 0xDFFF) are encoded on
 * their own as three byte sequences; no pairing is attempted because
 * only one unit is available here.
 */
bool transcode16to8(wchar_t ch, string& ret)
{
    // Work on an unsigned copy: a signed wchar_t must not take the
    // single byte branch when negative, and the shifts below must
    // not drag sign bits into the lead byte.
    const unsigned long cp = static_cast<unsigned long>(ch);

    if(cp > 0x10FFFFUL)
        return false;

    ret.reserve(ret.length() + 4);

    if(cp <= 0x007FUL)
    {
        ret.append(1, static_cast<char>(cp));
    }
    else if(cp <= 0x07FFUL)
    {
        ret.append(1, static_cast<char>(0xC0 | (cp >> 6)));
        ret.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
    }
    else if(cp <= 0xFFFFUL)
    {
        ret.append(1, static_cast<char>(0xE0 | (cp >> 12)));
        ret.append(1, static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        ret.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
    }
    else
    {
        // Only reachable where wchar_t is wider than 16 bits.
        ret.append(1, static_cast<char>(0xF0 | (cp >> 18)));
        ret.append(1, static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
        ret.append(1, static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        ret.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
    }

    return true;
}
+
/*
 * Appends the UTF-8 encoding of every wide character in 'data' to
 * 'ret'.
 *
 * data: the wide string to encode; each element is encoded on its
 *       own (surrogate units are not combined into pairs).
 * ret:  receives the encoded bytes; they are appended, so existing
 *       contents are kept.
 *
 * Returns true when every element was encoded. Returns false as soon
 * as an element outside 0 .. 0x10FFFF is met (which includes negative
 * values where wchar_t is signed); bytes for the elements before it
 * have already been appended to 'ret' at that point.
 */
bool transcode16to8(const wstring& data, string& ret)
{
    // Rough guess at the output size: one and a half bytes per unit.
    ret.reserve(ret.length() + data.length() + (data.length() / 2));

    for(wstring::const_iterator it = data.begin(); it != data.end(); ++it)
    {
        // Unsigned copy so a signed wchar_t cannot take the single
        // byte branch when negative or leak sign bits into a shift.
        const unsigned long cp = static_cast<unsigned long>(*it);

        if(cp <= 0x007FUL)
        {
            ret.append(1, static_cast<char>(cp));
        }
        else if(cp <= 0x07FFUL)
        {
            ret.append(1, static_cast<char>(0xC0 | (cp >> 6)));
            ret.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
        }
        else if(cp <= 0xFFFFUL)
        {
            ret.append(1, static_cast<char>(0xE0 | (cp >> 12)));
            ret.append(1, static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
            ret.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
        }
        else if(cp <= 0x10FFFFUL)
        {
            // Only reachable where wchar_t is wider than 16 bits.
            ret.append(1, static_cast<char>(0xF0 | (cp >> 18)));
            ret.append(1, static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
            ret.append(1, static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
            ret.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
        }
        else
        {
            return false;
        }
    }

    return true;
}
+
/*
 * Decodes the UTF-8 bytes in 'data' and appends the resulting wide
 * characters to 'ret'.
 *
 * data: bytes to decode; sequences of one to four bytes are
 *       recognised.
 * ret:  receives the decoded characters; they are appended, so
 *       existing contents are kept.
 *
 * Returns true when all of 'data' was decoded. Returns false at the
 * first lead byte that matches no sequence form, at a continuation
 * byte that is missing or malformed (including a sequence cut short
 * by the end of 'data'), or at a decoded value above 0x10FFFF.
 * Characters decoded before the failure stay appended to 'ret'.
 *
 * Where wchar_t is narrower than 32 bits, values above 0xFFFF are
 * appended as a surrogate pair instead of being truncated.
 *
 * NOTE(review): overlong encodings and three byte encodings of
 * surrogate values are accepted unchanged, as they always were.
 */
bool transcode8to16(const string& data, wstring& ret)
{
    ret.reserve(ret.length() + data.length());

    const string::size_type len = data.length();
    string::size_type pos = 0;

    while(pos < len)
    {
        const unsigned char lead = static_cast<unsigned char>(data[pos]);
        unsigned long cp;
        string::size_type extra;

        // The lead byte fixes how many continuation bytes follow
        // and supplies the top bits of the value.
        if(lead < 0x80)
        {
            cp = lead;
            extra = 0;
        }
        else if((lead & 0xE0) == 0xC0)
        {
            cp = lead & 0x1F;
            extra = 1;
        }
        else if((lead & 0xF0) == 0xE0)
        {
            cp = lead & 0x0F;
            extra = 2;
        }
        else if((lead & 0xF8) == 0xF0)
        {
            cp = lead & 0x07;
            extra = 3;
        }
        else
        {
            return false;
        }

        // Never look past the end of the buffer: a sequence that is
        // cut short is an error, not a reason to read further.
        if(extra > len - pos - 1)
            return false;

        for(string::size_type k = 1; k <= extra; ++k)
        {
            const unsigned char cont = static_cast<unsigned char>(data[pos + k]);
            if((cont & 0xC0) != 0x80)
                return false;
            cp = (cp << 6) | (cont & 0x3F);
        }

        if(cp > 0x10FFFFUL)
            return false;

        if(cp > 0xFFFFUL && sizeof(wchar_t) < 4)
        {
            // Does not fit in one unit here: split into a
            // high / low surrogate pair.
            const unsigned long v = cp - 0x10000UL;
            ret.append(1, static_cast<wchar_t>(0xD800UL | (v >> 10)));
            ret.append(1, static_cast<wchar_t>(0xDC00UL | (v & 0x3FFUL)));
        }
        else
        {
            ret.append(1, static_cast<wchar_t>(cp));
        }

        pos += extra + 1;
    }

    return true;
}