summaryrefslogtreecommitdiff
path: root/src/sablotr.cpp
blob: 465648add98a948567fa1793a500c7f9dba89300 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
//
// AUTHOR
// N. Nielsen
//
// LICENSE
// This software is in the public domain.
//
// The software is provided "as is", without warranty of any kind,
// express or implied, including but not limited to the warranties
// of merchantability, fitness for a particular purpose, and
// noninfringement. In no event shall the author(s) be liable for any
// claim, damages, or other liability, whether in an action of
// contract, tort, or otherwise, arising from, out of, or in connection
// with the software or the use or other dealings in the software.
//
// SUPPORT
// Send bug reports to: <nielsen@memberwebs.com>
//
// SITE
// http://memberwebs.com/nielsen/
//

// SABLOTRON C++ WRAPPER CONVERSION FUNCTIONS
//

#include "wchar.h"
#include "sablo.h"

// Converts a wide-character string to UTF-8.
//
// data: the wide string to convert. A high surrogate (0xD800-0xDBFF)
//       immediately followed by a low surrogate (0xDC00-0xDFFF) is
//       combined into one code point, so UTF-16 input yields a single
//       4-byte sequence instead of two 3-byte ones.
// ret:  receives the UTF-8 bytes. Previous contents are discarded.
//
// Returns true on success. Returns false when an element lies outside
// the Unicode range (above 0x10FFFF once converted to unsigned); ret
// then holds whatever was converted before the offending element.
// An unpaired surrogate is not treated as an error: it is written as a
// 3-byte sequence.
bool DOM::transcode16to8(const std::basic_string<wchar_t>& data,
						 std::basic_string<char>& ret)
{
	ret.resize(0);
	ret.reserve(data.length() + (data.length() / 2));

	// basic_string.c_str doesn't work properly everywhere
	// most notably not in the g++ std library

	const wchar_t* c = data.length() ? data.data() : L"";
	const wchar_t* e = c + data.length();

	for( ; c != e; c++)
	{
		// Work on an unsigned value so that the range tests below do
		// not depend on whether wchar_t is signed
		unsigned long cp = static_cast<unsigned long>(*c);

		// Fold a surrogate pair into the code point it represents.
		// The (e - c) test keeps us from reading past the last element.
		if(cp >= 0xD800 && cp <= 0xDBFF && (e - c) > 1)
		{
			const unsigned long low = static_cast<unsigned long>(c[1]);
			if(low >= 0xDC00 && low <= 0xDFFF)
			{
				cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
				c++;
			}
		}

		// 1 byte: 0xxxxxxx
		if(cp <= 0x007F)
		{
			ret.append(1, static_cast<char>(cp));
		}

		// 2 bytes: 110xxxxx 10xxxxxx
		else if(cp <= 0x07FF)
		{
			ret.append(1, static_cast<char>(0xC0 | (cp >> 6)));
			ret.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
		}

		// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		else if(cp <= 0xFFFF)
		{
			ret.append(1, static_cast<char>(0xE0 | (cp >> 12)));
			ret.append(1, static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
			ret.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
		}

		// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		else if(cp <= 0x10FFFF)
		{
			ret.append(1, static_cast<char>(0xF0 | (cp >> 18)));
			ret.append(1, static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
			ret.append(1, static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
			ret.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
		}

		// Not a Unicode code point
		else
			return false;
	}

	return true;
}

// Converts a UTF-8 byte string to a wide-character string.
//
// data: the UTF-8 bytes to decode. Sequences of 1 to 4 bytes are
//       accepted.
// ret:  receives the decoded characters. Previous contents are
//       discarded. Where wchar_t is narrower than 4 bytes, a code
//       point above 0xFFFF is stored as a UTF-16 surrogate pair.
//
// Returns true on success. Returns false on an invalid lead byte, a
// missing or malformed continuation byte (including a sequence cut
// short by the end of the data), or a decoded value above 0x10FFFF;
// ret then holds whatever was decoded before the bad sequence.
// Overlong forms and encoded surrogates are not rejected.
bool DOM::transcode8to16(const std::basic_string<char>& data,
						 std::basic_string<wchar_t>& ret)
{
	ret.resize(0);
	ret.reserve(data.length());

	// basic_string.c_str doesn't work properly everywhere
	// most notably not in the g++ std library

	const char* c = data.length() ? data.data() : "";
	const char* e = c + data.length();

	while(c != e)
	{
		// Unsigned so the bit tests don't depend on char signedness
		const unsigned char lead = static_cast<unsigned char>(*c);

		unsigned long cp;	// code point being assembled
		int extra;			// continuation bytes that must follow

		// High bit clear: plain ASCII
		if(!(lead & 0x80))
		{
			cp = lead;
			extra = 0;
		}

		// First 2 bits set: 110xxxxx
		else if((lead & 0xE0) == 0xC0)
		{
			cp = lead & 0x1F;
			extra = 1;
		}

		// First 3 bits set: 1110xxxx
		else if((lead & 0xF0) == 0xE0)
		{
			cp = lead & 0x0F;
			extra = 2;
		}

		// First 4 bits set: 11110xxx
		else if((lead & 0xF8) == 0xF0)
		{
			cp = lead & 0x07;
			extra = 3;
		}

		// Stray continuation byte or unsupported lead byte
		else
			return false;

		// The data buffer is not guaranteed to be terminated, so make
		// sure the whole sequence is present before touching it
		if((e - c) <= extra)
			return false;

		for(int i = 1; i <= extra; i++)
		{
			const unsigned char next = static_cast<unsigned char>(c[i]);

			// Continuation bytes look like 10xxxxxx
			if((next & 0xC0) != 0x80)
				return false;

			cp = (cp << 6) | (next & 0x3F);
		}

		c += extra + 1;

		// A 4-byte sequence can encode values beyond the Unicode range
		if(cp > 0x10FFFF)
			return false;

		// Doesn't fit in a narrow wchar_t: split into a surrogate pair
		if(cp > 0xFFFF && sizeof(wchar_t) < 4)
		{
			cp -= 0x10000;
			ret.append(1, static_cast<wchar_t>(0xD800 + (cp >> 10)));
			ret.append(1, static_cast<wchar_t>(0xDC00 + (cp & 0x3FF)));
		}
		else
		{
			ret.append(1, static_cast<wchar_t>(cp));
		}
	}

	return true;
}