/* * Copyright (c) 2004, Nate Nielsen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above * copyright notice, this list of conditions and the * following disclaimer. * * Redistributions in binary form must reproduce the * above copyright notice, this list of conditions and * the following disclaimer in the documentation and/or * other materials provided with the distribution. * * The names of contributors to this software may not be * used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * * CONTRIBUTORS * Nate Nielsen * */ // SABLOTRON C++ WRAPPER CONVERSION FUNCTIONS // #include #include "sablo.h" bool DOM::transcode16to8(const std::basic_string& data, std::basic_string& ret) { ret.resize(0); ret.reserve(data.length() + (data.length() / 2)); // basic_string.c_str doesn't work properly everywhere // most notably not in the g++ std library const wchar_t* c = data.length() ? data.data() : L""; const wchar_t* e = c + data.length(); for( ; c < e; c++) { if(*c <= 0x007F) { ret.append(1, (char)*c); } else if(*c <= 0x07FF) { ret.append(1, (char)(192 | (*c >> 6))); ret.append(1, (char)(128 | (*c & 63))); } else { ret.append(1, (char)(224 | (*c >> 12))); ret.append(1, (char)(128 | ((*c >> 6) & 63))); ret.append(1, (char)(128 | (*c & 63)) ); } } return true; } bool DOM::transcode8to16(const std::basic_string& data, std::basic_string& ret) { ret.resize(0); ret.reserve(data.length()); // basic_string.c_str doesn't work properly everywhere // most notably not in the g++ std library const char* c = data.length() ? data.data() : ""; const char* e = c + data.length(); for( ; c < e; c++) { // First 4 bits set if((c[0] & 0xF8) == 0xF0 && (c[1] & 0xC0) == 0x80 && (c[2] & 0xC0) == 0x80 && (c[3] & 0xC0) == 0x80) { ret.append(1, (wchar_t)(((wchar_t)c[0] & 7) << 18 | ((wchar_t)c[1] & 63) << 12 | ((wchar_t)c[2] & 63) << 6 | ((wchar_t)c[3] & 63))); c += 3; } // First 3 bits set else if((c[0] & 0xF0) == 0xE0 && (c[1] & 0xC0) == 0x80 && (c[2] & 0xC0) == 0x80) { ret.append(1, (wchar_t)(((wchar_t)c[0] & 15) << 12 | ((wchar_t)c[1] & 63) << 6 | ((wchar_t)c[2] & 63))); c += 2; } // First 2 bits set else if((c[0] & 0xE0) == 0xC0 && (c[1] & 0xC0) == 0x80) { ret.append(1, (wchar_t)(((wchar_t)c[0] & 31) << 6 | ((wchar_t)c[1] & 63))); c += 1; } // First bit set else if(!(c[0] & 0x80)) { ret.append(1, (wchar_t)c[0]); } else return false; } return true; }