/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include #if !defined(LOG4CXX) #define LOG4CXX 1 #endif #include #if LOG4CXX_LOGCHAR_IS_UNICHAR || LOG4CXX_CFSTRING_API || LOG4CXX_UNICHAR_API #include #endif using namespace log4cxx; using namespace log4cxx::helpers; void Transcoder::decodeUTF8(const std::string& src, LogString& dst) { std::string::const_iterator iter = src.begin(); while(iter != src.end()) { unsigned int sv = decode(src, iter); if(sv != 0xFFFF) { encode(sv, dst); } else { dst.append(1, LOSSCHAR); iter++; } } } void Transcoder::encodeUTF8(const LogString& src, std::string& dst) { #if LOG4CXX_LOGCHAR_IS_UTF8 dst.append(src); #else LogString::const_iterator iter = src.begin(); while(iter != src.end()) { unsigned int sv = decode(src, iter); if(sv != 0xFFFF) { encode(sv, dst); } else { dst.append(1, LOSSCHAR); iter++; } } #endif } char* Transcoder::encodeUTF8(const LogString& src, Pool& p) { #if LOG4CXX_LOGCHAR_IS_UTF8 return p.pstrdup(src); #else std::string tmp; encodeUTF8(src, tmp); return p.pstrdup(tmp); #endif } void Transcoder::encodeUTF8(unsigned int sv, ByteBuffer& dst) { size_t bytes = encodeUTF8(sv, dst.current()); dst.position(dst.position() + bytes); } size_t Transcoder::encodeUTF8(unsigned int ch, char* dst) { if (ch < 0x80) { dst[0] = (char) ch; return 1; } else if (ch < 0x800) { dst[0] = (char) (0xC0 + (ch >> 6)); dst[1] = (char) (0x80 + (ch & 0x3F)); return 2; } else if (ch < 0x10000) { dst[0] = (char) (0xE0 + (ch >> 12)); dst[1] = (char) (0x80 + ((ch >> 6) & 0x3F)); dst[2] = (char) (0x80 + (ch & 0x3F)); return 3; } else if (ch <= 0x10FFFF) { dst[0] = (char) (0xF0 + (ch >> 18)); dst[1] = (char) (0x80 + ((ch >> 12) & 0x3F)); dst[2] = (char) (0x80 + ((ch >> 6) & 0x3F)); dst[3] = (char) (0x80 + (ch & 0x3F)); return 4; } else { // // output UTF-8 encoding of 0xFFFF // dst[0] = (char) 0xEF; dst[1] = (char) 0xBF; dst[2] = (char) 0xBF; return 3; } } void Transcoder::encodeUTF16BE(unsigned int sv, ByteBuffer& dst) { size_t bytes = encodeUTF16BE(sv, dst.current()); dst.position(dst.position() + bytes); } size_t Transcoder::encodeUTF16BE(unsigned int ch, char* dst) { if (ch <= 0xFFFF) { dst[0] = (char) (ch >> 8); dst[1] = (char) (ch & 0xFF); return 2; } if (ch <= 0x10FFFF) { unsigned char w = (unsigned char) ((ch >> 16) - 1); dst[0] = (char) (0xD8 + (w >> 2)); dst[1] = (char) (((w & 0x03) << 6) + ((ch >> 10) & 0x3F)); dst[2] = (char) (0xDC + ((ch & 0x30) >> 4)); dst[3] = (char) (ch & 0xFF); return 4; } dst[0] = dst[1] = (char) 0xFF; return 2; } void Transcoder::encodeUTF16LE(unsigned int sv, ByteBuffer& dst) { size_t bytes = encodeUTF16LE(sv, dst.current()); dst.position(dst.position() + bytes); } size_t Transcoder::encodeUTF16LE(unsigned int ch, char* dst) { if (ch <= 0xFFFF) { dst[1] = (char) (ch >> 8); dst[0] = (char) (ch & 0xFF); return 2; } if (ch <= 0x10FFFF) { unsigned char w = (unsigned char) ((ch >> 16) - 1); dst[1] = (char) (0xD8 + (w >> 2)); dst[0] = (char) (((w & 0x03) << 6) + ((ch >> 10) & 0x3F)); dst[3] = (char) (0xDC + ((ch & 0x30) >> 4)); dst[2] = (char) (ch & 0xFF); return 4; } dst[0] = dst[1] = (char) 0xFF; return 2; } unsigned int Transcoder::decode(const std::string& src, std::string::const_iterator& iter) { std::string::const_iterator start(iter); unsigned char ch1 = *(iter++); if (ch1 <= 0x7F) { return ch1; } // // should not have continuation character here // if ((ch1 & 0xC0) != 0x80 && iter != src.end()) { unsigned char ch2 = *(iter++); // // should be continuation if ((ch2 & 0xC0) != 0x80) { iter = start; return 0xFFFF; } if((ch1 & 0xE0) == 0xC0) { if ((ch2 & 0xC0) == 0x80) { unsigned int rv = ((ch1 & 0x1F) << 6) + (ch2 & 0x3F); if (rv >= 0x80) { return rv; } } iter = start; return 0xFFFF; } if (iter != src.end()) { unsigned char ch3 = *(iter++); // // should be continuation // if ((ch3 & 0xC0) != 0x80) { iter = start; return 0xFFFF; } if ((ch1 & 0xF0) == 0xE0) { unsigned rv = ((ch1 & 0x0F) << 12) + ((ch2 & 0x3F) << 6) + (ch3 & 0x3F); if (rv <= 0x800) { iter = start; return 0xFFFF; } return rv; } if (iter != src.end()) { unsigned char ch4 = *(iter++); if ((ch4 & 0xC0) != 0x80) { iter = start; return 0xFFFF; } unsigned int rv = ((ch1 & 0x07) << 18) + ((ch2 & 0x3F) << 12) + ((ch3 & 0x3F) << 6) + (ch4 & 0x3F); if (rv > 0xFFFF) { return rv; } } } } iter = start; return 0xFFFF; } void Transcoder::encode(unsigned int sv, std::string& dst) { char tmp[8]; size_t bytes = encodeUTF8(sv, tmp); dst.append(tmp, bytes); } void Transcoder::decode(const std::string& src, LogString& dst) { #if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8 dst.append(src); #else static CharsetDecoderPtr decoder(CharsetDecoder::getDefaultDecoder()); dst.reserve(dst.size() + src.size()); std::string::const_iterator iter = src.begin(); #if !LOG4CXX_CHARSET_EBCDIC for(; iter != src.end() && ((unsigned char) *iter) < 0x80; iter++) { dst.append(1, *iter); } #endif if (iter != src.end()) { size_t offset = iter - src.begin(); ByteBuffer buf(const_cast(src.data() + offset), src.size() - offset); while(buf.remaining() > 0) { log4cxx_status_t stat = decoder->decode(buf, dst); if(CharsetDecoder::isError(stat)) { dst.append(1, LOSSCHAR); buf.position(buf.position() + 1); } } decoder->decode(buf, dst); } #endif } char* Transcoder::encode(const LogString& src, Pool& p) { #if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8 return p.pstrdup(src); #else std::string tmp; encode(src, tmp); return p.pstrdup(tmp); #endif } void Transcoder::encode(const LogString& src, std::string& dst) { #if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8 dst.append(src); #else static CharsetEncoderPtr encoder(CharsetEncoder::getDefaultEncoder()); dst.reserve(dst.size() + src.size()); LogString::const_iterator iter = src.begin(); #if !LOG4CXX_CHARSET_EBCDIC for(; iter != src.end() && ((unsigned int) *iter) < 0x80; iter++) { dst.append(1, *iter); } #endif if (iter != src.end()) { char buf[BUFSIZE]; ByteBuffer out(buf, BUFSIZE); while(iter != src.end()) { log4cxx_status_t stat = encoder->encode(src, iter, out); out.flip(); dst.append(out.data(), out.limit()); out.clear(); if (CharsetEncoder::isError(stat)) { dst.append(1, LOSSCHAR); iter++; } } encoder->encode(src, iter, out); } #endif } template static unsigned int decodeUTF16(const String& in, Iterator& iter) { unsigned int ch1 = *iter; // // if not surrogate pair // if (ch1 < 0xD800 || ch1 > 0xDFFF) { // // then advance iterator and return wchar_t value // if(ch1 != 0xFFFF) iter++; return ch1; } else if (ch1 < 0xDC00) { // // started with high-surrogate value // if there is an additional wchar_t Iterator iter2 = iter + 1; if (iter2 != in.end()) { unsigned int ch2 = *iter2; // // if it is a matching low surrogate then // advance the iterator and return the scalar value if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { iter += 2; return (ch1 - 0xD800) * 0x400 + (ch2 - 0xDC00) + 0x10000; } } } // // unrecognized value, do not advance iterator // return 0xFFFF; } template static void encodeUTF16(unsigned int sv, String& dst) { if (sv < 0x10000) { dst.append(1, sv); } else { unsigned char u = (unsigned char) (sv >> 16); unsigned char w = (unsigned char) (u - 1); unsigned short hs = (0xD800 + ((w & 0xF) << 6) + ((sv & 0xFFFF) >> 10)); unsigned short ls = (0xDC00 + (sv & 0x3FF)); dst.append(1, hs); dst.append(1, ls); } } #if LOG4CXX_WCHAR_T_API || LOG4CXX_LOGCHAR_IS_WCHAR_T || defined(WIN32) || defined(_WIN32) void Transcoder::decode(const std::wstring& src, LogString& dst) { #if LOG4CXX_LOGCHAR_IS_WCHAR_T dst.append(src, len); #else std::wstring::const_iterator i = src.begin(); while(i != src.end()) { unsigned int cp = decode(src, i); if (cp != 0xFFFF) { encode(cp, dst); } else { dst.append(1, LOSSCHAR); i++; } } #endif } void Transcoder::encode(const LogString& src, std::wstring& dst) { #if LOG4CXX_LOGCHAR_IS_WCHAR_T dst.append(src); #else for(LogString::const_iterator i = src.begin(); i != src.end();) { unsigned int cp = Transcoder::decode(src, i); encode(cp, dst); } #endif } wchar_t* Transcoder::wencode(const LogString& src, Pool& p) { #if LOG4CXX_LOGCHAR_IS_WCHAR_T std::wstring& tmp = src; #else std::wstring tmp; encode(src, tmp); #endif wchar_t* dst = (wchar_t*) p.palloc((tmp.length() + 1) * sizeof(wchar_t)); dst[tmp.length()] = 0; memcpy(dst, tmp.data(), tmp.length() * sizeof(wchar_t)); return dst; } unsigned int Transcoder::decode(const std::wstring& in, std::wstring::const_iterator& iter) { #if defined(__STDC_ISO_10646__) return *(iter++); #else return decodeUTF16(in, iter); #endif } void Transcoder::encode(unsigned int sv, std::wstring& dst) { #if defined(__STDC_ISO_10646__) dst.append(1, sv); #else if (sizeof(wchar_t) == 4) { dst.append(1, sv); } else { encodeUTF16(sv, dst); } #endif } #endif #if LOG4CXX_UNICHAR_API || LOG4CXX_CFSTRING_API void Transcoder::decode(const std::basic_string& src, LogString& dst) { #if LOG4CXX_LOGCHAR_IS_UNICHAR dst.append(src); #else for(std::basic_string::const_iterator i = src.begin(); i != src.end();) { unsigned int cp = decode(src, i); encode(cp, dst); } #endif } void Transcoder::encode(const LogString& src, std::basic_string& dst) { #if LOG4CXX_LOGCHAR_IS_UNICHAR dst.append(src); #else for(LogString::const_iterator i = src.begin(); i != src.end();) { unsigned int cp = decode(src, i); encode(cp, dst); } #endif } unsigned int Transcoder::decode(const std::basic_string& in, std::basic_string::const_iterator& iter) { return decodeUTF16(in, iter); } void Transcoder::encode(unsigned int sv, std::basic_string& dst) { encodeUTF16(sv, dst); } #endif #if LOG4CXX_CFSTRING_API void Transcoder::decode(const CFStringRef& src, LogString& dst) { const UniChar* chars = CFStringGetCharactersPtr(src); if (chars) { decode(chars, dst); } else { size_t length = CFStringGetLength(src); if (length > 0) { std::vector tmp(length); CFStringGetCharacters(src, CFRangeMake(0, length), &tmp[0]); #if LOG4CXX_LOGCHAR_IS_UNICHAR dst.append(&tmp[0], tmp.size()); #else decode(std::basic_string(&tmp[0], tmp.size()), dst); #endif } } } CFStringRef Transcoder::encode(const LogString& src) { LOG4CXX_ENCODE_UNICHAR(tmp, src); return CFStringCreateWithCharacters(kCFAllocatorDefault, tmp.data(), tmp.size()); } #endif logchar Transcoder::decode(char val) { #if LOG4CXX_CHARSET_EBCDIC LogString dst; Transcoder::decode(std::string(1, val), dst); return dst[0]; #else return val; #endif } LogString Transcoder::decode(const char* val) { #if LOG4CXX_LOGCHAR_IS_UTF8 && !LOG4CXX_CHARSET_EBCDIC return val; #else LogString dst; Transcoder::decode(val, dst); return dst; #endif } std::string Transcoder::encodeCharsetName(const LogString& val) { char asciiTable[] = { ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6' , '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~' }; std::string out; for(LogString::const_iterator iter = val.begin(); iter != val.end(); iter++) { if (*iter >= 0x20 && *iter < 0x7F) { out.append(1, asciiTable[*iter - 0x20]); } else { out.append(1, LOSSCHAR); } } return out; }