diff options
Diffstat (limited to 'src/win32/utf-conv.c')
-rw-r--r-- | src/win32/utf-conv.c | 61 |
1 files changed, 18 insertions, 43 deletions
diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c
index a98e814f0..88a84141e 100644
--- a/src/win32/utf-conv.c
+++ b/src/win32/utf-conv.c
@@ -11,83 +11,52 @@
 #define U16_LEAD(c) (wchar_t)(((c)>>10)+0xd7c0)
 #define U16_TRAIL(c) (wchar_t)(((c)&0x3ff)|0xdc00)
 
-void git__utf8_to_16(wchar_t *dest, const char *src)
+#if 0
+void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
 {
 	wchar_t *pDest = dest;
 	uint32_t ch;
 	const uint8_t* pSrc = (uint8_t*) src;
-	const uint8_t *pSrcLimit = pSrc + strlen(src);
 
-	assert(dest && src);
+	assert(dest && src && length);
 
-	if ((pSrcLimit - pSrc) >= 4) {
-		pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
+	length--;
 
-		/* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
-		do {
-			ch = *pSrc++;
-			if(ch < 0xc0) {
-				/*
-				 * ASCII, or a trail byte in lead position which is treated like
-				 * a single-byte sequence for better character boundary
-				 * resynchronization after illegal sequences.
-				 */
-				*pDest++=(wchar_t)ch;
-			} else if(ch < 0xe0) { /* U+0080..U+07FF */
-				/* 0x3080 = (0xc0 << 6) + 0x80 */
-				*pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
-			} else if(ch < 0xf0) { /* U+0800..U+FFFF */
-				/* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
-				/* 0x2080 = (0x80 << 6) + 0x80 */
-				ch = (ch << 12) + (*pSrc++ << 6);
-				*pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
-			} else /* f0..f4 */ { /* U+10000..U+10FFFF */
-				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
-				ch = (ch << 18) + (*pSrc++ << 12);
-				ch += *pSrc++ << 6;
-				ch += *pSrc++ - 0x3c82080;
-				*(pDest++) = U16_LEAD(ch);
-				*(pDest++) = U16_TRAIL(ch);
-			}
-		} while(pSrc < pSrcLimit);
-
-		pSrcLimit += 3; /* restore original pSrcLimit */
-	}
-
-	while(pSrc < pSrcLimit) {
+	while(*pSrc && length > 0) {
 		ch = *pSrc++;
+		length--;
+
 		if(ch < 0xc0) {
 			/*
 			 * ASCII, or a trail byte in lead position which is treated like
 			 * a single-byte sequence for better character boundary
 			 * resynchronization after illegal sequences.
 			 */
-			*pDest++=(wchar_t)ch;
+			*pDest++ = (wchar_t)ch;
 			continue;
 		} else if(ch < 0xe0) { /* U+0080..U+07FF */
-			if(pSrc < pSrcLimit) {
+			if (pSrc[0]) {
 				/* 0x3080 = (0xc0 << 6) + 0x80 */
 				*pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
 				continue;
 			}
 		} else if(ch < 0xf0) { /* U+0800..U+FFFF */
-			if((pSrcLimit - pSrc) >= 2) {
+			if (pSrc[0] && pSrc[1]) {
 				/* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
 				/* 0x2080 = (0x80 << 6) + 0x80 */
 				ch = (ch << 12) + (*pSrc++ << 6);
 				*pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
-				pSrc += 3;
 				continue;
 			}
 		} else /* f0..f4 */ { /* U+10000..U+10FFFF */
-			if((pSrcLimit - pSrc) >= 3) {
+			if (length >= 1 && pSrc[0] && pSrc[1] && pSrc[2]) {
 				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
 				ch = (ch << 18) + (*pSrc++ << 12);
 				ch += *pSrc++ << 6;
 				ch += *pSrc++ - 0x3c82080;
 				*(pDest++) = U16_LEAD(ch);
 				*(pDest++) = U16_TRAIL(ch);
-				pSrc += 4;
+				length--; /* two bytes for this character */
 				continue;
 			}
 		}
@@ -99,6 +68,12 @@ void git__utf8_to_16(wchar_t *dest, const char *src)
 
 	*pDest++ = 0x0;
 }
+#endif
+
+void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
+{
+	MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, length);
+}
 
 void git__utf16_to_8(char *out, const wchar_t *input)
 {