summaryrefslogtreecommitdiff
path: root/src/win32/utf-conv.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/win32/utf-conv.c')
-rw-r--r--src/win32/utf-conv.c61
1 files changed, 18 insertions, 43 deletions
diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c
index a98e814f0..88a84141e 100644
--- a/src/win32/utf-conv.c
+++ b/src/win32/utf-conv.c
@@ -11,83 +11,52 @@
#define U16_LEAD(c) (wchar_t)(((c)>>10)+0xd7c0)
#define U16_TRAIL(c) (wchar_t)(((c)&0x3ff)|0xdc00)
-void git__utf8_to_16(wchar_t *dest, const char *src)
+#if 0
+void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
{
wchar_t *pDest = dest;
uint32_t ch;
const uint8_t* pSrc = (uint8_t*) src;
- const uint8_t *pSrcLimit = pSrc + strlen(src);
- assert(dest && src);
+ assert(dest && src && length);
- if ((pSrcLimit - pSrc) >= 4) {
- pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
+ length--;
- /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
- do {
- ch = *pSrc++;
- if(ch < 0xc0) {
- /*
- * ASCII, or a trail byte in lead position which is treated like
- * a single-byte sequence for better character boundary
- * resynchronization after illegal sequences.
- */
- *pDest++=(wchar_t)ch;
- } else if(ch < 0xe0) { /* U+0080..U+07FF */
- /* 0x3080 = (0xc0 << 6) + 0x80 */
- *pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
- } else if(ch < 0xf0) { /* U+0800..U+FFFF */
- /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
- /* 0x2080 = (0x80 << 6) + 0x80 */
- ch = (ch << 12) + (*pSrc++ << 6);
- *pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
- } else /* f0..f4 */ { /* U+10000..U+10FFFF */
- /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
- ch = (ch << 18) + (*pSrc++ << 12);
- ch += *pSrc++ << 6;
- ch += *pSrc++ - 0x3c82080;
- *(pDest++) = U16_LEAD(ch);
- *(pDest++) = U16_TRAIL(ch);
- }
- } while(pSrc < pSrcLimit);
-
- pSrcLimit += 3; /* restore original pSrcLimit */
- }
-
- while(pSrc < pSrcLimit) {
+ while(*pSrc && length > 0) {
ch = *pSrc++;
+ length--;
+
if(ch < 0xc0) {
/*
* ASCII, or a trail byte in lead position which is treated like
* a single-byte sequence for better character boundary
* resynchronization after illegal sequences.
*/
- *pDest++=(wchar_t)ch;
+ *pDest++ = (wchar_t)ch;
continue;
} else if(ch < 0xe0) { /* U+0080..U+07FF */
- if(pSrc < pSrcLimit) {
+ if (pSrc[0]) {
/* 0x3080 = (0xc0 << 6) + 0x80 */
*pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
continue;
}
} else if(ch < 0xf0) { /* U+0800..U+FFFF */
- if((pSrcLimit - pSrc) >= 2) {
+ if (pSrc[0] && pSrc[1]) {
/* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
/* 0x2080 = (0x80 << 6) + 0x80 */
ch = (ch << 12) + (*pSrc++ << 6);
*pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
- pSrc += 3;
continue;
}
} else /* f0..f4 */ { /* U+10000..U+10FFFF */
- if((pSrcLimit - pSrc) >= 3) {
+ if (length >= 1 && pSrc[0] && pSrc[1] && pSrc[2]) {
/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
ch = (ch << 18) + (*pSrc++ << 12);
ch += *pSrc++ << 6;
ch += *pSrc++ - 0x3c82080;
*(pDest++) = U16_LEAD(ch);
*(pDest++) = U16_TRAIL(ch);
- pSrc += 4;
+ length--; /* two bytes for this character */
continue;
}
}
@@ -99,6 +68,12 @@ void git__utf8_to_16(wchar_t *dest, const char *src)
*pDest++ = 0x0;
}
+#endif
+
+void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
+{
+ MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, length);
+}
void git__utf16_to_8(char *out, const wchar_t *input)
{