Issue #11461: Fix the incremental UTF-16 decoder. Original patch by
Amaury Forgeot d'Arc. Added tests for partial decoding of non-BMP characters.
author: Serhiy Storchaka <storchaka@gmail.com> 2013-01-08 23:12:00 +0200
committer: Serhiy Storchaka <storchaka@gmail.com> 2013-01-08 23:12:00 +0200
commit: c4b82c037e95fe2fe77352e8a4a54be7b209df2a (patch)
tree: 3a56936d881f41dbcd05cbf5d43afc76161e67f3 /Objects/unicodeobject.c
parent: c9631a14d77bd34a7c0ecf1cb1e5f8983b76d48f (diff)
download: cpython-git-c4b82c037e95fe2fe77352e8a4a54be7b209df2a.tar.gz
1 file changed, 4 insertions, 1 deletion
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7713b5497e..1c6e55d1bf 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2565,8 +2565,11 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
 
         /* UTF-16 code pair: */
         if (e - q < 2) {
+            q -= 2;
+            if (consumed)
+                break;
             errmsg = "unexpected end of data";
-            startinpos = (((const char *)q)-2)-starts;
+            startinpos = ((const char *)q)-starts;
             endinpos = ((const char *)e)-starts;
             goto utf16Error;
         }
author	Serhiy Storchaka <storchaka@gmail.com>	2013-01-08 23:12:00 +0200
committer	Serhiy Storchaka <storchaka@gmail.com>	2013-01-08 23:12:00 +0200
commit	c4b82c037e95fe2fe77352e8a4a54be7b209df2a (patch)
tree	3a56936d881f41dbcd05cbf5d43afc76161e67f3 /Objects/unicodeobject.c
parent	c9631a14d77bd34a7c0ecf1cb1e5f8983b76d48f (diff)
download	cpython-git-c4b82c037e95fe2fe77352e8a4a54be7b209df2a.tar.gz