diff options
| author | Serhiy Storchaka <storchaka@gmail.com> | 2013-11-26 21:27:11 +0200 | 
|---|---|---|
| committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-11-26 21:27:11 +0200 | 
| commit | 687ff0ecdf9eb574c3553eee2a8492668cfa84ef (patch) | |
| tree | 19e87329763348558f5e0a92b3e396f078dd6b1a /Lib/json/decoder.py | |
| parent | 1df88677e96f258a917b1cec0940ea98aeccaa72 (diff) | |
| parent | c93329b3dd6dde3de76f473f5573233cb0366d9c (diff) | |
| download | cpython-git-687ff0ecdf9eb574c3553eee2a8492668cfa84ef.tar.gz | |
Issue #11489: JSON decoder now accepts lone surrogates.
Diffstat (limited to 'Lib/json/decoder.py')
| -rw-r--r-- | Lib/json/decoder.py | 35 | 
1 file changed, 17 insertions, 18 deletions
| diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index da7ef9c819..59e5f41f4d 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -58,6 +58,16 @@ BACKSLASH = {      'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',  } +def _decode_uXXXX(s, pos): +    esc = s[pos + 1:pos + 5] +    if len(esc) == 4 and esc[1] not in 'xX': +        try: +            return int(esc, 16) +        except ValueError: +            pass +    msg = "Invalid \\uXXXX escape" +    raise ValueError(errmsg(msg, s, pos)) +  def py_scanstring(s, end, strict=True,          _b=BACKSLASH, _m=STRINGCHUNK.match):      """Scan the string s for a JSON string. End is the index of the @@ -107,25 +117,14 @@ def py_scanstring(s, end, strict=True,                  raise ValueError(errmsg(msg, s, end))              end += 1          else: -            esc = s[end + 1:end + 5] -            next_end = end + 5 -            if len(esc) != 4: -                msg = "Invalid \\uXXXX escape" -                raise ValueError(errmsg(msg, s, end)) -            uni = int(esc, 16) -            if 0xd800 <= uni <= 0xdbff: -                msg = "Invalid \\uXXXX\\uXXXX surrogate pair" -                if not s[end + 5:end + 7] == '\\u': -                    raise ValueError(errmsg(msg, s, end)) -                esc2 = s[end + 7:end + 11] -                if len(esc2) != 4: -                    raise ValueError(errmsg(msg, s, end)) -                uni2 = int(esc2, 16) -                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) -                next_end += 6 +            uni = _decode_uXXXX(s, end) +            end += 5 +            if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': +                uni2 = _decode_uXXXX(s, end + 1) +                if 0xdc00 <= uni2 <= 0xdfff: +                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) +                    end += 6              char = chr(uni) - -            end = next_end          _append(char)      return 
''.join(chunks), end | 
