diff options
author | Benjamin Peterson <benjamin@python.org> | 2011-06-03 17:50:59 -0500 |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2011-06-03 17:50:59 -0500 |
commit | 4dfcb1a00dbcbc76fcd152d652d3f83b53b14e45 (patch) | |
tree | 99acf90091031bcf16aa91eb93a4ca69e754e4eb | |
parent | 48deae12d511eabd8d73a2e4a135e6321a88b48c (diff) | |
parent | ab5fcc00a7a50c4c61f0a7247b21a0728c134779 (diff) | |
download | cpython-git-4dfcb1a00dbcbc76fcd152d652d3f83b53b14e45.tar.gz |
merge heads
-rw-r--r-- | Lib/test/test_multibytecodec_support.py | 36 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_hk.c | 93 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_jp.c | 10 |
3 files changed, 85 insertions, 54 deletions
diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py index 7735976615..ef63b6934d 100644 --- a/Lib/test/test_multibytecodec_support.py +++ b/Lib/test/test_multibytecodec_support.py @@ -58,11 +58,16 @@ class TestBase: result = func(source, scheme)[0] if func is self.decode: self.assertTrue(type(result) is str, type(result)) + self.assertEqual(result, expected, + '%a.decode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) else: self.assertTrue(type(result) is bytes, type(result)) - self.assertEqual(result, expected, - '%a.decode(%r)=%a != %a' - % (source, self.encoding, result, expected)) + self.assertEqual(result, expected, + '%a.encode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) else: self.assertRaises(UnicodeError, func, source, scheme) @@ -279,6 +284,7 @@ class TestBase_Mapping(unittest.TestCase): pass_enctest = [] pass_dectest = [] supmaps = [] + codectests = [] def __init__(self, *args, **kw): unittest.TestCase.__init__(self, *args, **kw) @@ -348,6 +354,30 @@ class TestBase_Mapping(unittest.TestCase): if (csetch, unich) not in self.pass_dectest: self.assertEqual(str(csetch, self.encoding), unich) + def test_errorhandle(self): + for source, scheme, expected in self.codectests: + if isinstance(source, bytes): + func = source.decode + else: + func = source.encode + if expected: + if isinstance(source, bytes): + result = func(self.encoding, scheme) + self.assertTrue(type(result) is str, type(result)) + self.assertEqual(result, expected, + '%a.decode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) + else: + result = func(self.encoding, scheme) + self.assertTrue(type(result) is bytes, type(result)) + self.assertEqual(result, expected, + '%a.encode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) + else: + self.assertRaises(UnicodeError, func, self.encoding, scheme) + def load_teststring(name): dir = os.path.join(os.path.dirname(__file__), 'cjkencodings') with open(os.path.join(dir, name + '.txt'), 'rb') as f: diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index aaf103db88..558a42f89c 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -115,55 +115,56 @@ DECODER(big5hkscs) REQUIRE_INBUF(2) - if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1)) - goto hkscsdec; + if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) { + TRYMAP_DEC(big5, **outbuf, c, IN2) { + NEXT(2, 1) + continue; + } + } + + TRYMAP_DEC(big5hkscs, decoded, c, IN2) + { + int s = BH2S(c, IN2); + const unsigned char *hintbase; + + assert(0x87 <= c && c <= 0xfe); + assert(0x40 <= IN2 && IN2 <= 0xfe); + + if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { + hintbase = big5hkscs_phint_0; + s -= BH2S(0x87, 0x40); + } + else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ + hintbase = big5hkscs_phint_12130; + s -= BH2S(0xc6, 0xa1); + } + else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ + hintbase = big5hkscs_phint_21924; + s -= BH2S(0xf9, 0xd6); + } + else + return MBERR_INTERNAL; - TRYMAP_DEC(big5, **outbuf, c, IN2) { - NEXT(2, 1) + if (hintbase[s >> 3] & (1 << (s & 7))) { + WRITEUCS4(decoded | 0x20000) + NEXT_IN(2) + } + else { + OUT1(decoded) + NEXT(2, 1) + } + continue; } - else -hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) { - int s = BH2S(c, IN2); - const unsigned char *hintbase; - - assert(0x87 <= c && c <= 0xfe); - assert(0x40 <= IN2 && IN2 <= 0xfe); - - if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { - hintbase = big5hkscs_phint_0; - s -= BH2S(0x87, 0x40); - } - else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ - hintbase = big5hkscs_phint_12130; - s -= BH2S(0xc6, 0xa1); - } - else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ - hintbase = big5hkscs_phint_21924; - s -= BH2S(0xf9, 0xd6); - } - else - return MBERR_INTERNAL; - - if (hintbase[s >> 3] & (1 << (s & 7))) { - WRITEUCS4(decoded | 0x20000) - NEXT_IN(2) - } - else { - OUT1(decoded) - NEXT(2, 1) - } - } - else { - switch ((c << 8) | IN2) { - case 0x8862: WRITE2(0x00ca, 0x0304); break; - case 0x8864: WRITE2(0x00ca, 0x030c); break; - case 0x88a3: WRITE2(0x00ea, 0x0304); break; - case 0x88a5: WRITE2(0x00ea, 0x030c); break; - default: return 2; - } - - NEXT(2, 2) /* all decoded codepoints are pairs, above. */ + + switch ((c << 8) | IN2) { + case 0x8862: WRITE2(0x00ca, 0x0304); break; + case 0x8864: WRITE2(0x00ca, 0x030c); break; + case 0x88a3: WRITE2(0x00ea, 0x0304); break; + case 0x88a5: WRITE2(0x00ea, 0x030c); break; + default: return 2; } + + NEXT(2, 2) /* all decoded codepoints are pairs, above. */ } return 0; diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index 901d3bee47..a05e01b32e 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -371,11 +371,11 @@ DECODER(euc_jp) REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } if (c == 0x8e) { /* JIS X 0201 half-width katakana */ |