From 5f1a7a98bbdae79dd1e7de2fd577a3feced0169f Mon Sep 17 00:00:00 2001
From: Tomaz Solc
Date: Tue, 12 Oct 2010 19:12:30 +0200
Subject: Support for characters beyond BMP and below PUA

---
 unidecode/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'unidecode/__init__.py')

diff --git a/unidecode/__init__.py b/unidecode/__init__.py
index 9de02cf..1b04539 100644
--- a/unidecode/__init__.py
+++ b/unidecode/__init__.py
@@ -31,8 +31,8 @@ def unidecode(string):
             retval.append(char)
             continue

-        if codepoint > 0xffff:
-            continue # We don't support characters beyond the BMP.
+        if codepoint > 0xeffff:
+            continue # Characters in Private Use Area and above are ignored

         section = codepoint >> 8 # Chop off the last two hex digits
         position = codepoint % 256 # Last two hex digits
@@ -41,7 +41,7 @@ def unidecode(string):
             table = Cache[section]
         except KeyError:
             try:
-                mod = __import__('unidecode.x%02x'%(section), [], [], ['data'])
+                mod = __import__('unidecode.x%03x'%(section), [], [], ['data'])
             except ImportError:
                 Cache[section] = None
                 continue # No match: ignore this character and carry on.
@@ -51,4 +51,4 @@ def unidecode(string):
         if table and len(table) > position:
             retval.append( table[position] )

-    return ''.join(retval)
\ No newline at end of file
+    return ''.join(retval)
--
cgit v1.2.1