summary | refs | log | tree | commit | diff
path: root/unidecode/__init__.py
diff options
context:
space:
mode:
author: Tomaz Solc <tomaz.solc@tablix.org>, 2010-10-12 19:12:30 +0200
committer: Tomaz Solc <tomaz.solc@tablix.org>, 2010-10-12 19:12:30 +0200
commit: 5f1a7a98bbdae79dd1e7de2fd577a3feced0169f (patch)
tree: ada5201890c047f5ab19664f4c9eba006ff57aa0 /unidecode/__init__.py
parent: b6ca26833caeaede52e13bcf6ebd0e2015a2d753 (diff)
download: unidecode-5f1a7a98bbdae79dd1e7de2fd577a3feced0169f.tar.gz
Support for characters beyond BMP and below PUA
Diffstat (limited to 'unidecode/__init__.py')
-rw-r--r-- unidecode/__init__.py | 8
1 file changed, 4 insertions, 4 deletions
diff --git a/unidecode/__init__.py b/unidecode/__init__.py
index 9de02cf..1b04539 100644
--- a/unidecode/__init__.py
+++ b/unidecode/__init__.py
@@ -31,8 +31,8 @@ def unidecode(string):
retval.append(char)
continue
- if codepoint > 0xffff:
- continue # We don't support characters beyond the BMP.
+ if codepoint > 0xeffff:
+ continue # Characters in Private Use Area and above are ignored
section = codepoint >> 8 # Chop off the last two hex digits
position = codepoint % 256 # Last two hex digits
@@ -41,7 +41,7 @@ def unidecode(string):
table = Cache[section]
except KeyError:
try:
- mod = __import__('unidecode.x%02x'%(section), [], [], ['data'])
+ mod = __import__('unidecode.x%03x'%(section), [], [], ['data'])
except ImportError:
Cache[section] = None
continue # No match: ignore this character and carry on.
@@ -51,4 +51,4 @@ def unidecode(string):
if table and len(table) > position:
retval.append( table[position] )
- return ''.join(retval)
\ No newline at end of file
+ return ''.join(retval)