diff options
| author | Tomaz Solc <tomaz.solc@tablix.org> | 2010-10-12 19:12:30 +0200 |
|---|---|---|
| committer | Tomaz Solc <tomaz.solc@tablix.org> | 2010-10-12 19:12:30 +0200 |
| commit | 5f1a7a98bbdae79dd1e7de2fd577a3feced0169f (patch) | |
| tree | ada5201890c047f5ab19664f4c9eba006ff57aa0 /unidecode/__init__.py | |
| parent | b6ca26833caeaede52e13bcf6ebd0e2015a2d753 (diff) | |
| download | unidecode-5f1a7a98bbdae79dd1e7de2fd577a3feced0169f.tar.gz | |
Support for characters beyond BMP and below PUA
Diffstat (limited to 'unidecode/__init__.py')
| -rw-r--r-- | unidecode/__init__.py | 8 |
1 file changed, 4 insertions, 4 deletions
```diff
diff --git a/unidecode/__init__.py b/unidecode/__init__.py
index 9de02cf..1b04539 100644
--- a/unidecode/__init__.py
+++ b/unidecode/__init__.py
@@ -31,8 +31,8 @@ def unidecode(string):
             retval.append(char)
             continue
 
-        if codepoint > 0xffff:
-            continue # We don't support characters beyond the BMP.
+        if codepoint > 0xeffff:
+            continue # Characters in Private Use Area and above are ignored
 
         section = codepoint >> 8 # Chop off the last two hex digits
         position = codepoint % 256 # Last two hex digits
@@ -41,7 +41,7 @@ def unidecode(string):
             table = Cache[section]
         except KeyError:
             try:
-                mod = __import__('unidecode.x%02x'%(section), [], [], ['data'])
+                mod = __import__('unidecode.x%03x'%(section), [], [], ['data'])
             except ImportError:
                 Cache[section] = None
                 continue # No match: ignore this character and carry on.
@@ -51,4 +51,4 @@ def unidecode(string):
         if table and len(table) > position:
             retval.append( table[position] )
 
-    return ''.join(retval)
\ No newline at end of file
+    return ''.join(retval)
```
