diff options
| author | Tomaz Solc <tomaz.solc@tablix.org> | 2014-12-07 19:03:33 +0100 |
|---|---|---|
| committer | Tomaz Solc <tomaz.solc@tablix.org> | 2014-12-07 19:03:33 +0100 |
| commit | 5ff1c3527d2f5e3d07c9745fdbab51841a32488b (patch) | |
| tree | c3526485ee68533e2db01ee1ce5930f1545c11f5 /unidecode | |
| parent | 6fba4e6cddeb9b0f2b5429ff9afd15cc63e1fe23 (diff) | |
| download | unidecode-5ff1c3527d2f5e3d07c9745fdbab51841a32488b.tar.gz | |
Issue a warning if a surrogate char is encountered
Also, improved the section in README regarding "narrow" Python builds.
Diffstat (limited to 'unidecode')
| -rw-r--r-- | unidecode/__init__.py | 5 |
1 file changed, 5 insertions, 0 deletions
```diff
diff --git a/unidecode/__init__.py b/unidecode/__init__.py
index 2cb96c4..ac5b86d 100644
--- a/unidecode/__init__.py
+++ b/unidecode/__init__.py
@@ -44,6 +44,11 @@ def unidecode(string):
         if codepoint > 0xeffff:
             continue # Characters in Private Use Area and above are ignored
 
+        if 0xd800 <= codepoint <= 0xdfff:
+            warnings.warn( "Surrogate character %r will be ignored. "
+                    "You might be using a narrow Python build." % (char,),
+                    RuntimeWarning, 2)
+
         section = codepoint >> 8 # Chop off the last two hex digits
         position = codepoint % 256 # Last two hex digits
 
```
