summary | refs | log | tree | commit | diff
path: root/unidecode
diff options
context:
space:
mode:
author Tomaz Solc <tomaz.solc@tablix.org> 2014-12-07 19:03:33 +0100
committer Tomaz Solc <tomaz.solc@tablix.org> 2014-12-07 19:03:33 +0100
commit 5ff1c3527d2f5e3d07c9745fdbab51841a32488b (patch)
tree c3526485ee68533e2db01ee1ce5930f1545c11f5 /unidecode
parent 6fba4e6cddeb9b0f2b5429ff9afd15cc63e1fe23 (diff)
download unidecode-5ff1c3527d2f5e3d07c9745fdbab51841a32488b.tar.gz
Issue a warning if a surrogate char is encountered
Also, improved the section in README regarding "narrow" Python builds.
Diffstat (limited to 'unidecode')
-rw-r--r-- unidecode/__init__.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/unidecode/__init__.py b/unidecode/__init__.py
index 2cb96c4..ac5b86d 100644
--- a/unidecode/__init__.py
+++ b/unidecode/__init__.py
@@ -44,6 +44,11 @@ def unidecode(string):
if codepoint > 0xeffff:
continue # Characters in Private Use Area and above are ignored
+ if 0xd800 <= codepoint <= 0xdfff:
+ warnings.warn( "Surrogate character %r will be ignored. "
+ "You might be using a narrow Python build." % (char,),
+ RuntimeWarning, 2)
+
section = codepoint >> 8 # Chop off the last two hex digits
position = codepoint % 256 # Last two hex digits