summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Philip Thiem <ptthiem@gmail.com> 2014-05-17 04:14:19 -0500
committer Philip Thiem <ptthiem@gmail.com> 2014-05-17 04:14:19 -0500
commit 44e26d42e8aed06dba2fd11ef0e3b52d5e9fd424 (patch)
tree 2ed28199b306fe53ee6b4f9457c936082396147c
parent 09d63d1f6f12996ee23efa2548510d92e0f1eba3 (diff)
download python-setuptools-bitbucket-44e26d42e8aed06dba2fd11ef0e3b52d5e9fd424.tar.gz
Starting a unicode_utils module.
-rw-r--r-- setuptools/unicode_utils.py 41
1 files changed, 41 insertions, 0 deletions
diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py
new file mode 100644
index 00000000..d2de941a
--- /dev/null
+++ b/setuptools/unicode_utils.py
@@ -0,0 +1,41 @@
+import unicodedata
+import sys
+from setuptools.compat import unicode as decoded_string
+
+
+# HFS Plus uses decomposed UTF-8
+def decompose(path):
+    """Return *path* normalized to Unicode NFD (decomposed) form.
+
+    A ``decoded_string`` instance is normalized and returned as text.
+    Any other value is decoded as UTF-8, normalized, and encoded back
+    to UTF-8. If one of those steps raises ``UnicodeError``, the value
+    reached so far is returned (the untouched input when the initial
+    decode fails).
+    """
+    if not isinstance(path, decoded_string):
+        result = path
+        try:
+            result = result.decode('utf-8')
+            result = unicodedata.normalize('NFD', result)
+            result = result.encode('utf-8')
+        except UnicodeError:
+            # Not UTF-8
+            pass
+        return result
+    return unicodedata.normalize('NFD', path)
+
+
+def filesys_decode(path):
+    """
+    Ensure that the given path is decoded,
+    ``None`` when no expected encoding works.
+
+    A ``decoded_string`` instance is returned unchanged. Any other
+    value is decoded with the filesystem encoding first and with
+    UTF-8 second; the first successful result is returned.
+    """
+    if isinstance(path, decoded_string):
+        return path
+
+    # sys.getfilesystemencoding() may return None (documented for
+    # Python 2 on Unix when the platform lookup fails). Passing None to
+    # decode() raises TypeError, which the handler below would not
+    # catch, so fall back to UTF-8 in that case.
+    fs_enc = sys.getfilesystemencoding() or 'utf-8'
+
+    for enc in (fs_enc, 'utf-8'):
+        try:
+            return path.decode(enc)
+        except UnicodeDecodeError:
+            continue
+
+    # Neither candidate encoding could decode the path.
+    return None
+
+
+def try_encode(string, enc):
+    """Encode *string* with *enc*, or return None if it cannot be encoded.
+
+    Only ``UnicodeEncodeError`` is turned into ``None``; any other
+    exception raised by ``string.encode`` propagates to the caller.
+    """
+    try:
+        encoded = string.encode(enc)
+    except UnicodeEncodeError:
+        encoded = None
+    return encoded