Issue #16455: On FreeBSD and Solaris, if the locale is C, the

ASCII/surrogateescape codec is now used, instead of the locale encoding, to decode the command line arguments. This change fixes inconsistencies with os.fsencode() and os.fsdecode() because these operating systems announce an ASCII locale encoding, whereas the ISO-8859-1 encoding is used in practice.
author: Victor Stinner <victor.stinner@gmail.com> 2012-12-04 01:34:47 +0100
committer: Victor Stinner <victor.stinner@gmail.com> 2012-12-04 01:34:47 +0100
commit: d45c7f8d74d30de0a558b10e04541b861428b7c1 (patch)
tree: cb7c98ee584572691df70387baf586853f4298cf /Objects/unicodeobject.c
parent: ca9f8b21c8f047501fbf19668339d1c67523de79 (diff)
download: cpython-git-d45c7f8d74d30de0a558b10e04541b861428b7c1.tar.gz
1 files changed, 13 insertions, 11 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index dd8d7b2232..6491fdc316 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2863,10 +2863,10 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
 /* Convert encoding to lower case and replace '_' with '-' in order to
    catch e.g. UTF_8. Return 0 on error (encoding is longer than lower_len-1),
    1 on success. */
-static int
-normalize_encoding(const char *encoding,
-                   char *lower,
-                   size_t lower_len)
+int
+_Py_normalize_encoding(const char *encoding,
+                       char *lower,
+                       size_t lower_len)
 {
     const char *e;
     char *l;
@@ -2908,7 +2908,7 @@ PyUnicode_Decode(const char *s,
     char lower[11];  /* Enough for any encoding shortcut */
 
     /* Shortcuts for common default encodings */
-    if (normalize_encoding(encoding, lower, sizeof(lower))) {
+    if (_Py_normalize_encoding(encoding, lower, sizeof(lower))) {
         if ((strcmp(lower, "utf-8") == 0) ||
             (strcmp(lower, "utf8") == 0))
             return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
@@ -3110,7 +3110,8 @@ locale_error_handler(const char *errors, int *surrogateescape)
         *surrogateescape = 0;
         return 0;
     }
-    if (strcmp(errors, "surrogateescape") == 0) {
+    if (errors == "surrogateescape"
+        || strcmp(errors, "surrogateescape") == 0) {
         *surrogateescape = 1;
         return 0;
     }
@@ -3148,7 +3149,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
     }
 
     if (surrogateescape) {
-        /* locale encoding with surrogateescape */
+        /* "surrogateescape" error handler */
         char *str;
 
         str = _Py_wchar2char(wstr, &error_pos);
@@ -3168,6 +3169,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
         PyMem_Free(str);
     }
     else {
+        /* strict mode */
         size_t len, len2;
 
         len = wcstombs(NULL, wstr, 0);
@@ -3273,7 +3275,7 @@ PyUnicode_AsEncodedString(PyObject *unicode,
     }
 
     /* Shortcuts for common default encodings */
-    if (normalize_encoding(encoding, lower, sizeof(lower))) {
+    if (_Py_normalize_encoding(encoding, lower, sizeof(lower))) {
         if ((strcmp(lower, "utf-8") == 0) ||
             (strcmp(lower, "utf8") == 0))
         {
@@ -3413,8 +3415,8 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
         return NULL;
     }
 
-    if (surrogateescape)
-    {
+    if (surrogateescape) {
+        /* "surrogateescape" error handler */
         wstr = _Py_char2wchar(str, &wlen);
         if (wstr == NULL) {
             if (wlen == (size_t)-1)
@@ -3428,6 +3430,7 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
         PyMem_Free(wstr);
     }
     else {
+        /* strict mode */
 #ifndef HAVE_BROKEN_MBSTOWCS
         wlen = mbstowcs(NULL, str, 0);
 #else
@@ -3447,7 +3450,6 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
                 return PyErr_NoMemory();
         }
 
-        /* This shouldn't fail now */
         wlen2 = mbstowcs(wstr, str, wlen+1);
         if (wlen2 == (size_t)-1) {
             if (wstr != smallbuf)
author	Victor Stinner <victor.stinner@gmail.com>	2012-12-04 01:34:47 +0100
committer	Victor Stinner <victor.stinner@gmail.com>	2012-12-04 01:34:47 +0100
commit	d45c7f8d74d30de0a558b10e04541b861428b7c1 (patch)
tree	cb7c98ee584572691df70387baf586853f4298cf /Objects/unicodeobject.c
parent	ca9f8b21c8f047501fbf19668339d1c67523de79 (diff)
download	cpython-git-d45c7f8d74d30de0a558b10e04541b861428b7c1.tar.gz