1 files changed, 309 insertions, 129 deletions
diff --git a/Python/codecs.c b/Python/codecs.c
index 7334eb3e36..fd67d1b9e1 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -11,6 +11,8 @@ Copyright (c) Corporation for National Research Initiatives.
 #include "Python.h"
 #include <ctype.h>
 
+const char *Py_hexdigits = "0123456789abcdef";
+
 /* --- Codec Registry ----------------------------------------------------- */
 
 /* Import the standard encodings package which will register the first
@@ -61,10 +63,9 @@ PyObject *normalizestring(const char *string)
         return NULL;
     }
 
-    v = PyString_FromStringAndSize(NULL, len);
-    if (v == NULL)
-        return NULL;
-    p = PyString_AS_STRING(v);
+    p = PyMem_Malloc(len + 1);
+    if (p == NULL)  /* PyMem_Malloc itself raises nothing */
+        return PyErr_NoMemory();
     for (i = 0; i < len; i++) {
         register char ch = string[i];
         if (ch == ' ')
@@ -73,6 +74,11 @@ PyObject *normalizestring(const char *string)
             ch = Py_TOLOWER(Py_CHARMASK(ch));
         p[i] = ch;
     }
+    p[i] = '\0';
+    v = PyUnicode_FromString(p);
+    /* Free the scratch buffer on success and failure alike; v is NULL
+       when the conversion failed, which the caller must handle. */
+    PyMem_Free(p);
     return v;
 }
 
@@ -112,7 +118,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
     v = normalizestring(encoding);
     if (v == NULL)
         goto onError;
-    PyString_InternInPlace(&v);
+    PyUnicode_InternInPlace(&v);
 
     /* First, try to lookup the name in the registry dictionary */
     result = PyDict_GetItem(interp->codec_search_cache, v);
@@ -167,7 +173,10 @@ PyObject *_PyCodec_Lookup(const char *encoding)
     }
 
     /* Cache and return the result */
-    PyDict_SetItem(interp->codec_search_cache, v, result);
+    if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
+        Py_DECREF(result);
+        goto onError;
+    }
     Py_DECREF(args);
     return result;
 
@@ -176,6 +185,23 @@ PyObject *_PyCodec_Lookup(const char *encoding)
     return NULL;
 }
 
+/* Codec registry encoding check API: tell whether `encoding` can be
+   resolved by _PyCodec_Lookup().  Returns 1 if so, 0 otherwise. */
+int PyCodec_KnownEncoding(const char *encoding)
+{
+    PyObject *entry = _PyCodec_Lookup(encoding);
+
+    if (entry == NULL) {
+        /* A failed lookup only means "unknown" here, so the pending
+           exception is dropped rather than propagated. */
+        PyErr_Clear();
+        return 0;
+    }
+    /* Only existence matters; release the looked-up entry again. */
+    Py_DECREF(entry);
+    return 1;
+}
+
 static
 PyObject *args_tuple(PyObject *object,
                      const char *errors)
@@ -190,7 +216,7 @@ PyObject *args_tuple(PyObject *object,
     if (errors) {
         PyObject *v;
 
-        v = PyString_FromString(errors);
+        v = PyUnicode_FromString(errors);
         if (v == NULL) {
             Py_DECREF(args);
             return NULL;
@@ -317,7 +343,7 @@ PyObject *PyCodec_Encode(PyObject *object,
 {
     PyObject *encoder = NULL;
     PyObject *args = NULL, *result = NULL;
-    PyObject *v;
+    PyObject *v = NULL;
 
     encoder = PyCodec_Encoder(encoding);
     if (encoder == NULL)
@@ -327,14 +353,14 @@ PyObject *PyCodec_Encode(PyObject *object,
     if (args == NULL)
         goto onError;
 
-    result = PyEval_CallObject(encoder,args);
+    result = PyEval_CallObject(encoder, args);
     if (result == NULL)
         goto onError;
 
     if (!PyTuple_Check(result) ||
         PyTuple_GET_SIZE(result) != 2) {
         PyErr_SetString(PyExc_TypeError,
-                        "encoder must return a tuple (object,integer)");
+                        "encoder must return a tuple (object, integer)");
         goto onError;
     }
     v = PyTuple_GET_ITEM(result,0);
@@ -441,19 +467,16 @@ PyObject *PyCodec_LookupError(const char *name)
 
 static void wrong_exception_type(PyObject *exc)
 {
-    PyObject *type = PyObject_GetAttrString(exc, "__class__");
+    _Py_IDENTIFIER(__class__);  /* provides PyId___class__ used below */
+    _Py_IDENTIFIER(__name__);   /* provides PyId___name__ used below */
+    PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__);
     if (type != NULL) {
-        PyObject *name = PyObject_GetAttrString(type, "__name__");
+        PyObject *name = _PyObject_GetAttrId(type, &PyId___name__);
         Py_DECREF(type);
         if (name != NULL) {
-            PyObject *string = PyObject_Str(name);
+            PyErr_Format(PyExc_TypeError,  /* message names exc's class */
+                         "don't know how to handle %S in error callback", name);
             Py_DECREF(name);
-            if (string != NULL) {
-                PyErr_Format(PyExc_TypeError,
-                    "don't know how to handle %.400s in error callback",
-                    PyString_AS_STRING(string));
-                Py_DECREF(string);
-            }
         }
     }
 }
@@ -468,7 +491,6 @@ PyObject *PyCodec_StrictErrors(PyObject *exc)
 }
 
 
-#ifdef Py_USING_UNICODE
 PyObject *PyCodec_IgnoreErrors(PyObject *exc)
 {
     Py_ssize_t end;
@@ -488,57 +510,58 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
         wrong_exception_type(exc);
         return NULL;
     }
-    /* ouch: passing NULL, 0, pos gives None instead of u'' */
-    return Py_BuildValue("(u#n)", &end, 0, end);
+    return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
 }
 
 
 PyObject *PyCodec_ReplaceErrors(PyObject *exc)
 {
-    PyObject *restuple;
-    Py_ssize_t start;
-    Py_ssize_t end;
-    Py_ssize_t i;
+    Py_ssize_t start, end, i, len;
 
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
         PyObject *res;
-        Py_UNICODE *p;
+        int kind;
+        void *data;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeEncodeError_GetEnd(exc, &end))
             return NULL;
-        res = PyUnicode_FromUnicode(NULL, end-start);
+        len = end - start;
+        res = PyUnicode_New(len, '?');
         if (res == NULL)
             return NULL;
-        for (p = PyUnicode_AS_UNICODE(res), i = start;
-            i<end; ++p, ++i)
-            *p = '?';
-        restuple = Py_BuildValue("(On)", res, end);
-        Py_DECREF(res);
-        return restuple;
+        kind = PyUnicode_KIND(res);
+        data = PyUnicode_DATA(res);
+        for (i = 0; i < len; ++i)
+            PyUnicode_WRITE(kind, data, i, '?');
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        return Py_BuildValue("(Nn)", res, end);
     }
     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
-        Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
         if (PyUnicodeDecodeError_GetEnd(exc, &end))
             return NULL;
-        return Py_BuildValue("(u#n)", &res, 1, end);
+        return Py_BuildValue("(Cn)",
+                             (int)Py_UNICODE_REPLACEMENT_CHARACTER,
+                             end);
     }
     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
         PyObject *res;
-        Py_UNICODE *p;
+        int kind;
+        void *data;
         if (PyUnicodeTranslateError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeTranslateError_GetEnd(exc, &end))
             return NULL;
-        res = PyUnicode_FromUnicode(NULL, end-start);
+        len = end - start;
+        res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
         if (res == NULL)
             return NULL;
-        for (p = PyUnicode_AS_UNICODE(res), i = start;
-            i<end; ++p, ++i)
-            *p = Py_UNICODE_REPLACEMENT_CHARACTER;
-        restuple = Py_BuildValue("(On)", res, end);
-        Py_DECREF(res);
-        return restuple;
+        kind = PyUnicode_KIND(res);
+        data = PyUnicode_DATA(res);
+        for (i=0; i < len; i++)
+            PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        return Py_BuildValue("(Nn)", res, end);
     }
     else {
         wrong_exception_type(exc);
@@ -551,82 +574,72 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
         PyObject *restuple;
         PyObject *object;
+        Py_ssize_t i;
         Py_ssize_t start;
         Py_ssize_t end;
         PyObject *res;
-        Py_UNICODE *p;
-        Py_UNICODE *startp;
-        Py_UNICODE *outp;
+        unsigned char *outp;
         int ressize;
+        Py_UCS4 ch;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeEncodeError_GetEnd(exc, &end))
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        startp = PyUnicode_AS_UNICODE(object);
-        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
-            if (*p<10)
+        for (i = start, ressize = 0; i < end; ++i) {
+            /* object is guaranteed to be "ready" */
+            ch = PyUnicode_READ_CHAR(object, i);
+            if (ch<10)
                 ressize += 2+1+1;
-            else if (*p<100)
+            else if (ch<100)
                 ressize += 2+2+1;
-            else if (*p<1000)
+            else if (ch<1000)
                 ressize += 2+3+1;
-            else if (*p<10000)
+            else if (ch<10000)
                 ressize += 2+4+1;
-#ifndef Py_UNICODE_WIDE
-            else
-                ressize += 2+5+1;
-#else
-            else if (*p<100000)
+            else if (ch<100000)
                 ressize += 2+5+1;
-            else if (*p<1000000)
+            else if (ch<1000000)
                 ressize += 2+6+1;
             else
                 ressize += 2+7+1;
-#endif
         }
         /* allocate replacement */
-        res = PyUnicode_FromUnicode(NULL, ressize);
+        res = PyUnicode_New(ressize, 127);
         if (res == NULL) {
             Py_DECREF(object);
             return NULL;
         }
+        outp = PyUnicode_1BYTE_DATA(res);
         /* generate replacement */
-        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
-            p < startp+end; ++p) {
-            Py_UNICODE c = *p;
+        for (i = start; i < end; ++i) {
             int digits;
             int base;
+            ch = PyUnicode_READ_CHAR(object, i);
             *outp++ = '&';
             *outp++ = '#';
-            if (*p<10) {
+            if (ch<10) {
                 digits = 1;
                 base = 1;
             }
-            else if (*p<100) {
+            else if (ch<100) {
                 digits = 2;
                 base = 10;
             }
-            else if (*p<1000) {
+            else if (ch<1000) {
                 digits = 3;
                 base = 100;
             }
-            else if (*p<10000) {
+            else if (ch<10000) {
                 digits = 4;
                 base = 1000;
             }
-#ifndef Py_UNICODE_WIDE
-            else {
-                digits = 5;
-                base = 10000;
-            }
-#else
-            else if (*p<100000) {
+            else if (ch<100000) {
                 digits = 5;
                 base = 10000;
             }
-            else if (*p<1000000) {
+            else if (ch<1000000) {
                 digits = 6;
                 base = 100000;
             }
@@ -634,16 +647,15 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
                 digits = 7;
                 base = 1000000;
             }
-#endif
             while (digits-->0) {
-                *outp++ = '0' + c/base;
-                c %= base;
+                *outp++ = '0' + ch/base;
+                ch %= base;
                 base /= 10;
             }
             *outp++ = ';';
         }
-        restuple = Py_BuildValue("(On)", res, end);
-        Py_DECREF(res);
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        restuple = Py_BuildValue("(Nn)", res, end);
         Py_DECREF(object);
         return restuple;
     }
@@ -653,83 +665,237 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
     }
 }
 
-static Py_UNICODE hexdigits[] = {
-    '0', '1', '2', '3', '4', '5', '6', '7',
-    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
-};
-
 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 {
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
         PyObject *restuple;
         PyObject *object;
+        Py_ssize_t i;
         Py_ssize_t start;
         Py_ssize_t end;
         PyObject *res;
-        Py_UNICODE *p;
-        Py_UNICODE *startp;
-        Py_UNICODE *outp;
+        unsigned char *outp;
         int ressize;
+        Py_UCS4 c;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeEncodeError_GetEnd(exc, &end))
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        startp = PyUnicode_AS_UNICODE(object);
-        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
-#ifdef Py_UNICODE_WIDE
-            if (*p >= 0x00010000)
+        for (i = start, ressize = 0; i < end; ++i) {
+            /* object is guaranteed to be "ready" */
+            c = PyUnicode_READ_CHAR(object, i);
+            if (c >= 0x10000) {
                 ressize += 1+1+8;
-            else
-#endif
-            if (*p >= 0x100) {
+            }
+            else if (c >= 0x100) {
                 ressize += 1+1+4;
             }
             else
                 ressize += 1+1+2;
         }
-        res = PyUnicode_FromUnicode(NULL, ressize);
+        res = PyUnicode_New(ressize, 127);
         if (res==NULL)
             return NULL;
-        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
-            p < startp+end; ++p) {
-            Py_UNICODE c = *p;
+        for (i = start, outp = PyUnicode_1BYTE_DATA(res);
+            i < end; ++i) {
+            c = PyUnicode_READ_CHAR(object, i);
             *outp++ = '\\';
-#ifdef Py_UNICODE_WIDE
             if (c >= 0x00010000) {
                 *outp++ = 'U';
-                *outp++ = hexdigits[(c>>28)&0xf];
-                *outp++ = hexdigits[(c>>24)&0xf];
-                *outp++ = hexdigits[(c>>20)&0xf];
-                *outp++ = hexdigits[(c>>16)&0xf];
-                *outp++ = hexdigits[(c>>12)&0xf];
-                *outp++ = hexdigits[(c>>8)&0xf];
+                *outp++ = Py_hexdigits[(c>>28)&0xf];
+                *outp++ = Py_hexdigits[(c>>24)&0xf];
+                *outp++ = Py_hexdigits[(c>>20)&0xf];
+                *outp++ = Py_hexdigits[(c>>16)&0xf];
+                *outp++ = Py_hexdigits[(c>>12)&0xf];
+                *outp++ = Py_hexdigits[(c>>8)&0xf];
             }
-            else
-#endif
-            if (c >= 0x100) {
+            else if (c >= 0x100) {
                 *outp++ = 'u';
-                *outp++ = hexdigits[(c>>12)&0xf];
-                *outp++ = hexdigits[(c>>8)&0xf];
+                *outp++ = Py_hexdigits[(c>>12)&0xf];
+                *outp++ = Py_hexdigits[(c>>8)&0xf];
             }
             else
                 *outp++ = 'x';
-            *outp++ = hexdigits[(c>>4)&0xf];
-            *outp++ = hexdigits[c&0xf];
+            *outp++ = Py_hexdigits[(c>>4)&0xf];
+            *outp++ = Py_hexdigits[c&0xf];
+        }
+
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        restuple = Py_BuildValue("(Nn)", res, end);
+        Py_DECREF(object);
+        return restuple;
+    }
+    else {
+        wrong_exception_type(exc);
+        return NULL;
+    }
+}
+
+/* "surrogatepass" error handler.  This handler is declared static until
+   someone demonstrates a need to call it directly. */
+static PyObject *
+PyCodec_SurrogatePassErrors(PyObject *exc)
+{
+    PyObject *restuple;
+    PyObject *object;
+    Py_ssize_t i;
+    Py_ssize_t start;
+    Py_ssize_t end;
+    PyObject *res;
+    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+        char *outp;  /* write cursor into res */
+        if (PyUnicodeEncodeError_GetStart(exc, &start))
+            return NULL;
+        if (PyUnicodeEncodeError_GetEnd(exc, &end))
+            return NULL;
+        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+            return NULL;
+        res = PyBytes_FromStringAndSize(NULL, 3*(end-start));
+        if (!res) {
+            Py_DECREF(object);
+            return NULL;
+        }
+        outp = PyBytes_AsString(res);
+        for (i = start; i < end; i++) {
+            /* object is guaranteed to be "ready" */
+            Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
+            if (ch < 0xd800 || ch > 0xdfff) {
+                /* Not a surrogate, fail with original exception */
+                PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+                Py_DECREF(res);
+                Py_DECREF(object);
+                return NULL;
+            }
+            *outp++ = (char)(0xe0 | (ch >> 12));          /* top 4 bits */
+            *outp++ = (char)(0x80 | ((ch >> 6) & 0x3f));  /* middle 6 bits */
+            *outp++ = (char)(0x80 | (ch & 0x3f));         /* low 6 bits */
+        }
+        restuple = Py_BuildValue("(On)", res, end);
+        Py_DECREF(res);
+        Py_DECREF(object);
+        return restuple;
+    }
+    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+        unsigned char *p;
+        Py_UCS4 ch = 0;  /* stays 0 unless a surrogate is decoded */
+        if (PyUnicodeDecodeError_GetStart(exc, &start))
+            return NULL;
+        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
+            return NULL;
+        if (!(p = (unsigned char*)PyBytes_AsString(object))) {
+            Py_DECREF(object);
+            return NULL;
+        }
+        /* Try decoding a single surrogate character. If
+           there are more, let the codec call us again. */
+        p += start;
+        if (PyBytes_GET_SIZE(object) - start >= 3 &&  /* 3 bytes remain */
+            (p[0] & 0xf0) == 0xe0 &&
+            (p[1] & 0xc0) == 0x80 &&
+            (p[2] & 0xc0) == 0x80) {
+            /* it's a three-byte code */
+            ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
+            if (ch < 0xd800 || ch > 0xdfff)
+                /* it's not a surrogate - fail */
+                ch = 0;
+        }
+        Py_DECREF(object);
+        if (ch == 0) {
+            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+            return NULL;
         }
+        res = PyUnicode_FromOrdinal(ch);
+        if (res == NULL)
+            return NULL;
+        return Py_BuildValue("(Nn)", res, start+3);  /* 3 bytes consumed */
+    }
+    else {
+        wrong_exception_type(exc);
+        return NULL;
+    }
+}
 
+static PyObject *
+PyCodec_SurrogateEscapeErrors(PyObject *exc)  /* "surrogateescape" handler */
+{
+    PyObject *restuple;
+    PyObject *object;
+    Py_ssize_t i;
+    Py_ssize_t start;
+    Py_ssize_t end;
+    PyObject *res;
+    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+        char *outp;  /* write cursor into res */
+        if (PyUnicodeEncodeError_GetStart(exc, &start))
+            return NULL;
+        if (PyUnicodeEncodeError_GetEnd(exc, &end))
+            return NULL;
+        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+            return NULL;
+        res = PyBytes_FromStringAndSize(NULL, end-start);  /* 1 byte/char */
+        if (!res) {
+            Py_DECREF(object);
+            return NULL;
+        }
+        outp = PyBytes_AsString(res);
+        for (i = start; i < end; i++) {
+            /* object is guaranteed to be "ready" */
+            Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
+            if (ch < 0xdc80 || ch > 0xdcff) {
+                /* Not a UTF-8b surrogate, fail with original exception */
+                PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+                Py_DECREF(res);
+                Py_DECREF(object);
+                return NULL;
+            }
+            *outp++ = ch - 0xdc00;  /* U+DC80..U+DCFF -> byte 0x80..0xFF */
+        }
         restuple = Py_BuildValue("(On)", res, end);
         Py_DECREF(res);
         Py_DECREF(object);
         return restuple;
     }
+    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+        PyObject *str;
+        unsigned char *p;
+        Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
+        int consumed = 0;  /* number of bytes escaped so far */
+        if (PyUnicodeDecodeError_GetStart(exc, &start))
+            return NULL;
+        if (PyUnicodeDecodeError_GetEnd(exc, &end))
+            return NULL;
+        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
+            return NULL;
+        if (!(p = (unsigned char*)PyBytes_AsString(object))) {
+            Py_DECREF(object);
+            return NULL;
+        }
+        while (consumed < 4 && consumed < end-start) {
+            /* Refuse to escape ASCII bytes. */
+            if (p[start+consumed] < 128)
+                break;
+            ch[consumed] = 0xdc00 + p[start+consumed];  /* -> U+DC80..DCFF */
+            consumed++;
+        }
+        Py_DECREF(object);
+        if (!consumed) {
+            /* codec complained about ASCII byte. */
+            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+            return NULL;
+        }
+        str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
+        if (str == NULL)
+            return NULL;
+        return Py_BuildValue("(Nn)", str, start+consumed);
+    }
     else {
         wrong_exception_type(exc);
         return NULL;
     }
 }
-#endif
+
 
 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 {
@@ -737,7 +903,6 @@ static PyObject *strict_errors(PyObject *self, PyObject *exc)
 }
 
 
-#ifdef Py_USING_UNICODE
 static PyObject *ignore_errors(PyObject *self, PyObject *exc)
 {
     return PyCodec_IgnoreErrors(exc);
@@ -760,7 +925,16 @@ static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
 {
     return PyCodec_BackslashReplaceErrors(exc);
 }
-#endif
+
+static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
+{
+    /* Method-table entry point for "surrogatepass"; self is not used. */
+    return PyCodec_SurrogatePassErrors(exc);
+}
+
+static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
+{
+    return PyCodec_SurrogateEscapeErrors(exc);  /* self is not used */
+}
 
 static int _PyCodecRegistry_Init(void)
 {
@@ -779,7 +953,6 @@ static int _PyCodecRegistry_Init(void)
                           "raises a UnicodeError on coding errors.")
             }
         },
-#ifdef Py_USING_UNICODE
         {
             "ignore",
             {
@@ -821,8 +994,23 @@ static int _PyCodecRegistry_Init(void)
                           "which replaces an unencodable character with a "
                           "backslashed escape sequence.")
             }
+        },
+        {
+            "surrogatepass",
+            {
+                "surrogatepass",
+                surrogatepass_errors,
+                METH_O
+            }
+        },
+        {
+            "surrogateescape",
+            {
+                "surrogateescape",
+                surrogateescape_errors,
+                METH_O
+            }
         }
-#endif
     };
 
     PyInterpreterState *interp = PyThreadState_GET()->interp;
@@ -837,7 +1025,7 @@ static int _PyCodecRegistry_Init(void)
     interp->codec_error_registry = PyDict_New();
 
     if (interp->codec_error_registry) {
-        for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
+        for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
             PyObject *func = PyCFunction_New(&methods[i].def, NULL);
             int res;
             if (!func)
@@ -854,19 +1042,11 @@ static int _PyCodecRegistry_Init(void)
         interp->codec_error_registry == NULL)
         Py_FatalError("can't initialize codec registry");
 
-    mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
+    mod = PyImport_ImportModuleNoBlock("encodings");
     if (mod == NULL) {
-        if (PyErr_ExceptionMatches(PyExc_ImportError)) {
-            /* Ignore ImportErrors... this is done so that
-               distributions can disable the encodings package. Note
-               that other errors are not masked, e.g. SystemErrors
-               raised to inform the user of an error in the Python
-               configuration are still reported back to the user. */
-            PyErr_Clear();
-            return 0;
-        }
         return -1;
     }
     Py_DECREF(mod);
+    interp->codecs_initialized = 1;
     return 0;
 }