1 files changed, 2794 insertions, 2019 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a149177a09..1ce10cfd90 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -47,13 +47,10 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include <windows.h>
 #endif
 
-/* Endianness switches; defaults to little endian */
-
-#ifdef WORDS_BIGENDIAN
-# define BYTEORDER_IS_BIG_ENDIAN
-#else
-# define BYTEORDER_IS_LITTLE_ENDIAN
-#endif
+/*[clinic input]
+class str
+[clinic start generated code]*/
+/*[clinic end generated code: checksum=da39a3ee5e6b4b0d3255bfef95601890afd80709]*/
 
 /* --- Globals ------------------------------------------------------------
 
@@ -130,16 +127,14 @@ extern "C" {
 /* true if the Unicode object has an allocated UTF-8 memory block
    (not shared with other data) */
 #define _PyUnicode_HAS_UTF8_MEMORY(op)                  \
-    (assert(_PyUnicode_CHECK(op)),                      \
-     (!PyUnicode_IS_COMPACT_ASCII(op)                   \
+    ((!PyUnicode_IS_COMPACT_ASCII(op)                   \
       && _PyUnicode_UTF8(op)                            \
       && _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
 
 /* true if the Unicode object has an allocated wstr memory block
    (not shared with other data) */
 #define _PyUnicode_HAS_WSTR_MEMORY(op)                  \
-    (assert(_PyUnicode_CHECK(op)),                      \
-     (_PyUnicode_WSTR(op) &&                            \
+    ((_PyUnicode_WSTR(op) &&                            \
       (!PyUnicode_IS_READY(op) ||                       \
        _PyUnicode_WSTR(op) != PyUnicode_DATA(op))))
 
@@ -150,9 +145,9 @@ extern "C" {
    buffer where the result characters are written to. */
 #define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
     do {                                                \
-        to_type *_to = (to_type *) to;                  \
-        const from_type *_iter = (begin);               \
-        const from_type *_end = (end);                  \
+        to_type *_to = (to_type *)(to);                \
+        const from_type *_iter = (from_type *)(begin);  \
+        const from_type *_end = (from_type *)(end);     \
         Py_ssize_t n = (_end) - (_iter);                \
         const from_type *_unrolled_end =                \
             _iter + _Py_SIZE_ROUND_DOWN(n, 4);          \
@@ -199,6 +194,10 @@ static PyObject *unicode_empty = NULL;
         return unicode_empty;                           \
     } while (0)
 
+/* Forward declaration */
+Py_LOCAL_INLINE(int)
+_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
+
 /* List of static strings. */
 static _Py_Identifier *static_strings = NULL;
 
@@ -427,8 +426,6 @@ unicode_result_wchar(PyObject *unicode)
 #ifndef Py_DEBUG
     Py_ssize_t len;
 
-    assert(Py_REFCNT(unicode) == 1);
-
     len = _PyUnicode_WSTR_LENGTH(unicode);
     if (len == 0) {
         Py_DECREF(unicode);
@@ -445,10 +442,12 @@ unicode_result_wchar(PyObject *unicode)
     }
 
     if (_PyUnicode_Ready(unicode) < 0) {
-        Py_XDECREF(unicode);
+        Py_DECREF(unicode);
         return NULL;
     }
 #else
+    assert(Py_REFCNT(unicode) == 1);
+
     /* don't make the result ready in debug mode to ensure that the caller
        makes the string ready before using it */
     assert(_PyUnicode_CheckConsistency(unicode, 1));
@@ -471,7 +470,9 @@ unicode_result_ready(PyObject *unicode)
     }
 
     if (length == 1) {
-        Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
+        void *data = PyUnicode_DATA(unicode);
+        int kind = PyUnicode_KIND(unicode);
+        Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
         if (ch < 256) {
             PyObject *latin1_char = unicode_latin1[ch];
             if (latin1_char != NULL) {
@@ -544,7 +545,6 @@ static OSVERSIONINFOEX winver;
 
 static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 
-#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 #define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 
 #define BLOOM_LINEBREAK(ch)                                             \
@@ -554,21 +554,40 @@ static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 Py_LOCAL_INLINE(BLOOM_MASK)
 make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 {
+#define BLOOM_UPDATE(TYPE, MASK, PTR, LEN)             \
+    do {                                               \
+        TYPE *data = (TYPE *)PTR;                      \
+        TYPE *end = data + LEN;                        \
+        Py_UCS4 ch;                                    \
+        for (; data != end; data++) {                  \
+            ch = *data;                                \
+            MASK |= (1UL << (ch & (BLOOM_WIDTH - 1))); \
+        }                                              \
+        break;                                         \
+    } while (0)
+    /* BLOOM_UPDATE sets bit (ch & (BLOOM_WIDTH - 1)) of MASK per character. */
     /* calculate simple bloom-style bitmask for a given unicode string */
 
     BLOOM_MASK mask;
-    Py_ssize_t i;
 
     mask = 0;
-    for (i = 0; i < len; i++)
-        BLOOM_ADD(mask, PyUnicode_READ(kind, ptr, i));
-
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND:
+        BLOOM_UPDATE(Py_UCS1, mask, ptr, len);
+        break;
+    case PyUnicode_2BYTE_KIND:
+        BLOOM_UPDATE(Py_UCS2, mask, ptr, len);
+        break;
+    case PyUnicode_4BYTE_KIND:
+        BLOOM_UPDATE(Py_UCS4, mask, ptr, len);
+        break;
+    default:
+        assert(0);  /* kind is expected to be one of the three cases above */
+    }
     return mask;
-}
 
-#define BLOOM_MEMBER(mask, chr, str) \
-    (BLOOM(mask, chr) \
-     && (PyUnicode_FindChar(str, chr, 0, PyUnicode_GET_LENGTH(str), 1) >= 0))
+#undef BLOOM_UPDATE
+}
 
 /* Compilation of templated routines */
 
@@ -588,6 +607,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 #include "stringlib/split.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
+#include "stringlib/replace.h"
 #include "stringlib/find_max_char.h"
 #include "stringlib/localeutil.h"
 #include "stringlib/undef.h"
@@ -598,6 +618,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 #include "stringlib/split.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
+#include "stringlib/replace.h"
 #include "stringlib/find_max_char.h"
 #include "stringlib/localeutil.h"
 #include "stringlib/undef.h"
@@ -608,6 +629,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 #include "stringlib/split.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
+#include "stringlib/replace.h"
 #include "stringlib/find_max_char.h"
 #include "stringlib/localeutil.h"
 #include "stringlib/undef.h"
@@ -654,6 +676,25 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind,
     }
 }
 
+#ifdef Py_DEBUG
+/* Debug-only helper: fill the data of a Unicode string, from character index
+   old_length up to its current length, with 0xff bytes to detect bugs earlier.
+   Does nothing when the current length is not greater than old_length.
+   _PyUnicode_CheckConsistency(str, 1) detects invalid characters, at least for
+   ASCII and UCS-4 strings. U+00FF is invalid in ASCII and U+FFFFFFFF is an
+   invalid character in Unicode 6.0. */
+static void
+unicode_fill_invalid(PyObject *unicode, Py_ssize_t old_length)
+{
+    int kind = PyUnicode_KIND(unicode);
+    Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode);
+    Py_ssize_t length = _PyUnicode_LENGTH(unicode);
+    if (length <= old_length)
+        return;
+    memset(data + old_length * kind, 0xff, (length - old_length) * kind);
+}
+#endif
+
 static PyObject*
 resize_compact(PyObject *unicode, Py_ssize_t length)
 {
@@ -662,6 +703,10 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
     Py_ssize_t new_size;
     int share_wstr;
     PyObject *new_unicode;
+#ifdef Py_DEBUG
+    Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
+
     assert(unicode_modifiable(unicode));
     assert(PyUnicode_IS_READY(unicode));
     assert(PyUnicode_IS_COMPACT(unicode));
@@ -701,6 +746,9 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
         PyObject_DEL(_PyUnicode_WSTR(unicode));
         _PyUnicode_WSTR(unicode) = NULL;
     }
+#ifdef Py_DEBUG
+    unicode_fill_invalid(unicode, old_length);
+#endif
     PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
                     length, 0);
     assert(_PyUnicode_CheckConsistency(unicode, 0));
@@ -719,6 +767,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
         Py_ssize_t char_size;
         int share_wstr, share_utf8;
         void *data;
+#ifdef Py_DEBUG
+        Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
 
         data = _PyUnicode_DATA_ANY(unicode);
         char_size = PyUnicode_KIND(unicode);
@@ -754,6 +805,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
         }
         _PyUnicode_LENGTH(unicode) = length;
         PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
+#ifdef Py_DEBUG
+        unicode_fill_invalid(unicode, old_length);
+#endif
         if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) {
             assert(_PyUnicode_CheckConsistency(unicode, 0));
             return 0;
@@ -806,8 +860,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
             return NULL;
         copy_length = _PyUnicode_WSTR_LENGTH(unicode);
         copy_length = Py_MIN(copy_length, length);
-        Py_UNICODE_COPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode),
-                        copy_length);
+        Py_MEMCPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode),
+                  copy_length * sizeof(wchar_t));
         return w;
     }
 }
@@ -824,7 +878,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
 static PyUnicodeObject *
 _PyUnicode_New(Py_ssize_t length)
 {
-    register PyUnicodeObject *unicode;
+    PyUnicodeObject *unicode;
     size_t new_size;
 
     /* Optimization for empty strings */
@@ -847,6 +901,19 @@ _PyUnicode_New(Py_ssize_t length)
     if (unicode == NULL)
         return NULL;
     new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
+
+    _PyUnicode_WSTR_LENGTH(unicode) = length;
+    _PyUnicode_HASH(unicode) = -1;
+    _PyUnicode_STATE(unicode).interned = 0;
+    _PyUnicode_STATE(unicode).kind = 0;
+    _PyUnicode_STATE(unicode).compact = 0;
+    _PyUnicode_STATE(unicode).ready = 0;
+    _PyUnicode_STATE(unicode).ascii = 0;
+    _PyUnicode_DATA_ANY(unicode) = NULL;
+    _PyUnicode_LENGTH(unicode) = 0;
+    _PyUnicode_UTF8(unicode) = NULL;
+    _PyUnicode_UTF8_LENGTH(unicode) = 0;
+
     _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size);
     if (!_PyUnicode_WSTR(unicode)) {
         Py_DECREF(unicode);
@@ -863,17 +930,7 @@ _PyUnicode_New(Py_ssize_t length)
      */
     _PyUnicode_WSTR(unicode)[0] = 0;
     _PyUnicode_WSTR(unicode)[length] = 0;
-    _PyUnicode_WSTR_LENGTH(unicode) = length;
-    _PyUnicode_HASH(unicode) = -1;
-    _PyUnicode_STATE(unicode).interned = 0;
-    _PyUnicode_STATE(unicode).kind = 0;
-    _PyUnicode_STATE(unicode).compact = 0;
-    _PyUnicode_STATE(unicode).ready = 0;
-    _PyUnicode_STATE(unicode).ascii = 0;
-    _PyUnicode_DATA_ANY(unicode) = NULL;
-    _PyUnicode_LENGTH(unicode) = 0;
-    _PyUnicode_UTF8(unicode) = NULL;
-    _PyUnicode_UTF8_LENGTH(unicode) = 0;
+
     assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0));
     return unicode;
 }
@@ -1078,11 +1135,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
         }
     }
 #ifdef Py_DEBUG
-    /* Fill the data with invalid characters to detect bugs earlier.
-       _PyUnicode_CheckConsistency(str, 1) detects invalid characters,
-       at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII
-       and U+FFFFFFFF is an invalid character in Unicode 6.0. */
-    memset(data, 0xff, size * kind);
+    unicode_fill_invalid((PyObject*)unicode, 0);
 #endif
     assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
     return obj;
@@ -1512,7 +1565,7 @@ _PyUnicode_Ready(PyObject *unicode)
 }
 
 static void
-unicode_dealloc(register PyObject *unicode)
+unicode_dealloc(PyObject *unicode)
 {
     switch (PyUnicode_CHECK_INTERNED(unicode)) {
     case SSTATE_NOT_INTERNED:
@@ -1644,38 +1697,6 @@ PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length)
     return unicode_resize(p_unicode, length);
 }
 
-static int
-unicode_widen(PyObject **p_unicode, Py_ssize_t length,
-              unsigned int maxchar)
-{
-    PyObject *result;
-    assert(PyUnicode_IS_READY(*p_unicode));
-    assert(length <= PyUnicode_GET_LENGTH(*p_unicode));
-    if (maxchar <= PyUnicode_MAX_CHAR_VALUE(*p_unicode))
-        return 0;
-    result = PyUnicode_New(PyUnicode_GET_LENGTH(*p_unicode),
-                           maxchar);
-    if (result == NULL)
-        return -1;
-    _PyUnicode_FastCopyCharacters(result, 0, *p_unicode, 0, length);
-    Py_DECREF(*p_unicode);
-    *p_unicode = result;
-    return 0;
-}
-
-static int
-unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos,
-                Py_UCS4 ch)
-{
-    assert(ch <= MAX_UNICODE);
-    if (unicode_widen(p_unicode, *pos, ch) < 0)
-        return -1;
-    PyUnicode_WRITE(PyUnicode_KIND(*p_unicode),
-                    PyUnicode_DATA(*p_unicode),
-                    (*pos)++, ch);
-    return 0;
-}
-
 /* Copy a ASCII or latin1 char* string into a Python Unicode string.
 
    WARNING: The function doesn't copy the terminating null character and
@@ -1692,6 +1713,14 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
     switch (kind) {
     case PyUnicode_1BYTE_KIND: {
         assert(index + len <= PyUnicode_GET_LENGTH(unicode));
+#ifdef Py_DEBUG
+        if (PyUnicode_IS_ASCII(unicode)) {
+            Py_UCS4 maxchar = ucs1lib_find_max_char(
+                (const Py_UCS1*)str,
+                (const Py_UCS1*)str + len);
+            assert(maxchar < 128);
+        }
+#endif
         memcpy((char *) data + index, str, len);
         break;
     }
@@ -1720,7 +1749,6 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
     }
 }
 
-
 static PyObject*
 get_latin1_char(unsigned char ch)
 {
@@ -1737,6 +1765,34 @@ get_latin1_char(unsigned char ch)
     return unicode;
 }
 
+static PyObject*
+unicode_char(Py_UCS4 ch)
+{
+    PyObject *unicode;
+    /* Return a one-character string holding ch; NULL if PyUnicode_New() fails. */
+    assert(ch <= MAX_UNICODE);
+    /* Code points below 256 are delegated to get_latin1_char(). */
+    if (ch < 256)
+        return get_latin1_char(ch);
+
+    unicode = PyUnicode_New(1, ch);
+    if (unicode == NULL)
+        return NULL;
+    switch (PyUnicode_KIND(unicode)) {
+    case PyUnicode_1BYTE_KIND:
+        PyUnicode_1BYTE_DATA(unicode)[0] = (Py_UCS1)ch;
+        break;
+    case PyUnicode_2BYTE_KIND:
+        PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch;
+        break;
+    default:
+        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+        PyUnicode_4BYTE_DATA(unicode)[0] = ch;
+    }
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
+    return unicode;
+}
+
 PyObject *
 PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
 {
@@ -1935,18 +1991,8 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
     if (size == 0)
         _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
-    if (size == 1) {
-        Py_UCS4 ch = u[0];
-        if (ch < 256)
-            return get_latin1_char((unsigned char)ch);
-
-        res = PyUnicode_New(1, ch);
-        if (res == NULL)
-            return NULL;
-        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
-        assert(_PyUnicode_CheckConsistency(res, 1));
-        return res;
-    }
+    if (size == 1)
+        return unicode_char(u[0]);
 
     max_char = ucs2lib_find_max_char(u, u + size);
     res = PyUnicode_New(size, max_char);
@@ -1971,18 +2017,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
     if (size == 0)
         _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
-    if (size == 1) {
-        Py_UCS4 ch = u[0];
-        if (ch < 256)
-            return get_latin1_char((unsigned char)ch);
-
-        res = PyUnicode_New(1, ch);
-        if (res == NULL)
-            return NULL;
-        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
-        assert(_PyUnicode_CheckConsistency(res, 1));
-        return res;
-    }
+    if (size == 1)
+        return unicode_char(u[0]);
 
     max_char = ucs4lib_find_max_char(u, u + size);
     res = PyUnicode_New(size, max_char);
@@ -2258,7 +2294,7 @@ PyUnicode_AsUCS4Copy(PyObject *string)
 #ifdef HAVE_WCHAR_H
 
 PyObject *
-PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
+PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size)
 {
     if (w == NULL) {
         if (size == 0)
@@ -2278,16 +2314,9 @@ PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
 
 static void
 makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
-        int zeropad, int width, int precision, char c)
+        char c)
 {
     *fmt++ = '%';
-    if (width) {
-        if (zeropad)
-            *fmt++ = '0';
-        fmt += sprintf(fmt, "%d", width);
-    }
-    if (precision)
-        fmt += sprintf(fmt, ".%d", precision);
     if (longflag)
         *fmt++ = 'l';
     else if (longlongflag) {
@@ -2312,44 +2341,139 @@ makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
     *fmt = '\0';
 }
 
-/* helper for PyUnicode_FromFormatV() */
+/* maximum number of characters required for output of %lld or %p.
+   We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
+   plus 1 for the sign.  53/22 is an upper bound for log10(256). */
+#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
+
+static int
+unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str,
+                             Py_ssize_t width, Py_ssize_t precision)
+{
+    Py_ssize_t length, fill, arglen;
+    Py_UCS4 maxchar;
+    /* Append str to writer, honoring width and precision; -1 means error. */
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+
+    length = PyUnicode_GET_LENGTH(str);
+    if ((precision == -1 || precision >= length)
+        && width <= length)
+        return _PyUnicodeWriter_WriteStr(writer, str);
+    /* A precision other than -1 caps the number of characters copied. */
+    if (precision != -1)
+        length = Py_MIN(precision, length);
+
+    arglen = Py_MAX(length, width);
+    if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
+        maxchar = _PyUnicode_FindMaxChar(str, 0, length);
+    else
+        maxchar = writer->maxchar;
+
+    if (_PyUnicodeWriter_Prepare(writer, arglen, maxchar) == -1)
+        return -1;
+    /* When width exceeds length, the space padding is written first. */
+    if (width > length) {
+        fill = width - length;
+        if (PyUnicode_Fill(writer->buffer, writer->pos, fill, ' ') == -1)
+            return -1;
+        writer->pos += fill;
+    }
+
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, 0, length);
+    writer->pos += length;
+    return 0;
+}
+
+static int
+unicode_fromformat_write_cstr(_PyUnicodeWriter *writer, const char *str,
+                              Py_ssize_t width, Py_ssize_t precision)
+{
+    /* str is decoded as UTF-8 with the "replace" error handler. */
+    Py_ssize_t length;
+    PyObject *unicode;
+    int res;
+    /* precision caps the strlen() count handed to the decoder. */
+    length = strlen(str);
+    if (precision != -1)
+        length = Py_MIN(length, precision);
+    unicode = PyUnicode_DecodeUTF8Stateful(str, length, "replace", NULL);
+    if (unicode == NULL)
+        return -1;
+    /* The cap was applied above, so -1 is passed as precision here. */
+    res = unicode_fromformat_write_str(writer, unicode, width, -1);
+    Py_DECREF(unicode);
+    return res;
+}
 
 static const char*
-parse_format_flags(const char *f,
-                   int *p_width, int *p_precision,
-                   int *p_longflag, int *p_longlongflag, int *p_size_tflag)
+unicode_fromformat_arg(_PyUnicodeWriter *writer,
+                       const char *f, va_list *vargs)
 {
-    int width, precision, longflag, longlongflag, size_tflag;
+    const char *p;
+    Py_ssize_t len;
+    int zeropad;
+    Py_ssize_t width;
+    Py_ssize_t precision;
+    int longflag;
+    int longlongflag;
+    int size_tflag;
+    Py_ssize_t fill;
 
-    /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
+    p = f;
     f++;
-    width = 0;
-    while (Py_ISDIGIT((unsigned)*f))
-        width = (width*10) + *f++ - '0';
-    precision = 0;
+    zeropad = 0;
+    if (*f == '0') {
+        zeropad = 1;
+        f++;
+    }
+
+    /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
+    width = -1;
+    if (Py_ISDIGIT((unsigned)*f)) {
+        width = *f - '0';
+        f++;
+        while (Py_ISDIGIT((unsigned)*f)) {
+            if (width > (PY_SSIZE_T_MAX - ((int)*f - '0')) / 10) {
+                PyErr_SetString(PyExc_ValueError,
+                                "width too big");
+                return NULL;
+            }
+            width = (width * 10) + (*f - '0');
+            f++;
+        }
+    }
+    precision = -1;
     if (*f == '.') {
         f++;
-        while (Py_ISDIGIT((unsigned)*f))
-            precision = (precision*10) + *f++ - '0';
+        if (Py_ISDIGIT((unsigned)*f)) {
+            precision = (*f - '0');
+            f++;
+            while (Py_ISDIGIT((unsigned)*f)) {
+                if (precision > (PY_SSIZE_T_MAX - ((int)*f - '0')) / 10) {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "precision too big");
+                    return NULL;
+                }
+                precision = (precision * 10) + (*f - '0');
+                f++;
+            }
+        }
         if (*f == '%') {
             /* "%.3%s" => f points to "3" */
             f--;
         }
     }
     if (*f == '\0') {
-        /* bogus format "%.1" => go backward, f points to "1" */
+        /* bogus format "%.123" => go backward, f points to "3" */
         f--;
     }
-    if (p_width != NULL)
-        *p_width = width;
-    if (p_precision != NULL)
-        *p_precision = precision;
 
     /* Handle %ld, %lu, %lld and %llu. */
     longflag = 0;
     longlongflag = 0;
     size_tflag = 0;
-
     if (*f == 'l') {
         if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') {
             longflag = 1;
@@ -2368,499 +2492,284 @@ parse_format_flags(const char *f,
         size_tflag = 1;
         ++f;
     }
-    if (p_longflag != NULL)
-        *p_longflag = longflag;
-    if (p_longlongflag != NULL)
-        *p_longlongflag = longlongflag;
-    if (p_size_tflag != NULL)
-        *p_size_tflag = size_tflag;
-    return f;
-}
 
-/* maximum number of characters required for output of %ld.  21 characters
-   allows for 64-bit integers (in decimal) and an optional sign. */
-#define MAX_LONG_CHARS 21
-/* maximum number of characters required for output of %lld.
-   We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
-   plus 1 for the sign.  53/22 is an upper bound for log10(256). */
-#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
+    if (f[1] == '\0')
+        writer->overallocate = 0;
 
-PyObject *
-PyUnicode_FromFormatV(const char *format, va_list vargs)
-{
-    va_list count;
-    Py_ssize_t callcount = 0;
-    PyObject **callresults = NULL;
-    PyObject **callresult = NULL;
-    Py_ssize_t n = 0;
-    int width = 0;
-    int precision = 0;
-    int zeropad;
-    const char* f;
-    PyObject *string;
-    /* used by sprintf */
-    char fmt[61]; /* should be enough for %0width.precisionlld */
-    Py_UCS4 maxchar = 127; /* result is ASCII by default */
-    Py_UCS4 argmaxchar;
-    Py_ssize_t numbersize = 0;
-    char *numberresults = NULL;
-    char *numberresult = NULL;
-    Py_ssize_t i;
-    int kind;
-    void *data;
+    switch (*f) {
+    case 'c':
+    {
+        int ordinal = va_arg(*vargs, int);
+        if (ordinal < 0 || ordinal > MAX_UNICODE) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "character argument not in range(0x110000)");
+            return NULL;
+        }
+        if (_PyUnicodeWriter_WriteCharInline(writer, ordinal) < 0)
+            return NULL;
+        break;
+    }
 
-    Py_VA_COPY(count, vargs);
-    /* step 1: count the number of %S/%R/%A/%s format specifications
-     * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
-     * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
-     * result in an array)
-     * also estimate a upper bound for all the number formats in the string,
-     * numbers will be formatted in step 3 and be kept in a '\0'-separated
-     * buffer before putting everything together. */
-    for (f = format; *f; f++) {
-        if (*f == '%') {
-            int longlongflag;
-            /* skip width or width.precision (eg. "1.2" of "%1.2f") */
-            f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL);
-            if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
-                ++callcount;
+    case 'i':
+    case 'd':
+    case 'u':
+    case 'x':
+    {
+        /* used by sprintf */
+        char fmt[10]; /* should be enough for "%0lld\0" */
+        char buffer[MAX_LONG_LONG_CHARS];
+        Py_ssize_t arglen;
 
-            else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') {
+        if (*f == 'u') {
+            makefmt(fmt, longflag, longlongflag, size_tflag, *f);
+
+            if (longflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, unsigned long));
 #ifdef HAVE_LONG_LONG
-                if (longlongflag) {
-                    if (width < MAX_LONG_LONG_CHARS)
-                        width = MAX_LONG_LONG_CHARS;
-                }
-                else
+            else if (longlongflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, unsigned PY_LONG_LONG));
 #endif
-                    /* MAX_LONG_CHARS is enough to hold a 64-bit integer,
-                       including sign.  Decimal takes the most space.  This
-                       isn't enough for octal.  If a width is specified we
-                       need more (which we allocate later). */
-                    if (width < MAX_LONG_CHARS)
-                        width = MAX_LONG_CHARS;
-
-                /* account for the size + '\0' to separate numbers
-                   inside of the numberresults buffer */
-                numbersize += (width + 1);
-            }
+            else if (size_tflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, size_t));
+            else
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, unsigned int));
         }
-        else if ((unsigned char)*f > 127) {
-            PyErr_Format(PyExc_ValueError,
-                "PyUnicode_FromFormatV() expects an ASCII-encoded format "
-                "string, got a non-ASCII byte: 0x%02x",
-                (unsigned char)*f);
+        else if (*f == 'x') {
+            makefmt(fmt, 0, 0, 0, 'x');
+            len = sprintf(buffer, fmt, va_arg(*vargs, int));
+        }
+        else {
+            makefmt(fmt, longflag, longlongflag, size_tflag, *f);
+
+            if (longflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, long));
+#ifdef HAVE_LONG_LONG
+            else if (longlongflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, PY_LONG_LONG));
+#endif
+            else if (size_tflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, Py_ssize_t));
+            else
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, int));
+        }
+        assert(len >= 0);
+
+        if (precision < len)
+            precision = len;
+
+        arglen = Py_MAX(precision, width);
+        if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
             return NULL;
+
+        if (width > precision) {
+            Py_UCS4 fillchar;
+            fill = width - precision;
+            fillchar = zeropad?'0':' ';
+            if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1)
+                return NULL;
+            writer->pos += fill;
         }
+        if (precision > len) {
+            fill = precision - len;
+            if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1)
+                return NULL;
+            writer->pos += fill;
+        }
+
+        if (_PyUnicodeWriter_WriteASCIIString(writer, buffer, len) < 0)
+            return NULL;
+        break;
     }
-    /* step 2: allocate memory for the results of
-     * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
-    if (callcount) {
-        callresults = PyObject_Malloc(sizeof(PyObject *) * callcount);
-        if (!callresults) {
-            PyErr_NoMemory();
+
+    case 'p':
+    {
+        char number[MAX_LONG_LONG_CHARS];
+
+        len = sprintf(number, "%p", va_arg(*vargs, void*));
+        assert(len >= 0);
+
+        /* %p is ill-defined:  ensure leading 0x. */
+        if (number[1] == 'X')
+            number[1] = 'x';
+        else if (number[1] != 'x') {
+            memmove(number + 2, number,
+                    strlen(number) + 1);
+            number[0] = '0';
+            number[1] = 'x';
+            len += 2;
+        }
+
+        if (_PyUnicodeWriter_WriteASCIIString(writer, number, len) < 0)
             return NULL;
+        break;
+    }
+
+    case 's':
+    {
+        /* UTF-8 */
+        const char *s = va_arg(*vargs, const char*);
+        if (unicode_fromformat_write_cstr(writer, s, width, precision) < 0)
+            return NULL;
+        break;
+    }
+
+    case 'U':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        assert(obj && _PyUnicode_CHECK(obj));
+
+        if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
+            return NULL;
+        break;
+    }
+
+    case 'V':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        const char *str = va_arg(*vargs, const char *);
+        if (obj) {
+            assert(_PyUnicode_CHECK(obj));
+            if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
+                return NULL;
         }
-        callresult = callresults;
+        else {
+            assert(str != NULL);
+            if (unicode_fromformat_write_cstr(writer, str, width, precision) < 0)
+                return NULL;
+        }
+        break;
     }
-    /* step 2.5: allocate memory for the results of formating numbers */
-    if (numbersize) {
-        numberresults = PyObject_Malloc(numbersize);
-        if (!numberresults) {
-            PyErr_NoMemory();
-            goto fail;
+
+    case 'S':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        PyObject *str;
+        assert(obj);
+        str = PyObject_Str(obj);
+        if (!str)
+            return NULL;
+        if (unicode_fromformat_write_str(writer, str, width, precision) == -1) {
+            Py_DECREF(str);
+            return NULL;
         }
-        numberresult = numberresults;
+        Py_DECREF(str);
+        break;
     }
 
-    /* step 3: format numbers and figure out how large a buffer we need */
-    for (f = format; *f; f++) {
-        if (*f == '%') {
-            const char* p;
-            int longflag;
-            int longlongflag;
-            int size_tflag;
-            int numprinted;
+    case 'R':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        PyObject *repr;
+        assert(obj);
+        repr = PyObject_Repr(obj);
+        if (!repr)
+            return NULL;
+        if (unicode_fromformat_write_str(writer, repr, width, precision) == -1) {
+            Py_DECREF(repr);
+            return NULL;
+        }
+        Py_DECREF(repr);
+        break;
+    }
 
-            p = f;
-            zeropad = (f[1] == '0');
-            f = parse_format_flags(f, &width, &precision,
-                                   &longflag, &longlongflag, &size_tflag);
-            switch (*f) {
-            case 'c':
-            {
-                int ordinal = va_arg(count, int);
-                if (ordinal < 0 || ordinal > MAX_UNICODE) {
-                    PyErr_SetString(PyExc_OverflowError,
-                                    "%c arg not in range(0x110000)");
-                    goto fail;
-                }
-                maxchar = Py_MAX(maxchar, (Py_UCS4)ordinal);
-                n++;
-                break;
-            }
-            case '%':
-                n++;
-                break;
-            case 'i':
-            case 'd':
-                makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
-                        width, precision, *f);
-                if (longflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, long));
-#ifdef HAVE_LONG_LONG
-                else if (longlongflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, PY_LONG_LONG));
-#endif
-                else if (size_tflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, Py_ssize_t));
-                else
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, int));
-                n += numprinted;
-                /* advance by +1 to skip over the '\0' */
-                numberresult += (numprinted + 1);
-                assert(*(numberresult - 1) == '\0');
-                assert(*(numberresult - 2) != '\0');
-                assert(numprinted >= 0);
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            case 'u':
-                makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
-                        width, precision, 'u');
-                if (longflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, unsigned long));
-#ifdef HAVE_LONG_LONG
-                else if (longlongflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, unsigned PY_LONG_LONG));
-#endif
-                else if (size_tflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, size_t));
-                else
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, unsigned int));
-                n += numprinted;
-                numberresult += (numprinted + 1);
-                assert(*(numberresult - 1) == '\0');
-                assert(*(numberresult - 2) != '\0');
-                assert(numprinted >= 0);
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            case 'x':
-                makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x');
-                numprinted = sprintf(numberresult, fmt, va_arg(count, int));
-                n += numprinted;
-                numberresult += (numprinted + 1);
-                assert(*(numberresult - 1) == '\0');
-                assert(*(numberresult - 2) != '\0');
-                assert(numprinted >= 0);
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            case 'p':
-                numprinted = sprintf(numberresult, "%p", va_arg(count, void*));
-                /* %p is ill-defined:  ensure leading 0x. */
-                if (numberresult[1] == 'X')
-                    numberresult[1] = 'x';
-                else if (numberresult[1] != 'x') {
-                    memmove(numberresult + 2, numberresult,
-                            strlen(numberresult) + 1);
-                    numberresult[0] = '0';
-                    numberresult[1] = 'x';
-                    numprinted += 2;
-                }
-                n += numprinted;
-                numberresult += (numprinted + 1);
-                assert(*(numberresult - 1) == '\0');
-                assert(*(numberresult - 2) != '\0');
-                assert(numprinted >= 0);
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            case 's':
-            {
-                /* UTF-8 */
-                const char *s = va_arg(count, const char*);
-                PyObject *str = PyUnicode_DecodeUTF8Stateful(s, strlen(s), "replace", NULL);
-                if (!str)
-                    goto fail;
-                /* since PyUnicode_DecodeUTF8 returns already flexible
-                   unicode objects, there is no need to call ready on them */
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(str);
-                /* Remember the str and switch to the next slot */
-                *callresult++ = str;
-                break;
-            }
-            case 'U':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                assert(obj && _PyUnicode_CHECK(obj));
-                if (PyUnicode_READY(obj) == -1)
-                    goto fail;
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(obj);
-                break;
-            }
-            case 'V':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                const char *str = va_arg(count, const char *);
-                PyObject *str_obj;
-                assert(obj || str);
-                assert(!obj || _PyUnicode_CHECK(obj));
-                if (obj) {
-                    if (PyUnicode_READY(obj) == -1)
-                        goto fail;
-                    argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
-                    maxchar = Py_MAX(maxchar, argmaxchar);
-                    n += PyUnicode_GET_LENGTH(obj);
-                    *callresult++ = NULL;
-                }
-                else {
-                    str_obj = PyUnicode_DecodeUTF8Stateful(str, strlen(str), "replace", NULL);
-                    if (!str_obj)
-                        goto fail;
-                    if (PyUnicode_READY(str_obj) == -1) {
-                        Py_DECREF(str_obj);
-                        goto fail;
-                    }
-                    argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
-                    maxchar = Py_MAX(maxchar, argmaxchar);
-                    n += PyUnicode_GET_LENGTH(str_obj);
-                    *callresult++ = str_obj;
-                }
-                break;
-            }
-            case 'S':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                PyObject *str;
-                assert(obj);
-                str = PyObject_Str(obj);
-                if (!str)
-                    goto fail;
-                if (PyUnicode_READY(str) == -1) {
-                    Py_DECREF(str);
-                    goto fail;
-                }
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(str);
-                /* Remember the str and switch to the next slot */
-                *callresult++ = str;
-                break;
-            }
-            case 'R':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                PyObject *repr;
-                assert(obj);
-                repr = PyObject_Repr(obj);
-                if (!repr)
-                    goto fail;
-                if (PyUnicode_READY(repr) == -1) {
-                    Py_DECREF(repr);
-                    goto fail;
-                }
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(repr);
-                /* Remember the repr and switch to the next slot */
-                *callresult++ = repr;
-                break;
-            }
-            case 'A':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                PyObject *ascii;
-                assert(obj);
-                ascii = PyObject_ASCII(obj);
-                if (!ascii)
-                    goto fail;
-                if (PyUnicode_READY(ascii) == -1) {
-                    Py_DECREF(ascii);
-                    goto fail;
-                }
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(ascii);
-                /* Remember the repr and switch to the next slot */
-                *callresult++ = ascii;
-                break;
-            }
-            default:
-                /* if we stumble upon an unknown
-                   formatting code, copy the rest of
-                   the format string to the output
-                   string. (we cannot just skip the
-                   code, since there's no way to know
-                   what's in the argument list) */
-                n += strlen(p);
-                goto expand;
-            }
-        } else
-            n++;
-    }
-  expand:
-    /* step 4: fill the buffer */
-    /* Since we've analyzed how much space we need,
-       we don't have to resize the string.
-       There can be no errors beyond this point. */
-    string = PyUnicode_New(n, maxchar);
-    if (!string)
-        goto fail;
-    kind = PyUnicode_KIND(string);
-    data = PyUnicode_DATA(string);
-    callresult = callresults;
-    numberresult = numberresults;
+    case 'A':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        PyObject *ascii;
+        assert(obj);
+        ascii = PyObject_ASCII(obj);
+        if (!ascii)
+            return NULL;
+        if (unicode_fromformat_write_str(writer, ascii, width, precision) == -1) {
+            Py_DECREF(ascii);
+            return NULL;
+        }
+        Py_DECREF(ascii);
+        break;
+    }
+
+    case '%':
+        if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
+            return NULL;
+        break;
+
+    default:
+        /* if we stumble upon an unknown formatting code, copy the rest
+           of the format string to the output string. (we cannot just
+           skip the code, since there's no way to know what's in the
+           argument list) */
+        len = strlen(p);
+        if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1)
+            return NULL;
+        f = p+len;
+        return f;
+    }
 
-    for (i = 0, f = format; *f; f++) {
+    f++;
+    return f;
+}
+
+PyObject *
+PyUnicode_FromFormatV(const char *format, va_list vargs)
+{
+    va_list vargs2;
+    const char *f;
+    _PyUnicodeWriter writer;
+
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = strlen(format) + 100;
+    writer.overallocate = 1;
+
+    /* va_list may be an array (of 1 item) on some platforms (ex: AMD64).
+       Copy it to be able to pass a reference to a subfunction. */
+    Py_VA_COPY(vargs2, vargs);
+
+    for (f = format; *f; ) {
         if (*f == '%') {
-            const char* p;
+            f = unicode_fromformat_arg(&writer, f, &vargs2);
+            if (f == NULL)
+                goto fail;
+        }
+        else {
+            const char *p;
+            Py_ssize_t len;
 
             p = f;
-            f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL);
-            /* checking for == because the last argument could be a empty
-               string, which causes i to point to end, the assert at the end of
-               the loop */
-            assert(i <= PyUnicode_GET_LENGTH(string));
-
-            switch (*f) {
-            case 'c':
+            do
             {
-                const int ordinal = va_arg(vargs, int);
-                PyUnicode_WRITE(kind, data, i++, ordinal);
-                break;
-            }
-            case 'i':
-            case 'd':
-            case 'u':
-            case 'x':
-            case 'p':
-            {
-                Py_ssize_t len;
-                /* unused, since we already have the result */
-                if (*f == 'p')
-                    (void) va_arg(vargs, void *);
-                else
-                    (void) va_arg(vargs, int);
-                /* extract the result from numberresults and append. */
-                len = strlen(numberresult);
-                unicode_write_cstr(string, i, numberresult, len);
-                /* skip over the separating '\0' */
-                i += len;
-                numberresult += len;
-                assert(*numberresult == '\0');
-                numberresult++;
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            }
-            case 's':
-            {
-                /* unused, since we already have the result */
-                Py_ssize_t size;
-                (void) va_arg(vargs, char *);
-                size = PyUnicode_GET_LENGTH(*callresult);
-                assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
-                _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
-                i += size;
-                /* We're done with the unicode()/repr() => forget it */
-                Py_DECREF(*callresult);
-                /* switch to next unicode()/repr() result */
-                ++callresult;
-                break;
-            }
-            case 'U':
-            {
-                PyObject *obj = va_arg(vargs, PyObject *);
-                Py_ssize_t size;
-                assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
-                size = PyUnicode_GET_LENGTH(obj);
-                _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
-                i += size;
-                break;
-            }
-            case 'V':
-            {
-                Py_ssize_t size;
-                PyObject *obj = va_arg(vargs, PyObject *);
-                va_arg(vargs, const char *);
-                if (obj) {
-                    size = PyUnicode_GET_LENGTH(obj);
-                    assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
-                    _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
-                    i += size;
-                } else {
-                    size = PyUnicode_GET_LENGTH(*callresult);
-                    assert(PyUnicode_KIND(*callresult) <=
-                           PyUnicode_KIND(string));
-                    _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
-                    i += size;
-                    Py_DECREF(*callresult);
+                if ((unsigned char)*p > 127) {
+                    PyErr_Format(PyExc_ValueError,
+                        "PyUnicode_FromFormatV() expects an ASCII-encoded format "
+                        "string, got a non-ASCII byte: 0x%02x",
+                        (unsigned char)*p);
+                    goto fail;   /* release the writer buffer instead of leaking it */
                 }
-                ++callresult;
-                break;
+                p++;
             }
-            case 'S':
-            case 'R':
-            case 'A':
-            {
-                Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult);
-                /* unused, since we already have the result */
-                (void) va_arg(vargs, PyObject *);
-                assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
-                _PyUnicode_FastCopyCharacters(string, i, *callresult, 0,  size);
-                i += size;
-                /* We're done with the unicode()/repr() => forget it */
-                Py_DECREF(*callresult);
-                /* switch to next unicode()/repr() result */
-                ++callresult;
-                break;
-            }
-            case '%':
-                PyUnicode_WRITE(kind, data, i++, '%');
-                break;
-            default:
-            {
-                Py_ssize_t len = strlen(p);
-                unicode_write_cstr(string, i, p, len);
-                i += len;
-                assert(i == PyUnicode_GET_LENGTH(string));
-                goto end;
-            }
-            }
-        }
-        else {
-            assert(i < PyUnicode_GET_LENGTH(string));
-            PyUnicode_WRITE(kind, data, i++, *f);
+            while (*p != '\0' && *p != '%');
+            len = p - f;
+
+            if (*p == '\0')
+                writer.overallocate = 0;
+
+            if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0)
+                goto fail;
+
+            f = p;
         }
     }
-    assert(i == PyUnicode_GET_LENGTH(string));
+    return _PyUnicodeWriter_Finish(&writer);
 
-  end:
-    if (callresults)
-        PyObject_Free(callresults);
-    if (numberresults)
-        PyObject_Free(numberresults);
-    return unicode_result(string);
   fail:
-    if (callresults) {
-        PyObject **callresult2 = callresults;
-        while (callresult2 < callresult) {
-            Py_XDECREF(*callresult2);
-            ++callresult2;
-        }
-        PyObject_Free(callresults);
-    }
-    if (numberresults)
-        PyObject_Free(numberresults);
+    _PyUnicodeWriter_Dealloc(&writer);
     return NULL;
 }
 
@@ -2967,26 +2876,17 @@ PyUnicode_AsWideCharString(PyObject *unicode,
 PyObject *
 PyUnicode_FromOrdinal(int ordinal)
 {
-    PyObject *v;
     if (ordinal < 0 || ordinal > MAX_UNICODE) {
         PyErr_SetString(PyExc_ValueError,
                         "chr() arg not in range(0x110000)");
         return NULL;
     }
 
-    if ((Py_UCS4)ordinal < 256)
-        return get_latin1_char((unsigned char)ordinal);
-
-    v = PyUnicode_New(1, ordinal);
-    if (v == NULL)
-        return NULL;
-    PyUnicode_WRITE(PyUnicode_KIND(v), PyUnicode_DATA(v), 0, ordinal);
-    assert(_PyUnicode_CheckConsistency(v, 1));
-    return v;
+    return unicode_char((Py_UCS4)ordinal);   /* ordinal is within [0, MAX_UNICODE] here */
 }
 
 PyObject *
-PyUnicode_FromObject(register PyObject *obj)
+PyUnicode_FromObject(PyObject *obj)
 {
     /* XXX Perhaps we should make this API an alias of
        PyObject_Str() instead ?! */
@@ -3008,7 +2908,7 @@ PyUnicode_FromObject(register PyObject *obj)
 }
 
 PyObject *
-PyUnicode_FromEncodedObject(register PyObject *obj,
+PyUnicode_FromEncodedObject(PyObject *obj,
                             const char *encoding,
                             const char *errors)
 {
@@ -3060,14 +2960,17 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
    1 on success. */
 int
 _Py_normalize_encoding(const char *encoding,
-                   char *lower,
-                   size_t lower_len)
+                       char *lower,
+                       size_t lower_len)
 {
     const char *e;
     char *l;
     char *l_end;
 
     if (encoding == NULL) {
+        /* 6 == strlen("utf-8") + 1 */
+        if (lower_len < 6)
+            return 0;
         strcpy(lower, "utf-8");
         return 1;
     }
@@ -3109,7 +3012,8 @@ PyUnicode_Decode(const char *s,
             return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
         else if ((strcmp(lower, "latin-1") == 0) ||
                  (strcmp(lower, "latin1") == 0) ||
-                 (strcmp(lower, "iso-8859-1") == 0))
+                 (strcmp(lower, "iso-8859-1") == 0) ||
+                 (strcmp(lower, "iso8859-1") == 0))
             return PyUnicode_DecodeLatin1(s, size, errors);
 #ifdef HAVE_MBCS
         else if (strcmp(lower, "mbcs") == 0)
@@ -3130,13 +3034,15 @@ PyUnicode_Decode(const char *s,
     buffer = PyMemoryView_FromBuffer(&info);
     if (buffer == NULL)
         goto onError;
-    unicode = PyCodec_Decode(buffer, encoding, errors);
+    unicode = _PyCodec_DecodeText(buffer, encoding, errors);
     if (unicode == NULL)
         goto onError;
     if (!PyUnicode_Check(unicode)) {
         PyErr_Format(PyExc_TypeError,
-                     "decoder did not return a str object (type=%.400s)",
-                     Py_TYPE(unicode)->tp_name);
+                     "'%.400s' decoder returned '%.400s' instead of 'str'; "
+                     "use codecs.decode() to decode to arbitrary types",
+                     encoding,
+                     Py_TYPE(unicode)->tp_name);   /* one argument per %.400s */
         Py_DECREF(unicode);
         goto onError;
     }
@@ -3194,8 +3100,10 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode,
         goto onError;
     if (!PyUnicode_Check(v)) {
         PyErr_Format(PyExc_TypeError,
-                     "decoder did not return a str object (type=%.400s)",
-                     Py_TYPE(v)->tp_name);
+                     "'%.400s' decoder returned '%.400s' instead of 'str'; "
+                     "use codecs.decode() to decode to arbitrary types",
+                     encoding,
+                     Py_TYPE(v)->tp_name);   /* type of the decoder's result, not of the input */
         Py_DECREF(v);
         goto onError;
     }
@@ -3343,7 +3251,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
     }
 
     if (surrogateescape) {
-        /* locale encoding with surrogateescape */
+        /* "surrogateescape" error handler */
         char *str;
 
         str = _Py_wchar2char(wstr, &error_pos);
@@ -3363,6 +3271,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
         PyMem_Free(str);
     }
     else {
+        /* strict mode */
         size_t len, len2;
 
         len = wcstombs(NULL, wstr, 0);
@@ -3401,7 +3310,7 @@ encode_error:
         wstr = _Py_char2wchar(errmsg, &errlen);
         if (wstr != NULL) {
             reason = PyUnicode_FromWideChar(wstr, errlen);
-            PyMem_Free(wstr);
+            PyMem_RawFree(wstr);
         } else
             errmsg = NULL;
     }
@@ -3479,7 +3388,8 @@ PyUnicode_AsEncodedString(PyObject *unicode,
         }
         else if ((strcmp(lower, "latin-1") == 0) ||
                  (strcmp(lower, "latin1") == 0) ||
-                 (strcmp(lower, "iso-8859-1") == 0))
+                 (strcmp(lower, "iso-8859-1") == 0) ||
+                 (strcmp(lower, "iso8859-1") == 0))
             return _PyUnicode_AsLatin1String(unicode, errors);
 #ifdef HAVE_MBCS
         else if (strcmp(lower, "mbcs") == 0)
@@ -3490,7 +3400,7 @@ PyUnicode_AsEncodedString(PyObject *unicode,
     }
 
     /* Encode via the codec registry */
-    v = PyCodec_Encode(unicode, encoding, errors);
+    v = _PyCodec_EncodeText(unicode, encoding, errors);
     if (v == NULL)
         return NULL;
 
@@ -3504,7 +3414,8 @@ PyUnicode_AsEncodedString(PyObject *unicode,
         PyObject *b;
 
         error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
-            "encoder %s returned bytearray instead of bytes",
+            "encoder %s returned bytearray instead of bytes; "
+            "use codecs.encode() to encode to arbitrary types",
             encoding);
         if (error) {
             Py_DECREF(v);
@@ -3517,8 +3428,10 @@ PyUnicode_AsEncodedString(PyObject *unicode,
     }
 
     PyErr_Format(PyExc_TypeError,
-                 "encoder did not return a bytes object (type=%.400s)",
-                 Py_TYPE(v)->tp_name);
+                 "'%.400s' encoder returned '%.400s' instead of 'bytes'; "
+                 "use codecs.encode() to encode to arbitrary types",
+                 encoding,
+                 Py_TYPE(v)->tp_name);   /* one argument per %.400s */
     Py_DECREF(v);
     return NULL;
 }
@@ -3544,8 +3457,10 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
         goto onError;
     if (!PyUnicode_Check(v)) {
         PyErr_Format(PyExc_TypeError,
-                     "encoder did not return an str object (type=%.400s)",
-                     Py_TYPE(v)->tp_name);
+                     "'%.400s' encoder returned '%.400s' instead of 'str'; "
+                     "use codecs.encode() to encode to arbitrary types",
+                     encoding,
+                     Py_TYPE(v)->tp_name);   /* one argument per %.400s */
         Py_DECREF(v);
         goto onError;
     }
@@ -3608,8 +3523,8 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
         return NULL;
     }
 
-    if (surrogateescape)
-    {
+    if (surrogateescape) {
+        /* "surrogateescape" error handler */
         wstr = _Py_char2wchar(str, &wlen);
         if (wstr == NULL) {
             if (wlen == (size_t)-1)
@@ -3620,9 +3535,10 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
         }
 
         unicode = PyUnicode_FromWideChar(wstr, wlen);
-        PyMem_Free(wstr);
+        PyMem_RawFree(wstr);
     }
     else {
+        /* strict mode */
 #ifndef HAVE_BROKEN_MBSTOWCS
         wlen = mbstowcs(NULL, str, 0);
 #else
@@ -3642,7 +3558,6 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
                 return PyErr_NoMemory();
         }
 
-        /* This shouldn't fail now */
         wlen2 = mbstowcs(wstr, str, wlen+1);
         if (wlen2 == (size_t)-1) {
             if (wstr != smallbuf)
@@ -3668,7 +3583,7 @@ decode_error:
         wstr = _Py_char2wchar(errmsg, &errlen);
         if (wstr != NULL) {
             reason = PyUnicode_FromWideChar(wstr, errlen);
-            PyMem_Free(wstr);
+            PyMem_RawFree(wstr);
         } else
             errmsg = NULL;
     }
@@ -3736,18 +3651,20 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
 
 
 int
-_PyUnicode_HasNULChars(PyObject* s)
+_PyUnicode_HasNULChars(PyObject* str)   /* returns 1, 0, or -1 if PyUnicode_READY() fails */
 {
-    static PyObject *nul = NULL;
+    Py_ssize_t pos;
 
-    if (nul == NULL)
-        nul = PyUnicode_FromStringAndSize("\0", 1);
-    if (nul == NULL)
+    if (PyUnicode_READY(str) == -1)
         return -1;
-    return PyUnicode_Contains(s, nul);
+    pos = findchar(PyUnicode_DATA(str), PyUnicode_KIND(str),
+                   PyUnicode_GET_LENGTH(str), '\0', 1);   /* search for a '\0' character */
+    if (pos == -1)   /* treated as "no '\0' present" */
+        return 0;
+    else
+        return 1;
 }
 
-
 int
 PyUnicode_FSConverter(PyObject* arg, void* addr)
 {
@@ -3851,6 +3768,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
             return NULL;
         _PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1);
         if (_PyUnicode_UTF8(unicode) == NULL) {
+            PyErr_NoMemory();
             Py_DECREF(bytes);
             return NULL;
         }
@@ -4019,6 +3937,9 @@ PyUnicode_GetLength(PyObject *unicode)
 Py_UCS4
 PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
 {
+    void *data;
+    int kind;
+
     if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) {
         PyErr_BadArgument();
         return (Py_UCS4)-1;
@@ -4027,7 +3948,9 @@ PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
         PyErr_SetString(PyExc_IndexError, "string index out of range");
         return (Py_UCS4)-1;
     }
-    return PyUnicode_READ_CHAR(unicode, index);
+    data = PyUnicode_DATA(unicode);
+    kind = PyUnicode_KIND(unicode);
+    return PyUnicode_READ(kind, data, index);
 }
 
 int
@@ -4086,6 +4009,7 @@ onError:
     *exceptionObject = NULL;
 }
 
+#ifdef HAVE_MBCS
 /* error handling callback helper:
    build arguments, call the callback and check the arguments,
    if no exception occurred, copy the replacement to the output
@@ -4094,11 +4018,12 @@ onError:
 */
 
 static int
-unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
-                                 const char *encoding, const char *reason,
-                                 const char **input, const char **inend, Py_ssize_t *startinpos,
-                                 Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
-                                 PyObject **output, Py_ssize_t *outpos)
+unicode_decode_call_errorhandler_wchar(
+    const char *errors, PyObject **errorHandler,
+    const char *encoding, const char *reason,
+    const char **input, const char **inend, Py_ssize_t *startinpos,
+    Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
+    PyObject **output, Py_ssize_t *outpos)
 {
     static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
 
@@ -4109,12 +4034,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
     Py_ssize_t requiredsize;
     Py_ssize_t newpos;
     PyObject *inputobj = NULL;
-    int res = -1;
+    wchar_t *repwstr;
+    Py_ssize_t repwlen;
 
-    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND)
-        outsize = PyUnicode_GET_LENGTH(*output);
-    else
-        outsize = _PyUnicode_WSTR_LENGTH(*output);
+    assert (_PyUnicode_KIND(*output) == PyUnicode_WCHAR_KIND);
+    outsize = _PyUnicode_WSTR_LENGTH(*output);
 
     if (*errorHandler == NULL) {
         *errorHandler = PyCodec_LookupError(errors);
@@ -4139,8 +4063,6 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
     }
     if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
         goto onError;
-    if (PyUnicode_READY(repunicode) == -1)
-        goto onError;
 
     /* Copy back the bytes variables, which might have been modified by the
        callback */
@@ -4164,54 +4086,118 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
         goto onError;
     }
 
-    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) {
-        /* need more space? (at least enough for what we
-           have+the replacement+the rest of the string (starting
-           at the new input position), so we won't have to check space
-           when there are no errors in the rest of the string) */
-        Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
-        requiredsize = *outpos + replen + insize-newpos;
-        if (requiredsize > outsize) {
-            if (requiredsize<2*outsize)
-                requiredsize = 2*outsize;
-            if (unicode_resize(output, requiredsize) < 0)
-                goto onError;
-        }
-        if (unicode_widen(output, *outpos,
-                          PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
+    repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
+    if (repwstr == NULL)
+        goto onError;
+    /* need more space? (at least enough for what we
+       have+the replacement+the rest of the string (starting
+       at the new input position), so we won't have to check space
+       when there are no errors in the rest of the string) */
+    requiredsize = *outpos + repwlen + insize-newpos;
+    if (requiredsize > outsize) {
+        if (requiredsize < 2*outsize)
+            requiredsize = 2*outsize;
+        if (unicode_resize(output, requiredsize) < 0)
             goto onError;
-        _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen);
-        *outpos += replen;
     }
-    else {
-        wchar_t *repwstr;
-        Py_ssize_t repwlen;
-        repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
-        if (repwstr == NULL)
+    wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
+    *outpos += repwlen;
+
+    *endinpos = newpos;
+    *inptr = *input + newpos;
+
+    /* we made it! */
+    Py_XDECREF(restuple);
+    return 0;
+
+  onError:
+    Py_XDECREF(restuple);
+    return -1;
+}
+#endif   /* HAVE_MBCS */
+
+static int
+unicode_decode_call_errorhandler_writer(
+    const char *errors, PyObject **errorHandler,
+    const char *encoding, const char *reason,
+    const char **input, const char **inend, Py_ssize_t *startinpos,
+    Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
+    _PyUnicodeWriter *writer /* PyObject **output, Py_ssize_t *outpos */)
+{
+    static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
+
+    PyObject *restuple = NULL;
+    PyObject *repunicode = NULL;
+    Py_ssize_t insize;
+    Py_ssize_t newpos;
+    Py_ssize_t replen;
+    PyObject *inputobj = NULL;
+
+    if (*errorHandler == NULL) {
+        *errorHandler = PyCodec_LookupError(errors);
+        if (*errorHandler == NULL)
             goto onError;
-        /* need more space? (at least enough for what we
-           have+the replacement+the rest of the string (starting
-           at the new input position), so we won't have to check space
-           when there are no errors in the rest of the string) */
-        requiredsize = *outpos + repwlen + insize-newpos;
-        if (requiredsize > outsize) {
-            if (requiredsize < 2*outsize)
-                requiredsize = 2*outsize;
-            if (unicode_resize(output, requiredsize) < 0)
-                goto onError;
-        }
-        wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
-        *outpos += repwlen;
     }
+
+    make_decode_exception(exceptionObject,
+        encoding,
+        *input, *inend - *input,
+        *startinpos, *endinpos,
+        reason);
+    if (*exceptionObject == NULL)
+        goto onError;
+
+    restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
+    if (restuple == NULL)
+        goto onError;
+    if (!PyTuple_Check(restuple)) {
+        PyErr_SetString(PyExc_TypeError, &argparse[4]);
+        goto onError;
+    }
+    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
+        goto onError;
+
+    /* Copy back the bytes variables, which might have been modified by the
+       callback */
+    inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
+    if (!inputobj)
+        goto onError;
+    if (!PyBytes_Check(inputobj)) {
+        PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
+        Py_DECREF(inputobj);
+        goto onError;
+    }
+    *input = PyBytes_AS_STRING(inputobj);
+    insize = PyBytes_GET_SIZE(inputobj);
+    *inend = *input + insize;
+    Py_DECREF(inputobj);   /* safe: the exception object keeps its own reference */
+
+    if (newpos<0)
+        newpos = insize+newpos;
+    if (newpos<0 || newpos>insize) {
+        PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
+        goto onError;
+    }
+
+    if (PyUnicode_READY(repunicode) < 0)
+        goto onError;
+    replen = PyUnicode_GET_LENGTH(repunicode);
+    writer->min_length += replen;
+    if (replen > 1)
+        writer->overallocate = 1;
+    if (_PyUnicodeWriter_WriteStr(writer, repunicode) == -1)
+        goto onError;
+
     *endinpos = newpos;
     *inptr = *input + newpos;
 
     /* we made it! */
-    res = 0;
+    Py_XDECREF(restuple);
+    return 0;
 
   onError:
     Py_XDECREF(restuple);
-    return res;
+    return -1;
 }
 
 /* --- UTF-7 Codec -------------------------------------------------------- */
@@ -4319,9 +4305,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
     const char *e;
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     const char *errmsg = "";
     int inShift = 0;
     Py_ssize_t shiftOutStart;
@@ -4331,17 +4316,17 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    /* Start off assuming it's all ASCII. Widen later as necessary. */
-    unicode = PyUnicode_New(size, 127);
-    if (!unicode)
-        return NULL;
     if (size == 0) {
         if (consumed)
             *consumed = 0;
-        return unicode;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
-    shiftOutStart = outpos = 0;
+    /* Start off assuming it's all ASCII. Widen later as necessary. */
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;
+
+    shiftOutStart = 0;
     e = s + size;
 
     while (s < e) {
@@ -4364,13 +4349,13 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                         /* expecting a second surrogate */
                         if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) {
                             Py_UCS4 ch2 = Py_UNICODE_JOIN_SURROGATES(surrogate, outCh);
-                            if (unicode_putchar(&unicode, &outpos, ch2) < 0)
+                            if (_PyUnicodeWriter_WriteCharInline(&writer, ch2) < 0)
                                 goto onError;
                             surrogate = 0;
                             continue;
                         }
                         else {
-                            if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
+                            if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0)
                                 goto onError;
                             surrogate = 0;
                         }
@@ -4380,7 +4365,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                         surrogate = outCh;
                     }
                     else {
-                        if (unicode_putchar(&unicode, &outpos, outCh) < 0)
+                        if (_PyUnicodeWriter_WriteCharInline(&writer, outCh) < 0)
                             goto onError;
                     }
                 }
@@ -4389,7 +4374,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                 inShift = 0;
                 s++;
                 if (surrogate) {
-                    if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
+                    if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0)
                         goto onError;
                     surrogate = 0;
                 }
@@ -4410,7 +4395,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                 if (ch != '-') {
                     /* '-' is absorbed; other terminating
                        characters are preserved */
-                    if (unicode_putchar(&unicode, &outpos, ch) < 0)
+                    if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
                         goto onError;
                 }
             }
@@ -4420,20 +4405,20 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
             s++; /* consume '+' */
             if (s < e && *s == '-') { /* '+-' encodes '+' */
                 s++;
-                if (unicode_putchar(&unicode, &outpos, '+') < 0)
+                if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
                     goto onError;
             }
             else { /* begin base64-encoded section */
                 inShift = 1;
-                shiftOutStart = outpos;
+                shiftOutStart = writer.pos;
                 base64bits = 0;
                 base64buffer = 0;
             }
         }
         else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
-            if (unicode_putchar(&unicode, &outpos, ch) < 0)
-                goto onError;
             s++;
+            if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
+                goto onError;
         }
         else {
             startinpos = s-starts;
@@ -4444,11 +4429,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
         continue;
 utf7Error:
         endinpos = s-starts;
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
                 "utf7", errmsg,
                 &starts, &e, &startinpos, &endinpos, &exc, &s,
-                &unicode, &outpos))
+                &writer))
             goto onError;
     }
 
@@ -4460,11 +4445,11 @@ utf7Error:
                 (base64bits >= 6) ||
                 (base64bits > 0 && base64buffer != 0)) {
             endinpos = size;
-            if (unicode_decode_call_errorhandler(
+            if (unicode_decode_call_errorhandler_writer(
                     errors, &errorHandler,
                     "utf7", "unterminated shift sequence",
                     &starts, &e, &startinpos, &endinpos, &exc, &s,
-                    &unicode, &outpos))
+                    &writer))
                 goto onError;
             if (s < e)
                 goto restart;
@@ -4474,7 +4459,7 @@ utf7Error:
     /* return state */
     if (consumed) {
         if (inShift) {
-            outpos = shiftOutStart; /* back off output */
+            writer.pos = shiftOutStart; /* back off output */
             *consumed = startinpos;
         }
         else {
@@ -4482,17 +4467,14 @@ utf7Error:
         }
     }
 
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
-
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(unicode);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    Py_DECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     return NULL;
 }
 
@@ -4575,7 +4557,7 @@ encode_char:
 
             /* code first surrogate */
             base64bits += 16;
-            base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10);
+            base64buffer = (base64buffer << 16) | Py_UNICODE_HIGH_SURROGATE(ch);
             while (base64bits >= 6) {
                 *out++ = TO_BASE64(base64buffer >> (base64bits-6));
                 base64bits -= 6;
@@ -4676,9 +4658,9 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
     if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
         /* Fast path, see in STRINGLIB(utf8_decode) for
            an explanation. */
-        /* Help register allocation */
-        register const char *_p = p;
-        register Py_UCS1 * q = dest;
+        /* Local copies help the compiler's register allocation */
+        const char *_p = p;
+        Py_UCS1 * q = dest;
         while (_p < aligned_end) {
             unsigned long value = *(const unsigned long *) _p;
             if (value & ASCII_CHAR_MASK)
@@ -4701,8 +4683,8 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
         /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
            for an explanation. */
         if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
-            /* Help register allocation */
-            register const char *_p = p;
+            /* Local copy helps the compiler's register allocation */
+            const char *_p = p;
             while (_p < aligned_end) {
                 unsigned long value = *(unsigned long *) _p;
                 if (value & ASCII_CHAR_MASK)
@@ -4727,10 +4709,9 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
                              const char *errors,
                              Py_ssize_t *consumed)
 {
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     const char *starts = s;
     const char *end = s + size;
-    Py_ssize_t outpos;
 
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
@@ -4751,29 +4732,26 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
         return get_latin1_char((unsigned char)s[0]);
     }
 
-    unicode = PyUnicode_New(size, 127);
-    if (!unicode)
-        return NULL;
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+        goto onError;
 
-    outpos = ascii_decode(s, end, PyUnicode_1BYTE_DATA(unicode));
-    s += outpos;
+    writer.pos = ascii_decode(s, end, writer.data);
+    s += writer.pos;
     while (s < end) {
         Py_UCS4 ch;
-        int kind = PyUnicode_KIND(unicode);
+        int kind = writer.kind;
         if (kind == PyUnicode_1BYTE_KIND) {
-            if (PyUnicode_IS_ASCII(unicode))
-                ch = asciilib_utf8_decode(&s, end,
-                        PyUnicode_1BYTE_DATA(unicode), &outpos);
+            if (PyUnicode_IS_ASCII(writer.buffer))
+                ch = asciilib_utf8_decode(&s, end, writer.data, &writer.pos);
             else
-                ch = ucs1lib_utf8_decode(&s, end,
-                        PyUnicode_1BYTE_DATA(unicode), &outpos);
+                ch = ucs1lib_utf8_decode(&s, end, writer.data, &writer.pos);
         } else if (kind == PyUnicode_2BYTE_KIND) {
-            ch = ucs2lib_utf8_decode(&s, end,
-                    PyUnicode_2BYTE_DATA(unicode), &outpos);
+            ch = ucs2lib_utf8_decode(&s, end, writer.data, &writer.pos);
         } else {
             assert(kind == PyUnicode_4BYTE_KIND);
-            ch = ucs4lib_utf8_decode(&s, end,
-                    PyUnicode_4BYTE_DATA(unicode), &outpos);
+            ch = ucs4lib_utf8_decode(&s, end, writer.data, &writer.pos);
         }
 
         switch (ch) {
@@ -4797,35 +4775,31 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
             endinpos = startinpos + ch - 1;
             break;
         default:
-            if (unicode_putchar(&unicode, &outpos, ch) < 0)
+            if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
                 goto onError;
             continue;
         }
 
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
                 "utf-8", errmsg,
                 &starts, &end, &startinpos, &endinpos, &exc, &s,
-                &unicode, &outpos))
+                &writer))
             goto onError;
     }
 
 End:
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
-
     if (consumed)
         *consumed = s - starts;
 
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    assert(_PyUnicode_CheckConsistency(unicode, 1));
-    return unicode;
+    return _PyUnicodeWriter_Finish(&writer);
 
 onError:
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    Py_XDECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     return NULL;
 }
 
@@ -4835,7 +4809,7 @@ onError:
    used to decode the command line arguments on Mac OS X.
 
    Return a pointer to a newly allocated wide character string (use
-   PyMem_Free() to free the memory), or NULL on memory allocation error. */
+   PyMem_RawFree() to free the memory), or NULL on memory allocation error. */
 
 wchar_t*
 _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
@@ -4848,7 +4822,7 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
        character count */
     if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
         return NULL;
-    unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
+    unicode = PyMem_RawMalloc((size + 1) * sizeof(wchar_t));
     if (!unicode)
         return NULL;
 
@@ -4971,17 +4945,11 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     const unsigned char *q, *e;
-    int bo = 0;       /* assume native ordering by default */
+    int le, bo = 0;       /* assume native ordering by default */
+    const char *encoding;
     const char *errmsg = "";
-    /* Offsets from q for retrieving bytes in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    int iorder[] = {0, 1, 2, 3};
-#else
-    int iorder[] = {3, 2, 1, 0};
-#endif
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
@@ -4995,107 +4963,118 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
        byte order setting accordingly. In native mode, the leading BOM
        mark is skipped, in all other modes, it is copied to the output
        stream as-is (giving a ZWNBSP character). */
-    if (bo == 0) {
-        if (size >= 4) {
-            const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
-                (q[iorder[1]] << 8) | q[iorder[0]];
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-            if (bom == 0x0000FEFF) {
-                q += 4;
-                bo = -1;
-            }
-            else if (bom == 0xFFFE0000) {
-                q += 4;
-                bo = 1;
-            }
-#else
-            if (bom == 0x0000FEFF) {
-                q += 4;
-                bo = 1;
-            }
-            else if (bom == 0xFFFE0000) {
-                q += 4;
-                bo = -1;
-            }
-#endif
+    if (bo == 0 && size >= 4) {
+        Py_UCS4 bom = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0];
+        if (bom == 0x0000FEFF) {
+            bo = -1;
+            q += 4;
+        }
+        else if (bom == 0xFFFE0000) {
+            bo = 1;
+            q += 4;
         }
+        if (byteorder)
+            *byteorder = bo;
     }
 
-    if (bo == -1) {
-        /* force LE */
-        iorder[0] = 0;
-        iorder[1] = 1;
-        iorder[2] = 2;
-        iorder[3] = 3;
-    }
-    else if (bo == 1) {
-        /* force BE */
-        iorder[0] = 3;
-        iorder[1] = 2;
-        iorder[2] = 1;
-        iorder[3] = 0;
+    if (q == e) {
+        if (consumed)
+            *consumed = size;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
-    /* This might be one to much, because of a BOM */
-    unicode = PyUnicode_New((size+3)/4, 127);
-    if (!unicode)
-        return NULL;
-    if (size == 0)
-        return unicode;
-    outpos = 0;
+#ifdef WORDS_BIGENDIAN
+    le = bo < 0;
+#else
+    le = bo <= 0;
+#endif
+    encoding = le ? "utf-32-le" : "utf-32-be";
 
-    while (q < e) {
-        Py_UCS4 ch;
-        /* remaining bytes at the end? (size should be divisible by 4) */
-        if (e-q<4) {
-            if (consumed)
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = (e - q + 3) / 4;
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+        goto onError;
+
+    while (1) {
+        Py_UCS4 ch = 0;
+        Py_UCS4 maxch = PyUnicode_MAX_CHAR_VALUE(writer.buffer);
+
+        if (e - q >= 4) {
+            enum PyUnicode_Kind kind = writer.kind;
+            void *data = writer.data;
+            const unsigned char *last = e - 4;
+            Py_ssize_t pos = writer.pos;
+            if (le) {
+                do {
+                    ch = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0];
+                    if (ch > maxch)
+                        break;
+                    if (kind != PyUnicode_1BYTE_KIND &&
+                        Py_UNICODE_IS_SURROGATE(ch))
+                        break;
+                    PyUnicode_WRITE(kind, data, pos++, ch);
+                    q += 4;
+                } while (q <= last);
+            }
+            else {
+                do {
+                    ch = (q[0] << 24) | (q[1] << 16) | (q[2] << 8) | q[3];
+                    if (ch > maxch)
+                        break;
+                    if (kind != PyUnicode_1BYTE_KIND &&
+                        Py_UNICODE_IS_SURROGATE(ch))
+                        break;
+                    PyUnicode_WRITE(kind, data, pos++, ch);
+                    q += 4;
+                } while (q <= last);
+            }
+            writer.pos = pos;
+        }
+
+        if (Py_UNICODE_IS_SURROGATE(ch)) {
+            errmsg = "codepoint in surrogate code point range(0xd800, 0xe000)";
+            startinpos = ((const char *)q) - starts;
+            endinpos = startinpos + 4;
+        }
+        else if (ch <= maxch) {
+            if (q == e || consumed)
                 break;
+            /* remaining bytes at the end? (size should be divisible by 4) */
             errmsg = "truncated data";
-            startinpos = ((const char *)q)-starts;
-            endinpos = ((const char *)e)-starts;
-            goto utf32Error;
-            /* The remaining input chars are ignored if the callback
-               chooses to skip the input */
+            startinpos = ((const char *)q) - starts;
+            endinpos = ((const char *)e) - starts;
         }
-        ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
-            (q[iorder[1]] << 8) | q[iorder[0]];
-
-        if (ch >= 0x110000)
-        {
+        else {
+            if (ch < 0x110000) {
+                if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
+                    goto onError;
+                q += 4;
+                continue;
+            }
             errmsg = "codepoint not in range(0x110000)";
-            startinpos = ((const char *)q)-starts;
-            endinpos = startinpos+4;
-            goto utf32Error;
+            startinpos = ((const char *)q) - starts;
+            endinpos = startinpos + 4;
         }
-        if (unicode_putchar(&unicode, &outpos, ch) < 0)
-            goto onError;
-        q += 4;
-        continue;
-      utf32Error:
-        if (unicode_decode_call_errorhandler(
+
+        /* The remaining input chars are ignored if the callback
+           chooses to skip the input */
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
-                "utf32", errmsg,
+                encoding, errmsg,
                 &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
-                &unicode, &outpos))
+                &writer))
             goto onError;
     }
 
-    if (byteorder)
-        *byteorder = bo;
-
     if (consumed)
         *consumed = (const char *)q-starts;
 
-    /* Adjust length */
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
-
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(unicode);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_DECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -5113,11 +5092,15 @@ _PyUnicode_EncodeUTF32(PyObject *str,
     unsigned char *p;
     Py_ssize_t nsize, i;
     /* Offsets from p for storing byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
     int iorder[] = {0, 1, 2, 3};
 #else
     int iorder[] = {3, 2, 1, 0};
 #endif
+    const char *encoding;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    PyObject *rep = NULL;
 
 #define STORECHAR(CH)                           \
     do {                                        \
@@ -5149,7 +5132,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
     if (byteorder == 0)
         STORECHAR(0xFEFF);
     if (len == 0)
-        goto done;
+        return v;
 
     if (byteorder == -1) {
         /* force LE */
@@ -5157,6 +5140,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
         iorder[1] = 1;
         iorder[2] = 2;
         iorder[3] = 3;
+        encoding = "utf-32-le";
     }
     else if (byteorder == 1) {
         /* force BE */
@@ -5164,13 +5148,103 @@ _PyUnicode_EncodeUTF32(PyObject *str,
         iorder[1] = 2;
         iorder[2] = 1;
         iorder[3] = 0;
+        encoding = "utf-32-be";
     }
+    else
+        encoding = "utf-32";
 
-    for (i = 0; i < len; i++)
-        STORECHAR(PyUnicode_READ(kind, data, i));
+    if (kind == PyUnicode_1BYTE_KIND) {
+        for (i = 0; i < len; i++)
+            STORECHAR(PyUnicode_READ(kind, data, i));
+        return v;
+    }
 
-  done:
+    for (i = 0; i < len;) {
+        Py_ssize_t repsize, moreunits;
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+        i++;
+        assert(ch <= MAX_UNICODE);
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+            STORECHAR(ch);
+            continue;
+        }
+
+        rep = unicode_encode_call_errorhandler(
+                errors, &errorHandler,
+                encoding, "surrogates not allowed",
+                str, &exc, i-1, i, &i);
+
+        if (!rep)
+            goto error;
+
+        if (PyBytes_Check(rep)) {
+            repsize = PyBytes_GET_SIZE(rep);
+            if (repsize & 3) {
+                raise_encode_exception(&exc, encoding,
+                                       str, i - 1, i,
+                                       "surrogates not allowed");
+                goto error;
+            }
+            moreunits = repsize / 4;
+        }
+        else {
+            assert(PyUnicode_Check(rep));
+            if (PyUnicode_READY(rep) < 0)
+                goto error;
+            moreunits = repsize = PyUnicode_GET_LENGTH(rep);
+            if (!PyUnicode_IS_ASCII(rep)) {
+                raise_encode_exception(&exc, encoding,
+                                       str, i - 1, i,
+                                       "surrogates not allowed");
+                goto error;
+            }
+        }
+
+        /* four bytes are reserved for each surrogate */
+        if (moreunits > 1) {
+            Py_ssize_t outpos = p - (unsigned char*) PyBytes_AS_STRING(v);
+            Py_ssize_t morebytes = 4 * (moreunits - 1);
+            if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) {
+                /* integer overflow */
+                PyErr_NoMemory();
+                goto error;
+            }
+            if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + morebytes) < 0)
+                goto error;
+            p = (unsigned char*) PyBytes_AS_STRING(v) + outpos;
+        }
+
+        if (PyBytes_Check(rep)) {
+            Py_MEMCPY(p, PyBytes_AS_STRING(rep), repsize);
+            p += repsize;
+        } else /* rep is unicode */ {
+            const Py_UCS1 *repdata;
+            assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
+            repdata = PyUnicode_1BYTE_DATA(rep);
+            while (repsize--) {
+                Py_UCS4 ch = *repdata++;
+                STORECHAR(ch);
+            }
+        }
+
+        Py_CLEAR(rep);
+    }
+
+    /* Cut back to the size actually needed. This is necessary when, for
+       example, a string containing isolated surrogates is encoded with the
+       'ignore' error handler. */
+    nsize = p - (unsigned char*) PyBytes_AS_STRING(v);
+    if (nsize != PyBytes_GET_SIZE(v))
+      _PyBytes_Resize(&v, nsize);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
     return v;
+  error:
+    Py_XDECREF(rep);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    Py_XDECREF(v);
+    return NULL;
 #undef STORECHAR
 }
 
@@ -5216,14 +5290,14 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     const unsigned char *q, *e;
     int bo = 0;       /* assume native ordering by default */
     int native_ordering;
     const char *errmsg = "";
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
+    const char *encoding;
 
     q = (unsigned char *)s;
     e = q + size;
@@ -5255,40 +5329,42 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
         _Py_RETURN_UNICODE_EMPTY();
     }
 
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
     native_ordering = bo <= 0;
+    encoding = bo <= 0 ? "utf-16-le" : "utf-16-be";
 #else
     native_ordering = bo >= 0;
+    encoding = bo >= 0 ? "utf-16-be" : "utf-16-le";
 #endif
 
     /* Note: size will always be longer than the resulting Unicode
        character count */
-    unicode = PyUnicode_New((e - q + 1) / 2, 127);
-    if (!unicode)
-        return NULL;
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = (e - q + 1) / 2;
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+        goto onError;
 
-    outpos = 0;
     while (1) {
         Py_UCS4 ch = 0;
         if (e - q >= 2) {
-            int kind = PyUnicode_KIND(unicode);
+            int kind = writer.kind;
             if (kind == PyUnicode_1BYTE_KIND) {
-                if (PyUnicode_IS_ASCII(unicode))
+                if (PyUnicode_IS_ASCII(writer.buffer))
                     ch = asciilib_utf16_decode(&q, e,
-                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            (Py_UCS1*)writer.data, &writer.pos,
                             native_ordering);
                 else
                     ch = ucs1lib_utf16_decode(&q, e,
-                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            (Py_UCS1*)writer.data, &writer.pos,
                             native_ordering);
             } else if (kind == PyUnicode_2BYTE_KIND) {
                 ch = ucs2lib_utf16_decode(&q, e,
-                        PyUnicode_2BYTE_DATA(unicode), &outpos,
+                        (Py_UCS2*)writer.data, &writer.pos,
                         native_ordering);
             } else {
                 assert(kind == PyUnicode_4BYTE_KIND);
                 ch = ucs4lib_utf16_decode(&q, e,
-                        PyUnicode_4BYTE_DATA(unicode), &outpos,
+                        (Py_UCS4*)writer.data, &writer.pos,
                         native_ordering);
             }
         }
@@ -5324,23 +5400,22 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
             endinpos = startinpos + 2;
             break;
         default:
-            if (unicode_putchar(&unicode, &outpos, ch) < 0)
+            if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
                 goto onError;
             continue;
         }
 
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors,
                 &errorHandler,
-                "utf16", errmsg,
+                encoding, errmsg,
                 &starts,
                 (const char **)&e,
                 &startinpos,
                 &endinpos,
                 &exc,
                 (const char **)&q,
-                &unicode,
-                &outpos))
+                &writer))
             goto onError;
     }
 
@@ -5348,16 +5423,12 @@ End:
     if (consumed)
         *consumed = (const char *)q-starts;
 
-    /* Adjust length */
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
-
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(unicode);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_DECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -5373,13 +5444,17 @@ _PyUnicode_EncodeUTF16(PyObject *str,
     Py_ssize_t len;
     PyObject *v;
     unsigned short *out;
-    Py_ssize_t bytesize;
     Py_ssize_t pairs;
-#ifdef WORDS_BIGENDIAN
+#if PY_BIG_ENDIAN
     int native_ordering = byteorder >= 0;
 #else
     int native_ordering = byteorder <= 0;
 #endif
+    const char *encoding;
+    Py_ssize_t nsize, pos;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    PyObject *rep = NULL;
 
     if (!PyUnicode_Check(str)) {
         PyErr_BadArgument();
@@ -5401,8 +5476,8 @@ _PyUnicode_EncodeUTF16(PyObject *str,
     }
     if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0))
         return PyErr_NoMemory();
-    bytesize = (len + pairs + (byteorder == 0)) * 2;
-    v = PyBytes_FromStringAndSize(NULL, bytesize);
+    nsize = len + pairs + (byteorder == 0);
+    v = PyBytes_FromStringAndSize(NULL, nsize * 2);
     if (v == NULL)
         return NULL;
 
@@ -5414,25 +5489,107 @@ _PyUnicode_EncodeUTF16(PyObject *str,
     if (len == 0)
         goto done;
 
-    switch (kind) {
-    case PyUnicode_1BYTE_KIND: {
-        ucs1lib_utf16_encode(out, (const Py_UCS1 *)data, len, native_ordering);
-        break;
-    }
-    case PyUnicode_2BYTE_KIND: {
-        ucs2lib_utf16_encode(out, (const Py_UCS2 *)data, len, native_ordering);
-        break;
-    }
-    case PyUnicode_4BYTE_KIND: {
-        ucs4lib_utf16_encode(out, (const Py_UCS4 *)data, len, native_ordering);
-        break;
+    if (kind == PyUnicode_1BYTE_KIND) {
+        ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering);
+        goto done;
     }
-    default:
-        assert(0);
+
+    if (byteorder < 0)
+        encoding = "utf-16-le";
+    else if (byteorder > 0)
+        encoding = "utf-16-be";
+    else
+        encoding = "utf-16";
+
+    pos = 0;
+    while (pos < len) {
+        Py_ssize_t repsize, moreunits;
+
+        if (kind == PyUnicode_2BYTE_KIND) {
+            pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos,
+                                        &out, native_ordering);
+        }
+        else {
+            assert(kind == PyUnicode_4BYTE_KIND);
+            pos += ucs4lib_utf16_encode((const Py_UCS4 *)data + pos, len - pos,
+                                        &out, native_ordering);
+        }
+        if (pos == len)
+            break;
+
+        rep = unicode_encode_call_errorhandler(
+                errors, &errorHandler,
+                encoding, "surrogates not allowed",
+                str, &exc, pos, pos + 1, &pos);
+        if (!rep)
+            goto error;
+
+        if (PyBytes_Check(rep)) {
+            repsize = PyBytes_GET_SIZE(rep);
+            if (repsize & 1) {
+                raise_encode_exception(&exc, encoding,
+                                       str, pos - 1, pos,
+                                       "surrogates not allowed");
+                goto error;
+            }
+            moreunits = repsize / 2;
+        }
+        else {
+            assert(PyUnicode_Check(rep));
+            if (PyUnicode_READY(rep) < 0)
+                goto error;
+            moreunits = repsize = PyUnicode_GET_LENGTH(rep);
+            if (!PyUnicode_IS_ASCII(rep)) {
+                raise_encode_exception(&exc, encoding,
+                                       str, pos - 1, pos,
+                                       "surrogates not allowed");
+                goto error;
+            }
+        }
+
+        /* two bytes are reserved for each surrogate */
+        if (moreunits > 1) {
+            Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v);
+            Py_ssize_t morebytes = 2 * (moreunits - 1);
+            if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) {
+                /* integer overflow */
+                PyErr_NoMemory();
+                goto error;
+            }
+            if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + morebytes) < 0)
+                goto error;
+            out = (unsigned short*) PyBytes_AS_STRING(v) + outpos;
+        }
+
+        if (PyBytes_Check(rep)) {
+            Py_MEMCPY(out, PyBytes_AS_STRING(rep), repsize);
+            out += moreunits;
+        } else /* rep is unicode */ {
+            assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
+            ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize,
+                                 &out, native_ordering);
+        }
+
+        Py_CLEAR(rep);
     }
 
+    /* Cut back to the size actually needed. This is necessary when, for
+    example, a string containing isolated surrogates is encoded with the
+    'ignore' error handler. */
+    nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v);
+    if (nsize != PyBytes_GET_SIZE(v))
+      _PyBytes_Resize(&v, nsize);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
   done:
     return v;
+  error:
+    Py_XDECREF(rep);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    Py_XDECREF(v);
+    return NULL;
+#undef STORECHAR
 }
 
 PyObject *
@@ -5523,27 +5680,26 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    PyObject *v;
+    _PyUnicodeWriter writer;
     const char *end;
     char* message;
     Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
     Py_ssize_t len;
-    Py_ssize_t i;
 
     len = length_of_escaped_ascii_string(s, size);
+    if (len == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* After length_of_escaped_ascii_string() there are two alternatives,
        either the string is pure ASCII with named escapes like \n, etc.
        and we determined it's exact size (common case)
        or it contains \x, \u, ... escape sequences.  then we create a
        legacy wchar string and resize it at the end of this function. */
-    if (len >= 0) {
-        v = PyUnicode_New(len, 127);
-        if (!v)
-            goto onError;
-        assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+    _PyUnicodeWriter_Init(&writer);
+    if (len > 0) {
+        writer.min_length = len;
     }
     else {
         /* Escaped strings will always be longer than the resulting
@@ -5551,15 +5707,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
            length after conversion to the true value.
            (but if the error callback returns a long replacement string
            we'll have to allocate more space) */
-        v = PyUnicode_New(size, 127);
-        if (!v)
-            goto onError;
-        len = size;
+        writer.min_length = size;
     }
 
     if (size == 0)
-        return v;
-    i = 0;
+        return _PyUnicodeWriter_Finish(&writer);
     end = s + size;
 
     while (s < end) {
@@ -5567,13 +5719,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
         Py_UCS4 x;
         int digits;
 
-        /* The only case in which i == ascii_length is a backslash
-           followed by a newline. */
-        assert(i <= len);
-
         /* Non-escape characters are interpreted as Unicode ordinals */
         if (*s != '\\') {
-            if (unicode_putchar(&v, &i, (unsigned char) *s++) < 0)
+            x = (unsigned char)*s;
+            s++;
+            if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
                 goto onError;
             continue;
         }
@@ -5585,18 +5735,14 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
         if (s > end)
             c = '\0'; /* Invalid after \ */
 
-        /* The only case in which i == ascii_length is a backslash
-           followed by a newline. */
-        assert(i < len || (i == len && c == '\n'));
-
         switch (c) {
 
             /* \x escapes */
-#define WRITECHAR(ch)                                   \
-            do {                                        \
-                if (unicode_putchar(&v, &i, ch) < 0)    \
-                    goto onError;                       \
-            }while(0)
+#define WRITECHAR(ch)                                                      \
+            do {                                                           \
+                if (_PyUnicodeWriter_WriteCharInline(&writer, (ch)) < 0)    \
+                    goto onError;                                          \
+            } while(0)
 
         case '\n': break;
         case '\\': WRITECHAR('\\'); break;
@@ -5720,35 +5866,32 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
 
       error:
         endinpos = s-starts;
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
                 "unicodeescape", message,
                 &starts, &end, &startinpos, &endinpos, &exc, &s,
-                &v, &i))
+                &writer))
             goto onError;
-        len = PyUnicode_GET_LENGTH(v);
         continue;
     }
 #undef WRITECHAR
 
-    if (unicode_resize(&v, i) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(v);
+    return _PyUnicodeWriter_Finish(&writer);
 
   ucnhashError:
     PyErr_SetString(
         PyExc_UnicodeError,
         "\\N escapes not supported (can't load unicodedata module)"
         );
-    Py_XDECREF(v);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
 
   onError:
-    Py_XDECREF(v);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -5901,23 +6044,22 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
-    PyObject *v;
+    _PyUnicodeWriter writer;
     const char *end;
     const char *bs;
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
+
     /* Escaped strings will always be longer than the resulting
        Unicode string, so we start with size here and then reduce the
        length after conversion to the true value. (But decoding error
        handler might have to resize the string) */
-    v = PyUnicode_New(size, 127);
-    if (v == NULL)
-        goto onError;
-    if (size == 0)
-        return v;
-    outpos = 0;
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;
+
     end = s + size;
     while (s < end) {
         unsigned char c;
@@ -5927,7 +6069,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
 
         /* Non-escape characters are interpreted as Unicode ordinals */
         if (*s != '\\') {
-            if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
+            x = (unsigned char)*s++;
+            if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
                 goto onError;
             continue;
         }
@@ -5939,7 +6082,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
         for (;s < end;) {
             if (*s != '\\')
                 break;
-            if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
+            x = (unsigned char)*s++;
+            if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
                 goto onError;
         }
         if (((s - bs) & 1) == 0 ||
@@ -5947,7 +6091,7 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
             (*s != 'u' && *s != 'U')) {
             continue;
         }
-        outpos--;
+        writer.pos--;
         count = *s=='u' ? 4 : 8;
         s++;
 
@@ -5956,11 +6100,11 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
             c = (unsigned char)*s;
             if (!Py_ISXDIGIT(c)) {
                 endinpos = s-starts;
-                if (unicode_decode_call_errorhandler(
+                if (unicode_decode_call_errorhandler_writer(
                         errors, &errorHandler,
                         "rawunicodeescape", "truncated \\uXXXX",
                         &starts, &end, &startinpos, &endinpos, &exc, &s,
-                        &v, &outpos))
+                        &writer))
                     goto onError;
                 goto nextByte;
             }
@@ -5973,28 +6117,27 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
                 x += 10 + c - 'A';
         }
         if (x <= MAX_UNICODE) {
-            if (unicode_putchar(&v, &outpos, x) < 0)
+            if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
                 goto onError;
-        } else {
+        }
+        else {
             endinpos = s-starts;
-            if (unicode_decode_call_errorhandler(
+            if (unicode_decode_call_errorhandler_writer(
                     errors, &errorHandler,
                     "rawunicodeescape", "\\Uxxxxxxxx out of range",
                     &starts, &end, &startinpos, &endinpos, &exc, &s,
-                    &v, &outpos))
+                    &writer))
                 goto onError;
         }
       nextByte:
         ;
     }
-    if (unicode_resize(&v, outpos) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(v);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_XDECREF(v);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -6094,8 +6237,7 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
-    PyObject *v;
+    _PyUnicodeWriter writer;
     const char *end;
     const char *reason;
     PyObject *errorHandler = NULL;
@@ -6106,15 +6248,17 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
                      1))
         return NULL;
 
-    /* XXX overflow detection missing */
-    v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127);
-    if (v == NULL)
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
+
+    _PyUnicodeWriter_Init(&writer);
+    if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) {
+        PyErr_NoMemory();
         goto onError;
-    if (PyUnicode_GET_LENGTH(v) == 0)
-        return v;
-    outpos = 0;
-    end = s + size;
+    }
+    writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;
 
+    end = s + size;
     while (s < end) {
         Py_UNICODE uch;
         Py_UCS4 ch;
@@ -6156,28 +6300,26 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
         }
 #endif
 
-        if (unicode_putchar(&v, &outpos, ch) < 0)
+        if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
             goto onError;
         continue;
 
   error:
         startinpos = s - starts;
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
                 "unicode_internal", reason,
                 &starts, &end, &startinpos, &endinpos, &exc, &s,
-                &v, &outpos))
+                &writer))
             goto onError;
     }
 
-    if (unicode_resize(&v, outpos) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(v);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_XDECREF(v);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -6541,7 +6683,7 @@ PyUnicode_DecodeASCII(const char *s,
                       const char *errors)
 {
     const char *starts = s;
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     int kind;
     void *data;
     Py_ssize_t startinpos;
@@ -6558,46 +6700,46 @@ PyUnicode_DecodeASCII(const char *s,
     if (size == 1 && (unsigned char)s[0] < 128)
         return get_latin1_char((unsigned char)s[0]);
 
-    unicode = PyUnicode_New(size, 127);
-    if (unicode == NULL)
-        goto onError;
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) < 0)
+        return NULL;
 
     e = s + size;
-    data = PyUnicode_1BYTE_DATA(unicode);
+    data = writer.data;
     outpos = ascii_decode(s, e, (Py_UCS1 *)data);
-    if (outpos == size)
-        return unicode;
+    writer.pos = outpos;
+    if (writer.pos == size)
+        return _PyUnicodeWriter_Finish(&writer);
 
-    s += outpos;
-    kind = PyUnicode_1BYTE_KIND;
+    s += writer.pos;
+    kind = writer.kind;
     while (s < e) {
-        register unsigned char c = (unsigned char)*s;
+        unsigned char c = (unsigned char)*s;
         if (c < 128) {
-            PyUnicode_WRITE(kind, data, outpos++, c);
+            PyUnicode_WRITE(kind, data, writer.pos, c);
+            writer.pos++;
             ++s;
         }
         else {
             startinpos = s-starts;
             endinpos = startinpos + 1;
-            if (unicode_decode_call_errorhandler(
+            if (unicode_decode_call_errorhandler_writer(
                     errors, &errorHandler,
                     "ascii", "ordinal not in range(128)",
                     &starts, &e, &startinpos, &endinpos, &exc, &s,
-                    &unicode, &outpos))
+                    &writer))
                 goto onError;
-            kind = PyUnicode_KIND(unicode);
-            data = PyUnicode_DATA(unicode);
+            kind = writer.kind;
+            data = writer.data;
         }
     }
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    assert(_PyUnicode_CheckConsistency(unicode, 1));
-    return unicode;
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_XDECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -6629,7 +6771,7 @@ _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors)
         return NULL;
     /* Fast path: if it is an ASCII-only string, construct bytes object
        directly. Else defer to above function to raise the exception. */
-    if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
+    if (PyUnicode_IS_ASCII(unicode))
         return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode),
                                          PyUnicode_GET_LENGTH(unicode));
     return unicode_encode_ucs1(unicode, errors, 128);
@@ -6707,8 +6849,8 @@ decode_code_page_flags(UINT code_page)
  * Decode a byte string from a Windows code page into unicode object in strict
  * mode.
  *
- * Returns consumed size if succeed, returns -2 on decode error, or raise a
- * WindowsError and returns -1 on other error.
+ * Returns the consumed size on success, returns -2 on decode error, or raises
+ * an OSError and returns -1 on any other error.
  */
 static int
 decode_code_page_strict(UINT code_page,
@@ -6759,7 +6901,7 @@ error:
  * Decode a byte string from a code page into unicode object with an error
  * handler.
  *
- * Returns consumed size if succeed, or raise a WindowsError or
+ * Returns the consumed size on success, or raises an OSError or
  * UnicodeDecodeError exception and returns -1 on error.
  */
 static int
@@ -6778,7 +6920,8 @@ decode_code_page_errors(UINT code_page,
     /* each step cannot decode more than 1 character, but a character can be
        represented as a surrogate pair */
     wchar_t buffer[2], *startout, *out;
-    int insize, outsize;
+    int insize;
+    Py_ssize_t outsize;
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
     PyObject *encoding_obj = NULL;
@@ -6858,7 +7001,7 @@ decode_code_page_errors(UINT code_page,
             startinpos = in - startin;
             endinpos = startinpos + 1;
             outpos = out - PyUnicode_AS_UNICODE(*v);
-            if (unicode_decode_call_errorhandler(
+            if (unicode_decode_call_errorhandler_wchar(
                     errors, &errorHandler,
                     encoding, reason,
                     &startin, &endin, &startinpos, &endinpos, &exc, &in,
@@ -7014,7 +7157,7 @@ encode_code_page_flags(UINT code_page, const char *errors)
  * mode.
  *
  * Returns consumed characters if succeed, returns -2 on encode error, or raise
- * a WindowsError and returns -1 on other error.
+ * an OSError and returns -1 on other error.
  */
 static int
 encode_code_page_strict(UINT code_page, PyObject **outbytes,
@@ -7048,10 +7191,11 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
         Py_DECREF(substring);
         return -1;
     }
+    assert(size <= INT_MAX);
 
     /* First get the size of the result */
     outsize = WideCharToMultiByte(code_page, flags,
-                                  p, size,
+                                  p, (int)size,
                                   NULL, 0,
                                   NULL, pusedDefaultChar);
     if (outsize <= 0)
@@ -7088,7 +7232,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
 
     /* Do the conversion */
     outsize = WideCharToMultiByte(code_page, flags,
-                                  p, size,
+                                  p, (int)size,
                                   out, outsize,
                                   NULL, pusedDefaultChar);
     Py_CLEAR(substring);
@@ -7110,7 +7254,7 @@ error:
  * Encode a Unicode string to a Windows code page into a byte string using a
  * error handler.
  *
- * Returns consumed characters if succeed, or raise a WindowsError and returns
+ * Returns consumed characters on success, or raises an OSError and returns
  * -1 on other error.
  */
 static int
@@ -7196,9 +7340,8 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
             charsize = 1;
         }
         else {
-            ch -= 0x10000;
-            chars[0] = 0xd800 + (ch >> 10);
-            chars[1] = 0xdc00 + (ch & 0x3ff);
+            chars[0] = Py_UNICODE_HIGH_SURROGATE(ch);
+            chars[1] = Py_UNICODE_LOW_SURROGATE(ch);
             charsize = 2;
         }
 
@@ -7391,220 +7534,258 @@ PyUnicode_AsMBCSString(PyObject *unicode)
 
 /* --- Character Mapping Codec -------------------------------------------- */
 
-PyObject *
-PyUnicode_DecodeCharmap(const char *s,
-                        Py_ssize_t size,
-                        PyObject *mapping,
-                        const char *errors)
+static int
+charmap_decode_string(const char *s,
+                      Py_ssize_t size,
+                      PyObject *mapping,
+                      const char *errors,
+                      _PyUnicodeWriter *writer)
 {
     const char *starts = s;
-    Py_ssize_t startinpos;
-    Py_ssize_t endinpos;
-    Py_ssize_t outpos;
     const char *e;
-    PyObject *v;
-    Py_ssize_t extrachars = 0;
-    PyObject *errorHandler = NULL;
-    PyObject *exc = NULL;
+    Py_ssize_t startinpos, endinpos;
+    PyObject *errorHandler = NULL, *exc = NULL;
+    Py_ssize_t maplen;
+    enum PyUnicode_Kind mapkind;
+    void *mapdata;
+    Py_UCS4 x;
+    unsigned char ch;
+
+    if (PyUnicode_READY(mapping) == -1)
+        return -1;
 
-    /* Default to Latin-1 */
-    if (mapping == NULL)
-        return PyUnicode_DecodeLatin1(s, size, errors);
+    maplen = PyUnicode_GET_LENGTH(mapping);
+    mapdata = PyUnicode_DATA(mapping);
+    mapkind = PyUnicode_KIND(mapping);
 
-    v = PyUnicode_New(size, 127);
-    if (v == NULL)
-        goto onError;
-    if (size == 0)
-        return v;
-    outpos = 0;
     e = s + size;
-    if (PyUnicode_CheckExact(mapping)) {
-        Py_ssize_t maplen;
-        enum PyUnicode_Kind mapkind;
-        void *mapdata;
-        Py_UCS4 x;
 
-        if (PyUnicode_READY(mapping) == -1)
-            return NULL;
+    if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) {
+        /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1
+         * is disabled in encoding aliases, latin1 is preferred because
+         * its implementation is faster. */
+        Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata;
+        Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
+        Py_UCS4 maxchar = writer->maxchar;
 
-        maplen = PyUnicode_GET_LENGTH(mapping);
-        mapdata = PyUnicode_DATA(mapping);
-        mapkind = PyUnicode_KIND(mapping);
+        assert (writer->kind == PyUnicode_1BYTE_KIND);
         while (s < e) {
-            unsigned char ch;
-            if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
-                enum PyUnicode_Kind outkind = PyUnicode_KIND(v);
-                if (outkind == PyUnicode_1BYTE_KIND) {
-                    void *outdata = PyUnicode_DATA(v);
-                    Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(v);
-                    while (s < e) {
-                        unsigned char ch = *s;
-                        x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch);
-                        if (x > maxchar)
-                            goto Error;
-                        PyUnicode_WRITE(PyUnicode_1BYTE_KIND, outdata, outpos++, x);
-                        ++s;
-                    }
-                    break;
+            ch = *s;
+            x = mapdata_ucs1[ch];
+            if (x > maxchar) {
+                if (_PyUnicodeWriter_Prepare(writer, 1, 0xff) == -1)
+                    goto onError;
+                maxchar = writer->maxchar;
+                outdata = (Py_UCS1 *)writer->data;
+            }
+            outdata[writer->pos] = x;
+            writer->pos++;
+            ++s;
+        }
+        return 0;
+    }
+
+    while (s < e) {
+        if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
+            enum PyUnicode_Kind outkind = writer->kind;
+            Py_UCS2 *mapdata_ucs2 = (Py_UCS2 *)mapdata;
+            if (outkind == PyUnicode_1BYTE_KIND) {
+                Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
+                Py_UCS4 maxchar = writer->maxchar;
+                while (s < e) {
+                    ch = *s;
+                    x = mapdata_ucs2[ch];
+                    if (x > maxchar)
+                        goto Error;
+                    outdata[writer->pos] = x;
+                    writer->pos++;
+                    ++s;
                 }
-                else if (outkind == PyUnicode_2BYTE_KIND) {
-                    void *outdata = PyUnicode_DATA(v);
-                    while (s < e) {
-                        unsigned char ch = *s;
-                        x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch);
-                        if (x == 0xFFFE)
-                            goto Error;
-                        PyUnicode_WRITE(PyUnicode_2BYTE_KIND, outdata, outpos++, x);
-                        ++s;
-                    }
-                    break;
+                break;
+            }
+            else if (outkind == PyUnicode_2BYTE_KIND) {
+                Py_UCS2 *outdata = (Py_UCS2 *)writer->data;
+                while (s < e) {
+                    ch = *s;
+                    x = mapdata_ucs2[ch];
+                    if (x == 0xFFFE)
+                        goto Error;
+                    outdata[writer->pos] = x;
+                    writer->pos++;
+                    ++s;
                 }
+                break;
             }
-            ch = *s;
+        }
+        ch = *s;
 
-            if (ch < maplen)
-                x = PyUnicode_READ(mapkind, mapdata, ch);
-            else
-                x = 0xfffe; /* invalid value */
+        if (ch < maplen)
+            x = PyUnicode_READ(mapkind, mapdata, ch);
+        else
+            x = 0xfffe; /* invalid value */
 Error:
-            if (x == 0xfffe)
-            {
-                /* undefined mapping */
-                startinpos = s-starts;
-                endinpos = startinpos+1;
-                if (unicode_decode_call_errorhandler(
-                        errors, &errorHandler,
-                        "charmap", "character maps to <undefined>",
-                        &starts, &e, &startinpos, &endinpos, &exc, &s,
-                        &v, &outpos)) {
-                    goto onError;
-                }
-                continue;
+        if (x == 0xfffe)
+        {
+            /* undefined mapping */
+            startinpos = s-starts;
+            endinpos = startinpos+1;
+            if (unicode_decode_call_errorhandler_writer(
+                    errors, &errorHandler,
+                    "charmap", "character maps to <undefined>",
+                    &starts, &e, &startinpos, &endinpos, &exc, &s,
+                    writer)) {
+                goto onError;
             }
+            continue;
+        }
+
+        if (_PyUnicodeWriter_WriteCharInline(writer, x) < 0)
+            goto onError;
+        ++s;
+    }
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    return 0;
+
+onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    return -1;
+}
+
+static int
+charmap_decode_mapping(const char *s,
+                       Py_ssize_t size,
+                       PyObject *mapping,
+                       const char *errors,
+                       _PyUnicodeWriter *writer)
+{
+    const char *starts = s;
+    const char *e;
+    Py_ssize_t startinpos, endinpos;
+    PyObject *errorHandler = NULL, *exc = NULL;
+    unsigned char ch;
+    PyObject *key, *item = NULL;
 
-            if (unicode_putchar(&v, &outpos, x) < 0)
+    e = s + size;
+
+    while (s < e) {
+        ch = *s;
+
+        /* Get mapping (char ordinal -> integer, Unicode char or None) */
+        key = PyLong_FromLong((long)ch);
+        if (key == NULL)
+            goto onError;
+
+        item = PyObject_GetItem(mapping, key);
+        Py_DECREF(key);
+        if (item == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+                /* No mapping found means: mapping is undefined. */
+                PyErr_Clear();
+                goto Undefined;
+            } else
                 goto onError;
-            ++s;
         }
-    }
-    else {
-        while (s < e) {
-            unsigned char ch = *s;
-            PyObject *w, *x;
 
-            /* Get mapping (char ordinal -> integer, Unicode char or None) */
-            w = PyLong_FromLong((long)ch);
-            if (w == NULL)
+        /* Apply mapping */
+        if (item == Py_None)
+            goto Undefined;
+        if (PyLong_Check(item)) {
+            long value = PyLong_AS_LONG(item);
+            if (value == 0xFFFE)
+                goto Undefined;
+            if (value < 0 || value > MAX_UNICODE) {
+                PyErr_Format(PyExc_TypeError,
+                             "character mapping must be in range(0x%lx)",
+                             (unsigned long)MAX_UNICODE + 1);
                 goto onError;
-            x = PyObject_GetItem(mapping, w);
-            Py_DECREF(w);
-            if (x == NULL) {
-                if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-                    /* No mapping found means: mapping is undefined. */
-                    PyErr_Clear();
-                    goto Undefined;
-                } else
-                    goto onError;
             }
 
-            /* Apply mapping */
-            if (x == Py_None)
-                goto Undefined;
-            if (PyLong_Check(x)) {
-                long value = PyLong_AS_LONG(x);
+            if (_PyUnicodeWriter_WriteCharInline(writer, value) < 0)
+                goto onError;
+        }
+        else if (PyUnicode_Check(item)) {
+            if (PyUnicode_READY(item) == -1)
+                goto onError;
+            if (PyUnicode_GET_LENGTH(item) == 1) {
+                Py_UCS4 value = PyUnicode_READ_CHAR(item, 0);
                 if (value == 0xFFFE)
                     goto Undefined;
-                if (value < 0 || value > MAX_UNICODE) {
-                    PyErr_Format(PyExc_TypeError,
-                                 "character mapping must be in range(0x%lx)",
-                                 (unsigned long)MAX_UNICODE + 1);
-                    Py_DECREF(x);
-                    goto onError;
-                }
-                if (unicode_putchar(&v, &outpos, value) < 0) {
-                    Py_DECREF(x);
+                if (_PyUnicodeWriter_WriteCharInline(writer, value) < 0)
                     goto onError;
-                }
-            }
-            else if (PyUnicode_Check(x)) {
-                Py_ssize_t targetsize;
-
-                if (PyUnicode_READY(x) == -1) {
-                    Py_DECREF(x);
-                    goto onError;
-                }
-                targetsize = PyUnicode_GET_LENGTH(x);
-
-                if (targetsize == 1) {
-                    /* 1-1 mapping */
-                    Py_UCS4 value = PyUnicode_READ_CHAR(x, 0);
-                    if (value == 0xFFFE)
-                        goto Undefined;
-                    if (unicode_putchar(&v, &outpos, value) < 0) {
-                        Py_DECREF(x);
-                        goto onError;
-                    }
-                }
-                else if (targetsize > 1) {
-                    /* 1-n mapping */
-                    if (targetsize > extrachars) {
-                        /* resize first */
-                        Py_ssize_t needed = (targetsize - extrachars) + \
-                            (targetsize << 2);
-                        extrachars += needed;
-                        /* XXX overflow detection missing */
-                        if (unicode_resize(&v,
-                                           PyUnicode_GET_LENGTH(v) + needed) < 0)
-                        {
-                            Py_DECREF(x);
-                            goto onError;
-                        }
-                    }
-                    if (unicode_widen(&v, outpos,
-                                      PyUnicode_MAX_CHAR_VALUE(x)) < 0) {
-                        Py_DECREF(x);
-                        goto onError;
-                    }
-                    PyUnicode_CopyCharacters(v, outpos, x, 0, targetsize);
-                    outpos += targetsize;
-                    extrachars -= targetsize;
-                }
-                /* 1-0 mapping: skip the character */
             }
             else {
-                /* wrong return value */
-                PyErr_SetString(PyExc_TypeError,
-                                "character mapping must return integer, None or str");
-                Py_DECREF(x);
-                goto onError;
+                writer->overallocate = 1;
+                if (_PyUnicodeWriter_WriteStr(writer, item) == -1)
+                    goto onError;
             }
-            Py_DECREF(x);
-            ++s;
-            continue;
+        }
+        else {
+            /* wrong return value */
+            PyErr_SetString(PyExc_TypeError,
+                            "character mapping must return integer, None or str");
+            goto onError;
+        }
+        Py_CLEAR(item);
+        ++s;
+        continue;
+
 Undefined:
-            /* undefined mapping */
-            Py_XDECREF(x);
-            startinpos = s-starts;
-            endinpos = startinpos+1;
-            if (unicode_decode_call_errorhandler(
-                    errors, &errorHandler,
-                    "charmap", "character maps to <undefined>",
-                    &starts, &e, &startinpos, &endinpos, &exc, &s,
-                    &v, &outpos)) {
-                goto onError;
-            }
+        /* undefined mapping */
+        Py_CLEAR(item);
+        startinpos = s-starts;
+        endinpos = startinpos+1;
+        if (unicode_decode_call_errorhandler_writer(
+                errors, &errorHandler,
+                "charmap", "character maps to <undefined>",
+                &starts, &e, &startinpos, &endinpos, &exc, &s,
+                writer)) {
+            goto onError;
         }
     }
-    if (unicode_resize(&v, outpos) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(v);
+    return 0;
 
-  onError:
+onError:
+    Py_XDECREF(item);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    Py_XDECREF(v);
+    return -1;
+}
+
+PyObject *
+PyUnicode_DecodeCharmap(const char *s,
+                        Py_ssize_t size,
+                        PyObject *mapping,
+                        const char *errors)
+{
+    _PyUnicodeWriter writer;
+
+    /* Default to Latin-1 */
+    if (mapping == NULL)
+        return PyUnicode_DecodeLatin1(s, size, errors);
+
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+        goto onError;
+
+    if (PyUnicode_CheckExact(mapping)) {
+        if (charmap_decode_string(s, size, mapping, errors, &writer) < 0)
+            goto onError;
+    }
+    else {
+        if (charmap_decode_mapping(s, size, mapping, errors, &writer) < 0)
+            goto onError;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
+
+  onError:
+    _PyUnicodeWriter_Dealloc(&writer);
     return NULL;
 }
 
@@ -8118,10 +8299,14 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
      * -1=not initialized, 0=unknown, 1=strict, 2=replace,
      * 3=ignore, 4=xmlcharrefreplace */
     int known_errorHandler = -1;
+    void *data;
+    int kind;
 
     if (PyUnicode_READY(unicode) == -1)
         return NULL;
     size = PyUnicode_GET_LENGTH(unicode);
+    data = PyUnicode_DATA(unicode);
+    kind = PyUnicode_KIND(unicode);
 
     /* Default to Latin-1 */
     if (mapping == NULL)
@@ -8136,7 +8321,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
         return res;
 
     while (inpos<size) {
-        Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, inpos);
+        Py_UCS4 ch = PyUnicode_READ(kind, data, inpos);
         /* try to encode it */
         charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
         if (x==enc_EXCEPTION) /* error */
@@ -8222,19 +8407,6 @@ make_translate_exception(PyObject **exceptionObject,
     }
 }
 
-/* raises a UnicodeTranslateError */
-static void
-raise_translate_exception(PyObject **exceptionObject,
-                          PyObject *unicode,
-                          Py_ssize_t startpos, Py_ssize_t endpos,
-                          const char *reason)
-{
-    make_translate_exception(exceptionObject,
-                             unicode, startpos, endpos, reason);
-    if (*exceptionObject != NULL)
-        PyCodec_StrictErrors(*exceptionObject);
-}
-
 /* error handling callback helper:
    build arguments, call the callback and check the arguments,
    put the result into newpos and return the replacement string, which
@@ -8510,8 +8682,10 @@ _PyUnicode_TranslateCharmap(PyObject *input,
             }
             switch (known_errorHandler) {
             case 1: /* strict */
-                raise_translate_exception(&exc, input, collstart,
-                                          collend, reason);
+                make_translate_exception(&exc,
+                                         input, collstart, collend, reason);
+                if (exc != NULL)
+                    PyCodec_StrictErrors(exc);
                 goto onError;
             case 2: /* replace */
                 /* No need to check for space, this is a 1:1 replacement */
@@ -9103,7 +9277,7 @@ tailmatch(PyObject *self,
 
     if (PyUnicode_READY(self) == -1 ||
         PyUnicode_READY(substring) == -1)
-        return 0;
+        return -1;
 
     if (PyUnicode_GET_LENGTH(substring) == 0)
         return 1;
@@ -9141,7 +9315,6 @@ tailmatch(PyObject *self,
             /* We do not need to compare 0 and len(substring)-1 because
                the if statement above ensured already that they are equal
                when we end up here. */
-            /* TODO: honor direction and do a forward or backwards search */
             for (i = 1; i < end_sub; ++i) {
                 if (PyUnicode_READ(kind_self, data_self, offset + i) !=
                     PyUnicode_READ(kind_sub, data_sub, i))
@@ -9603,41 +9776,49 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
             sep_data = PyUnicode_1BYTE_DATA(sep);
     }
 #endif
-    for (i = 0, res_offset = 0; i < seqlen; ++i) {
-        Py_ssize_t itemlen;
-        item = items[i];
-        /* Copy item, and maybe the separator. */
-        if (i && seplen != 0) {
-            if (use_memcpy) {
+    if (use_memcpy) {
+        for (i = 0; i < seqlen; ++i) {
+            Py_ssize_t itemlen;
+            item = items[i];
+
+            /* Copy the separator (between items only), then the item. */
+            if (i && seplen != 0) {
                 Py_MEMCPY(res_data,
                           sep_data,
                           kind * seplen);
                 res_data += kind * seplen;
             }
-            else {
-                _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
-                res_offset += seplen;
-            }
-        }
-        itemlen = PyUnicode_GET_LENGTH(item);
-        if (itemlen != 0) {
-            if (use_memcpy) {
+
+            itemlen = PyUnicode_GET_LENGTH(item);
+            if (itemlen != 0) {
                 Py_MEMCPY(res_data,
                           PyUnicode_DATA(item),
                           kind * itemlen);
                 res_data += kind * itemlen;
             }
-            else {
+        }
+        assert(res_data == PyUnicode_1BYTE_DATA(res)
+                           + kind * PyUnicode_GET_LENGTH(res));
+    }
+    else {
+        for (i = 0, res_offset = 0; i < seqlen; ++i) {
+            Py_ssize_t itemlen;
+            item = items[i];
+
+            /* Copy the separator (between items only), then the item. */
+            if (i && seplen != 0) {
+                _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
+                res_offset += seplen;
+            }
+
+            itemlen = PyUnicode_GET_LENGTH(item);
+            if (itemlen != 0) {
                 _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen);
                 res_offset += itemlen;
             }
         }
-    }
-    if (use_memcpy)
-        assert(res_data == PyUnicode_1BYTE_DATA(res)
-                           + kind * PyUnicode_GET_LENGTH(res));
-    else
         assert(res_offset == PyUnicode_GET_LENGTH(res));
+    }
 
     Py_DECREF(fseq);
     Py_XDECREF(sep);
@@ -10029,6 +10210,31 @@ anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen,
     return 0;
 }
 
+static void
+replace_1char_inplace(PyObject *u, Py_ssize_t pos,
+                      Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+    int kind = PyUnicode_KIND(u);
+    void *data = PyUnicode_DATA(u);
+    Py_ssize_t len = PyUnicode_GET_LENGTH(u);
+    if (kind == PyUnicode_1BYTE_KIND) {
+        ucs1lib_replace_1char_inplace((Py_UCS1 *)data + pos,
+                                      (Py_UCS1 *)data + len,
+                                      u1, u2, maxcount);
+    }
+    else if (kind == PyUnicode_2BYTE_KIND) {
+        ucs2lib_replace_1char_inplace((Py_UCS2 *)data + pos,
+                                      (Py_UCS2 *)data + len,
+                                      u1, u2, maxcount);
+    }
+    else {
+        assert(kind == PyUnicode_4BYTE_KIND);
+        ucs4lib_replace_1char_inplace((Py_UCS4 *)data + pos,
+                                      (Py_UCS4 *)data + len,
+                                      u1, u2, maxcount);
+    }
+}
+
 static PyObject *
 replace(PyObject *self, PyObject *str1,
         PyObject *str2, Py_ssize_t maxcount)
@@ -10045,7 +10251,7 @@ replace(PyObject *self, PyObject *str1,
     Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1);
     Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2);
     int mayshrink;
-    Py_UCS4 maxchar, maxchar_str2;
+    Py_UCS4 maxchar, maxchar_str1, maxchar_str2;
 
     if (maxcount < 0)
         maxcount = PY_SSIZE_T_MAX;
@@ -10054,15 +10260,16 @@ replace(PyObject *self, PyObject *str1,
 
     if (str1 == str2)
         goto nothing;
-    if (skind < kind1)
-        /* substring too wide to be present */
-        goto nothing;
 
     maxchar = PyUnicode_MAX_CHAR_VALUE(self);
+    maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1);
+    if (maxchar < maxchar_str1)
+        /* substring too wide to be present */
+        goto nothing;
     maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2);
     /* Replacing str1 with str2 may cause a maxchar reduction in the
        result string. */
-    mayshrink = (maxchar_str2 < maxchar);
+    mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1);
     maxchar = Py_MAX(maxchar, maxchar_str2);
 
     if (len1 == len2) {
@@ -10072,35 +10279,19 @@ replace(PyObject *self, PyObject *str1,
         if (len1 == 1) {
             /* replace characters */
             Py_UCS4 u1, u2;
-            int rkind;
-            Py_ssize_t index, pos;
-            char *src;
+            Py_ssize_t pos;
 
-            u1 = PyUnicode_READ_CHAR(str1, 0);
-            pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1);
+            u1 = PyUnicode_READ(kind1, buf1, 0);
+            pos = findchar(sbuf, skind, slen, u1, 1);
             if (pos < 0)
                 goto nothing;
-            u2 = PyUnicode_READ_CHAR(str2, 0);
+            u2 = PyUnicode_READ(kind2, buf2, 0);
             u = PyUnicode_New(slen, maxchar);
             if (!u)
                 goto error;
-            _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
-            rkind = PyUnicode_KIND(u);
 
-            PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
-            index = 0;
-            src = sbuf;
-            while (--maxcount)
-            {
-                pos++;
-                src += pos * PyUnicode_KIND(self);
-                slen -= pos;
-                index += pos;
-                pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1);
-                if (pos < 0)
-                    break;
-                PyUnicode_WRITE(rkind, PyUnicode_DATA(u), index + pos, u2);
-            }
+            _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
+            replace_1char_inplace(u, pos, u1, u2, maxcount);
         }
         else {
             int rkind = skind;
@@ -10412,9 +10603,24 @@ unicode_center(PyObject *self, PyObject *args)
 static int
 unicode_compare(PyObject *str1, PyObject *str2)
 {
+#define COMPARE(TYPE1, TYPE2) \
+    do { \
+        TYPE1* p1 = (TYPE1 *)data1; \
+        TYPE2* p2 = (TYPE2 *)data2; \
+        TYPE1* end = p1 + len; \
+        Py_UCS4 c1, c2; \
+        for (; p1 != end; p1++, p2++) { \
+            c1 = *p1; \
+            c2 = *p2; \
+            if (c1 != c2) \
+                return (c1 < c2) ? -1 : 1; \
+        } \
+    } \
+    while (0)
+
     int kind1, kind2;
     void *data1, *data2;
-    Py_ssize_t len1, len2, i;
+    Py_ssize_t len1, len2, len;
 
     kind1 = PyUnicode_KIND(str1);
     kind2 = PyUnicode_KIND(str2);
@@ -10422,19 +10628,116 @@ unicode_compare(PyObject *str1, PyObject *str2)
     data2 = PyUnicode_DATA(str2);
     len1 = PyUnicode_GET_LENGTH(str1);
     len2 = PyUnicode_GET_LENGTH(str2);
+    len = Py_MIN(len1, len2);
 
-    for (i = 0; i < len1 && i < len2; ++i) {
-        Py_UCS4 c1, c2;
-        c1 = PyUnicode_READ(kind1, data1, i);
-        c2 = PyUnicode_READ(kind2, data2, i);
-
-        if (c1 != c2)
-            return (c1 < c2) ? -1 : 1;
+    switch(kind1) {
+    case PyUnicode_1BYTE_KIND:
+    {
+        switch(kind2) {
+        case PyUnicode_1BYTE_KIND:
+        {
+            int cmp = memcmp(data1, data2, len);
+            /* normalize result of memcmp() into the range [-1; 1] */
+            if (cmp < 0)
+                return -1;
+            if (cmp > 0)
+                return 1;
+            break;
+        }
+        case PyUnicode_2BYTE_KIND:
+            COMPARE(Py_UCS1, Py_UCS2);
+            break;
+        case PyUnicode_4BYTE_KIND:
+            COMPARE(Py_UCS1, Py_UCS4);
+            break;
+        default:
+            assert(0);
+        }
+        break;
+    }
+    case PyUnicode_2BYTE_KIND:
+    {
+        switch(kind2) {
+        case PyUnicode_1BYTE_KIND:
+            COMPARE(Py_UCS2, Py_UCS1);
+            break;
+        case PyUnicode_2BYTE_KIND:
+        {
+            COMPARE(Py_UCS2, Py_UCS2);
+            break;
+        }
+        case PyUnicode_4BYTE_KIND:
+            COMPARE(Py_UCS2, Py_UCS4);
+            break;
+        default:
+            assert(0);
+        }
+        break;
+    }
+    case PyUnicode_4BYTE_KIND:
+    {
+        switch(kind2) {
+        case PyUnicode_1BYTE_KIND:
+            COMPARE(Py_UCS4, Py_UCS1);
+            break;
+        case PyUnicode_2BYTE_KIND:
+            COMPARE(Py_UCS4, Py_UCS2);
+            break;
+        case PyUnicode_4BYTE_KIND:
+        {
+#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 4
+            int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len);
+            /* normalize result of wmemcmp() into the range [-1; 1] */
+            if (cmp < 0)
+                return -1;
+            if (cmp > 0)
+                return 1;
+#else
+            COMPARE(Py_UCS4, Py_UCS4);
+#endif
+            break;
+        }
+        default:
+            assert(0);
+        }
+        break;
+    }
+    default:
+        assert(0);
     }
 
-    return (len1 < len2) ? -1 : (len1 != len2);
+    if (len1 == len2)
+        return 0;
+    if (len1 < len2)
+        return -1;
+    else
+        return 1;
+
+#undef COMPARE
 }
 
+Py_LOCAL(int)
+unicode_compare_eq(PyObject *str1, PyObject *str2)
+{
+    int kind;
+    void *data1, *data2;
+    Py_ssize_t len;
+    int cmp;
+
+    len = PyUnicode_GET_LENGTH(str1);
+    if (PyUnicode_GET_LENGTH(str2) != len)
+        return 0;
+    kind = PyUnicode_KIND(str1);
+    if (PyUnicode_KIND(str2) != kind)
+        return 0;
+    data1 = PyUnicode_DATA(str1);
+    data2 = PyUnicode_DATA(str2);
+
+    cmp = memcmp(data1, data2, len * kind);
+    return (cmp == 0);
+}
+
+
 int
 PyUnicode_Compare(PyObject *left, PyObject *right)
 {
@@ -10442,6 +10745,11 @@ PyUnicode_Compare(PyObject *left, PyObject *right)
         if (PyUnicode_READY(left) == -1 ||
             PyUnicode_READY(right) == -1)
             return -1;
+
+        /* a string is equal to itself */
+        if (left == right)
+            return 0;
+
         return unicode_compare(left, right);
     }
     PyErr_Format(PyExc_TypeError,
@@ -10452,29 +10760,59 @@ PyUnicode_Compare(PyObject *left, PyObject *right)
 }
 
 int
+_PyUnicode_CompareWithId(PyObject *left, _Py_Identifier *right)
+{
+    PyObject *right_str = _PyUnicode_FromId(right);   /* borrowed */
+    if (right_str == NULL)
+        return -1;
+    return PyUnicode_Compare(left, right_str);
+}
+
+int
 PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
 {
     Py_ssize_t i;
     int kind;
-    void *data;
     Py_UCS4 chr;
 
     assert(_PyUnicode_CHECK(uni));
     if (PyUnicode_READY(uni) == -1)
         return -1;
     kind = PyUnicode_KIND(uni);
-    data = PyUnicode_DATA(uni);
-    /* Compare Unicode string and source character set string */
-    for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
-        if (chr != str[i])
-            return (chr < (unsigned char)(str[i])) ? -1 : 1;
-    /* This check keeps Python strings that end in '\0' from comparing equal
-     to C strings identical up to that point. */
-    if (PyUnicode_GET_LENGTH(uni) != i || chr)
-        return 1; /* uni is longer */
-    if (str[i])
-        return -1; /* str is longer */
-    return 0;
+    if (kind == PyUnicode_1BYTE_KIND) {
+        const void *data = PyUnicode_1BYTE_DATA(uni);
+        size_t len1 = (size_t)PyUnicode_GET_LENGTH(uni);
+        size_t len, len2 = strlen(str);
+        int cmp;
+
+        len = Py_MIN(len1, len2);
+        cmp = memcmp(data, str, len);
+        if (cmp != 0) {
+            if (cmp < 0)
+                return -1;
+            else
+                return 1;
+        }
+        if (len1 > len2)
+            return 1; /* uni is longer */
+        if (len2 > len1)
+            return -1; /* str is longer */
+        return 0;
+    }
+    else {
+        void *data = PyUnicode_DATA(uni);
+        /* Compare Unicode string and source character set string */
+        for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
+            if (chr != str[i])
+                return (chr < (unsigned char)(str[i])) ? -1 : 1;
+        /* This check keeps Python strings that end in '\0' from comparing equal
+         to C strings identical up to that point. */
+        if (PyUnicode_GET_LENGTH(uni) != i || chr)
+            return 1; /* uni is longer */
+        if (str[i])
+            return -1; /* str is longer */
+        return 0;
+    }
 }
 
 
@@ -10485,36 +10823,43 @@ PyObject *
 PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
 {
     int result;
+    PyObject *v;
 
-    if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
-        PyObject *v;
-        if (PyUnicode_READY(left) == -1 ||
-            PyUnicode_READY(right) == -1)
-            return NULL;
-        if (PyUnicode_GET_LENGTH(left) != PyUnicode_GET_LENGTH(right) ||
-            PyUnicode_KIND(left) != PyUnicode_KIND(right)) {
-            if (op == Py_EQ) {
-                Py_INCREF(Py_False);
-                return Py_False;
-            }
-            if (op == Py_NE) {
-                Py_INCREF(Py_True);
-                return Py_True;
-            }
-        }
-        if (left == right)
-            result = 0;
-        else
-            result = unicode_compare(left, right);
+    if (!PyUnicode_Check(left) || !PyUnicode_Check(right))
+        Py_RETURN_NOTIMPLEMENTED;
 
-        /* Convert the return value to a Boolean */
+    if (PyUnicode_READY(left) == -1 ||
+        PyUnicode_READY(right) == -1)
+        return NULL;
+
+    if (left == right) {
         switch (op) {
         case Py_EQ:
-            v = TEST_COND(result == 0);
+        case Py_LE:
+        case Py_GE:
+            /* a string is equal to itself */
+            v = Py_True;
             break;
         case Py_NE:
-            v = TEST_COND(result != 0);
+        case Py_LT:
+        case Py_GT:
+            v = Py_False;
             break;
+        default:
+            PyErr_BadArgument();
+            return NULL;
+        }
+    }
+    else if (op == Py_EQ || op == Py_NE) {
+        result = unicode_compare_eq(left, right);
+        result ^= (op == Py_NE);
+        v = TEST_COND(result);
+    }
+    else {
+        result = unicode_compare(left, right);
+
+        /* Convert the return value to a Boolean */
+        switch (op) {
         case Py_LE:
             v = TEST_COND(result <= 0);
             break;
@@ -10531,18 +10876,16 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
             PyErr_BadArgument();
             return NULL;
         }
-        Py_INCREF(v);
-        return v;
     }
-
-    Py_RETURN_NOTIMPLEMENTED;
+    Py_INCREF(v);
+    return v;
 }
 
 int
 PyUnicode_Contains(PyObject *container, PyObject *element)
 {
     PyObject *str, *sub;
-    int kind1, kind2, kind;
+    int kind1, kind2;
     void *buf1, *buf2;
     Py_ssize_t len1, len2;
     int result;
@@ -10561,23 +10904,18 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
         Py_DECREF(sub);
         return -1;
     }
-    if (PyUnicode_READY(sub) == -1 || PyUnicode_READY(str) == -1) {
-        Py_DECREF(sub);
-        Py_DECREF(str);
-    }
 
     kind1 = PyUnicode_KIND(str);
     kind2 = PyUnicode_KIND(sub);
-    kind = kind1;
     buf1 = PyUnicode_DATA(str);
     buf2 = PyUnicode_DATA(sub);
-    if (kind2 != kind) {
-        if (kind2 > kind) {
+    if (kind2 != kind1) {
+        if (kind2 > kind1) {
             Py_DECREF(sub);
             Py_DECREF(str);
             return 0;
         }
-        buf2 = _PyUnicode_AsKind(sub, kind);
+        buf2 = _PyUnicode_AsKind(sub, kind1);
     }
     if (!buf2) {
         Py_DECREF(sub);
@@ -10587,7 +10925,7 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
     len1 = PyUnicode_GET_LENGTH(str);
     len2 = PyUnicode_GET_LENGTH(sub);
 
-    switch (kind) {
+    switch (kind1) {
     case PyUnicode_1BYTE_KIND:
         result = ucs1lib_find(buf1, len1, buf2, len2, 0) != -1;
         break;
@@ -10605,7 +10943,7 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
     Py_DECREF(str);
     Py_DECREF(sub);
 
-    if (kind2 != kind)
+    if (kind2 != kind1)
         PyMem_Free(buf2);
 
     return result;
@@ -10681,7 +11019,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
         return;
     }
     left = *p_left;
-    if (right == NULL || left == NULL || !PyUnicode_Check(left)) {
+    if (right == NULL || left == NULL
+        || !PyUnicode_Check(left) || !PyUnicode_Check(right)) {
         if (!PyErr_Occurred())
             PyErr_BadInternalCall();
         goto error;
@@ -10721,15 +11060,9 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
         && !(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right)))
     {
         /* append inplace */
-        if (unicode_resize(p_left, new_len) != 0) {
-            /* XXX if _PyUnicode_Resize() fails, 'left' has been
-             * deallocated so it cannot be put back into
-             * 'variable'.  The MemoryError is raised when there
-             * is no value in 'variable', which might (very
-             * remotely) be a cause of incompatibilities.
-             */
+        if (unicode_resize(p_left, new_len) != 0)
             goto error;
-        }
+
         /* copy 'right' into the newly allocated area of 'left' */
         _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len);
     }
@@ -10785,8 +11118,10 @@ unicode_count(PyObject *self, PyObject *args)
 
     kind1 = PyUnicode_KIND(self);
     kind2 = PyUnicode_KIND(substring);
-    if (kind2 > kind1)
+    if (kind2 > kind1) {
+        Py_DECREF(substring);
         return PyLong_FromLong(0);
+    }
     kind = kind1;
     buf1 = PyUnicode_DATA(self);
     buf2 = PyUnicode_DATA(substring);
@@ -10857,23 +11192,25 @@ unicode_encode(PyObject *self, PyObject *args, PyObject *kwargs)
 }
 
 PyDoc_STRVAR(expandtabs__doc__,
-             "S.expandtabs([tabsize]) -> str\n\
+             "S.expandtabs(tabsize=8) -> str\n\
 \n\
 Return a copy of S where all tab characters are expanded using spaces.\n\
 If tabsize is not given, a tab size of 8 characters is assumed.");
 
 static PyObject*
-unicode_expandtabs(PyObject *self, PyObject *args)
+unicode_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
 {
     Py_ssize_t i, j, line_pos, src_len, incr;
     Py_UCS4 ch;
     PyObject *u;
     void *src_data, *dest_data;
+    static char *kwlist[] = {"tabsize", 0};
     int tabsize = 8;
     int kind;
     int found;
 
-    if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs",
+                                     kwlist, &tabsize))
         return NULL;
 
     if (PyUnicode_READY(self) == -1)
@@ -10964,10 +11301,14 @@ unicode_find(PyObject *self, PyObject *args)
                                             &start, &end))
         return NULL;
 
-    if (PyUnicode_READY(self) == -1)
+    if (PyUnicode_READY(self) == -1) {
+        Py_DECREF(substring);
         return NULL;
-    if (PyUnicode_READY(substring) == -1)
+    }
+    if (PyUnicode_READY(substring) == -1) {
+        Py_DECREF(substring);
         return NULL;
+    }
 
     result = any_find_slice(1, self, substring, start, end);
 
@@ -10985,7 +11326,6 @@ unicode_getitem(PyObject *self, Py_ssize_t index)
     void *data;
     enum PyUnicode_Kind kind;
     Py_UCS4 ch;
-    PyObject *res;
 
     if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) {
         PyErr_BadArgument();
@@ -10998,17 +11338,7 @@ unicode_getitem(PyObject *self, Py_ssize_t index)
     kind = PyUnicode_KIND(self);
     data = PyUnicode_DATA(self);
     ch = PyUnicode_READ(kind, data, index);
-    if (ch < 256)
-        return get_latin1_char(ch);
-
-    res = PyUnicode_New(1, ch);
-    if (res == NULL)
-        return NULL;
-    kind = PyUnicode_KIND(res);
-    data = PyUnicode_DATA(res);
-    PyUnicode_WRITE(kind, data, 0, ch);
-    assert(_PyUnicode_CheckConsistency(res, 1));
-    return res;
+    return unicode_char(ch);
 }
 
 /* Believe it or not, this produces the same value for ASCII strings
@@ -11035,43 +11365,11 @@ unicode_hash(PyObject *self)
         _PyUnicode_HASH(self) = 0;
         return 0;
     }
-
-    /* The hash function as a macro, gets expanded three times below. */
-#define HASH(P)                                            \
-    x ^= (Py_uhash_t) *P << 7;                             \
-    while (--len >= 0)                                     \
-        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *P++;  \
-
-    x = (Py_uhash_t) _Py_HashSecret.prefix;
-    switch (PyUnicode_KIND(self)) {
-    case PyUnicode_1BYTE_KIND: {
-        const unsigned char *c = PyUnicode_1BYTE_DATA(self);
-        HASH(c);
-        break;
-    }
-    case PyUnicode_2BYTE_KIND: {
-        const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self);
-        HASH(s);
-        break;
-    }
-    default: {
-        Py_UCS4 *l;
-        assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND &&
-               "Impossible switch case in unicode_hash");
-        l = PyUnicode_4BYTE_DATA(self);
-        HASH(l);
-        break;
-    }
-    }
-    x ^= (Py_uhash_t) PyUnicode_GET_LENGTH(self);
-    x ^= (Py_uhash_t) _Py_HashSecret.suffix;
-
-    if (x == -1)
-        x = -2;
+    x = _Py_HashBytes(PyUnicode_DATA(self),
+                      PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
     _PyUnicode_HASH(self) = x;
     return x;
 }
-#undef HASH
 
 PyDoc_STRVAR(index__doc__,
              "S.index(sub[, start[, end]]) -> int\n\
@@ -11090,10 +11388,14 @@ unicode_index(PyObject *self, PyObject *args)
                                             &start, &end))
         return NULL;
 
-    if (PyUnicode_READY(self) == -1)
+    if (PyUnicode_READY(self) == -1) {
+        Py_DECREF(substring);
         return NULL;
-    if (PyUnicode_READY(substring) == -1)
+    }
+    if (PyUnicode_READY(substring) == -1) {
+        Py_DECREF(substring);
         return NULL;
+    }
 
     result = any_find_slice(1, self, substring, start, end);
 
@@ -11623,6 +11925,7 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
     int kind;
     Py_ssize_t i, j, len;
     BLOOM_MASK sepmask;
+    Py_ssize_t seplen;
 
     if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1)
         return NULL;
@@ -11630,24 +11933,35 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
     kind = PyUnicode_KIND(self);
     data = PyUnicode_DATA(self);
     len = PyUnicode_GET_LENGTH(self);
+    seplen = PyUnicode_GET_LENGTH(sepobj);
     sepmask = make_bloom_mask(PyUnicode_KIND(sepobj),
                               PyUnicode_DATA(sepobj),
-                              PyUnicode_GET_LENGTH(sepobj));
+                              seplen);
 
     i = 0;
     if (striptype != RIGHTSTRIP) {
-        while (i < len &&
-               BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, i), sepobj)) {
+        while (i < len) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+            if (!BLOOM(sepmask, ch))
+                break;
+            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
+                break;
             i++;
         }
     }
 
     j = len;
     if (striptype != LEFTSTRIP) {
-        do {
+        j--;
+        while (j >= i) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+            if (!BLOOM(sepmask, ch))
+                break;
+            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
+                break;
             j--;
-        } while (j >= i &&
-                 BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, j), sepobj));
+        }
+
         j++;
     }
 
@@ -11694,30 +12008,63 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
 static PyObject *
 do_strip(PyObject *self, int striptype)
 {
-    int kind;
-    void *data;
     Py_ssize_t len, i, j;
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
 
-    kind = PyUnicode_KIND(self);
-    data = PyUnicode_DATA(self);
     len = PyUnicode_GET_LENGTH(self);
 
-    i = 0;
-    if (striptype != RIGHTSTRIP) {
-        while (i < len && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
-            i++;
+    if (PyUnicode_IS_ASCII(self)) {
+        Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
+
+        i = 0;
+        if (striptype != RIGHTSTRIP) {
+            while (i < len) {
+                Py_UCS1 ch = data[i];
+                if (!_Py_ascii_whitespace[ch])
+                    break;
+                i++;
+            }
+        }
+
+        j = len;
+        if (striptype != LEFTSTRIP) {
+            j--;
+            while (j >= i) {
+                Py_UCS1 ch = data[j];
+                if (!_Py_ascii_whitespace[ch])
+                    break;
+                j--;
+            }
+            j++;
         }
     }
+    else {
+        int kind = PyUnicode_KIND(self);
+        void *data = PyUnicode_DATA(self);
 
-    j = len;
-    if (striptype != LEFTSTRIP) {
-        do {
+        i = 0;
+        if (striptype != RIGHTSTRIP) {
+            while (i < len) {
+                Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+                if (!Py_UNICODE_ISSPACE(ch))
+                    break;
+                i++;
+            }
+        }
+
+        j = len;
+        if (striptype != LEFTSTRIP) {
             j--;
-        } while (j >= i && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j)));
-        j++;
+            while (j >= i) {
+                Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+                if (!Py_UNICODE_ISSPACE(ch))
+                    break;
+                j--;
+            }
+            j++;
+        }
     }
 
     return PyUnicode_Substring(self, i, j);
@@ -11729,7 +12076,7 @@ do_argstrip(PyObject *self, int striptype, PyObject *args)
 {
     PyObject *sep = NULL;
 
-    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
+    if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
         return NULL;
 
     if (sep != NULL && sep != Py_None) {
@@ -11941,7 +12288,7 @@ unicode_repr(PyObject *unicode)
     Py_ssize_t isize;
     Py_ssize_t osize, squote, dquote, i, o;
     Py_UCS4 max, quote;
-    int ikind, okind;
+    int ikind, okind, unchanged;
     void *idata, *odata;
 
     if (PyUnicode_READY(unicode) == -1)
@@ -11952,7 +12299,7 @@ unicode_repr(PyObject *unicode)
 
     /* Compute length of output, quote characters, and
        maximum character */
-    osize = 2; /* quotes */
+    osize = 0;
     max = 127;
     squote = dquote = 0;
     ikind = PyUnicode_KIND(unicode);
@@ -11983,7 +12330,9 @@ unicode_repr(PyObject *unicode)
     }
 
     quote = '\'';
+    unchanged = (osize == isize);
     if (squote) {
+        unchanged = 0;
         if (dquote)
             /* Both squote and dquote present. Use squote,
                and escape them */
@@ -11991,6 +12340,7 @@ unicode_repr(PyObject *unicode)
         else
             quote = '"';
     }
+    osize += 2;   /* quotes */
 
     repr = PyUnicode_New(osize, max);
     if (repr == NULL)
@@ -12000,82 +12350,88 @@ unicode_repr(PyObject *unicode)
 
     PyUnicode_WRITE(okind, odata, 0, quote);
     PyUnicode_WRITE(okind, odata, osize-1, quote);
+    if (unchanged) {
+        _PyUnicode_FastCopyCharacters(repr, 1,
+                                      unicode, 0,
+                                      isize);
+    }
+    else {
+        for (i = 0, o = 1; i < isize; i++) {
+            Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
 
-    for (i = 0, o = 1; i < isize; i++) {
-        Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
-
-        /* Escape quotes and backslashes */
-        if ((ch == quote) || (ch == '\\')) {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, ch);
-            continue;
-        }
+            /* Escape quotes and backslashes */
+            if ((ch == quote) || (ch == '\\')) {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, ch);
+                continue;
+            }
 
-        /* Map special whitespace to '\t', \n', '\r' */
-        if (ch == '\t') {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 't');
-        }
-        else if (ch == '\n') {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 'n');
-        }
-        else if (ch == '\r') {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 'r');
-        }
+            /* Map special whitespace to '\t', '\n', '\r' */
+            if (ch == '\t') {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 't');
+            }
+            else if (ch == '\n') {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 'n');
+            }
+            else if (ch == '\r') {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 'r');
+            }
 
-        /* Map non-printable US ASCII to '\xhh' */
-        else if (ch < ' ' || ch == 0x7F) {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 'x');
-            PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
-            PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
-        }
+            /* Map non-printable US ASCII to '\xhh' */
+            else if (ch < ' ' || ch == 0x7F) {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 'x');
+                PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
+                PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
+            }
 
-        /* Copy ASCII characters as-is */
-        else if (ch < 0x7F) {
-            PyUnicode_WRITE(okind, odata, o++, ch);
-        }
+            /* Copy ASCII characters as-is */
+            else if (ch < 0x7F) {
+                PyUnicode_WRITE(okind, odata, o++, ch);
+            }
 
-        /* Non-ASCII characters */
-        else {
-            /* Map Unicode whitespace and control characters
-               (categories Z* and C* except ASCII space)
-            */
-            if (!Py_UNICODE_ISPRINTABLE(ch)) {
-                PyUnicode_WRITE(okind, odata, o++, '\\');
-                /* Map 8-bit characters to '\xhh' */
-                if (ch <= 0xff) {
-                    PyUnicode_WRITE(okind, odata, o++, 'x');
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
-                }
-                /* Map 16-bit characters to '\uxxxx' */
-                else if (ch <= 0xffff) {
-                    PyUnicode_WRITE(okind, odata, o++, 'u');
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+            /* Non-ASCII characters */
+            else {
+                /* Map Unicode whitespace and control characters
+                   (categories Z* and C* except ASCII space)
+                */
+                if (!Py_UNICODE_ISPRINTABLE(ch)) {
+                    PyUnicode_WRITE(okind, odata, o++, '\\');
+                    /* Map 8-bit characters to '\xhh' */
+                    if (ch <= 0xff) {
+                        PyUnicode_WRITE(okind, odata, o++, 'x');
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
+                    }
+                    /* Map 16-bit characters to '\uxxxx' */
+                    else if (ch <= 0xffff) {
+                        PyUnicode_WRITE(okind, odata, o++, 'u');
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+                    }
+                    /* Map 21-bit characters to '\U00xxxxxx' */
+                    else {
+                        PyUnicode_WRITE(okind, odata, o++, 'U');
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+                    }
                 }
-                /* Map 21-bit characters to '\U00xxxxxx' */
+                /* Copy characters as-is */
                 else {
-                    PyUnicode_WRITE(okind, odata, o++, 'U');
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+                    PyUnicode_WRITE(okind, odata, o++, ch);
                 }
             }
-            /* Copy characters as-is */
-            else {
-                PyUnicode_WRITE(okind, odata, o++, ch);
-            }
         }
     }
     /* Closing quote already added at the beginning */
@@ -12104,10 +12460,14 @@ unicode_rfind(PyObject *self, PyObject *args)
                                             &start, &end))
         return NULL;
 
-    if (PyUnicode_READY(self) == -1)
+    if (PyUnicode_READY(self) == -1) {
+        Py_DECREF(substring);
         return NULL;
-    if (PyUnicode_READY(substring) == -1)
+    }
+    if (PyUnicode_READY(substring) == -1) {
+        Py_DECREF(substring);
         return NULL;
+    }
 
     result = any_find_slice(-1, self, substring, start, end);
 
@@ -12136,10 +12496,14 @@ unicode_rindex(PyObject *self, PyObject *args)
                                             &start, &end))
         return NULL;
 
-    if (PyUnicode_READY(self) == -1)
+    if (PyUnicode_READY(self) == -1) {
+        Py_DECREF(substring);
         return NULL;
-    if (PyUnicode_READY(substring) == -1)
+    }
+    if (PyUnicode_READY(substring) == -1) {
+        Py_DECREF(substring);
         return NULL;
+    }
 
     result = any_find_slice(-1, self, substring, start, end);
 
@@ -12496,28 +12860,74 @@ unicode_swapcase(PyObject *self)
     return case_operation(self, do_swapcase);
 }
 
-PyDoc_STRVAR(maketrans__doc__,
-             "str.maketrans(x[, y[, z]]) -> dict (static method)\n\
-\n\
-Return a translation table usable for str.translate().\n\
-If there is only one argument, it must be a dictionary mapping Unicode\n\
-ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
-Character keys will be then converted to ordinals.\n\
-If there are two arguments, they must be strings of equal length, and\n\
-in the resulting dictionary, each character in x will be mapped to the\n\
-character at the same position in y. If there is a third argument, it\n\
-must be a string, whose characters will be mapped to None in the result.");
+/*[clinic input]
 
-static PyObject*
-unicode_maketrans(PyObject *null, PyObject *args)
+@staticmethod
+str.maketrans as unicode_maketrans
+
+  x: object
+
+  y: unicode=NULL
+
+  z: unicode=NULL
+
+  /
+
+Return a translation table usable for str.translate().
+
+If there is only one argument, it must be a dictionary mapping Unicode
+ordinals (integers) or characters to Unicode ordinals, strings or None.
+Character keys will be then converted to ordinals.
+If there are two arguments, they must be strings of equal length, and
+in the resulting dictionary, each character in x will be mapped to the
+character at the same position in y. If there is a third argument, it
+must be a string, whose characters will be mapped to None in the result.
+[clinic start generated code]*/
+
+PyDoc_STRVAR(unicode_maketrans__doc__,
+"maketrans(x, y=None, z=None)\n"
+"Return a translation table usable for str.translate().\n"
+"\n"
+"If there is only one argument, it must be a dictionary mapping Unicode\n"
+"ordinals (integers) or characters to Unicode ordinals, strings or None.\n"
+"Character keys will be then converted to ordinals.\n"
+"If there are two arguments, they must be strings of equal length, and\n"
+"in the resulting dictionary, each character in x will be mapped to the\n"
+"character at the same position in y. If there is a third argument, it\n"
+"must be a string, whose characters will be mapped to None in the result.");
+
+#define UNICODE_MAKETRANS_METHODDEF    \
+    {"maketrans", (PyCFunction)unicode_maketrans, METH_VARARGS|METH_STATIC, unicode_maketrans__doc__},
+
+static PyObject *
+unicode_maketrans_impl(void *null, PyObject *x, PyObject *y, PyObject *z);
+
+static PyObject *
+unicode_maketrans(void *null, PyObject *args)
+{
+    PyObject *return_value = NULL;
+    PyObject *x;
+    PyObject *y = NULL;
+    PyObject *z = NULL;
+
+    if (!PyArg_ParseTuple(args,
+        "O|UU:maketrans",
+        &x, &y, &z))
+        goto exit;
+    return_value = unicode_maketrans_impl(null, x, y, z);
+
+exit:
+    return return_value;
+}
+
+static PyObject *
+unicode_maketrans_impl(void *null, PyObject *x, PyObject *y, PyObject *z)
+/*[clinic end generated code: checksum=7f76f414a0dfd0c614e0d4717872eeb520516da7]*/
 {
-    PyObject *x, *y = NULL, *z = NULL;
     PyObject *new = NULL, *key, *value;
     Py_ssize_t i = 0;
     int res;
 
-    if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
-        return NULL;
     new = PyDict_New();
     if (!new)
         return NULL;
@@ -12728,6 +13138,8 @@ unicode_startswith(PyObject *self,
                 return NULL;
             result = tailmatch(self, substring, start, end, -1);
             Py_DECREF(substring);
+            if (result == -1)
+                return NULL;
             if (result) {
                 Py_RETURN_TRUE;
             }
@@ -12744,6 +13156,8 @@ unicode_startswith(PyObject *self,
     }
     result = tailmatch(self, substring, start, end, -1);
     Py_DECREF(substring);
+    if (result == -1)
+        return NULL;
     return PyBool_FromLong(result);
 }
 
@@ -12777,6 +13191,8 @@ unicode_endswith(PyObject *self,
                 return NULL;
             result = tailmatch(self, substring, start, end, +1);
             Py_DECREF(substring);
+            if (result == -1)
+                return NULL;
             if (result) {
                 Py_RETURN_TRUE;
             }
@@ -12792,33 +13208,48 @@ unicode_endswith(PyObject *self,
     }
     result = tailmatch(self, substring, start, end, +1);
     Py_DECREF(substring);
+    if (result == -1)
+        return NULL;
     return PyBool_FromLong(result);
 }
 
 Py_LOCAL_INLINE(void)
 _PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
 {
-    writer->size = PyUnicode_GET_LENGTH(writer->buffer);
+    if (!writer->readonly)
+        writer->size = PyUnicode_GET_LENGTH(writer->buffer);
+    else {
+        /* Copy-on-write mode: set buffer size to 0 so
+         * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
+         * next write. */
+        writer->size = 0;
+    }
     writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
     writer->data = PyUnicode_DATA(writer->buffer);
     writer->kind = PyUnicode_KIND(writer->buffer);
 }
 
 void
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
 {
     memset(writer, 0, sizeof(*writer));
 #ifdef Py_DEBUG
     writer->kind = 5;    /* invalid kind */
 #endif
-    writer->min_length = Py_MAX(min_length, 100);
-    writer->overallocate = (min_length > 0);
+    writer->min_char = 127;
 }
 
 int
 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                  Py_ssize_t length, Py_UCS4 maxchar)
 {
+#ifdef MS_WINDOWS
+   /* On Windows, overallocating by 50% (newlen / 2) is the best factor */
+#  define OVERALLOCATE_FACTOR 2
+#else
+   /* On Linux, overallocating by 25% (newlen / 4) is the best factor */
+#  define OVERALLOCATE_FACTOR 4
+#endif
     Py_ssize_t newlen;
     PyObject *newbuffer;
 
@@ -12830,29 +13261,30 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
     }
     newlen = writer->pos + length;
 
+    maxchar = Py_MAX(maxchar, writer->min_char);
+
     if (writer->buffer == NULL) {
-        if (writer->overallocate) {
-            /* overallocate 25% to limit the number of resize */
-            if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
-                newlen += newlen / 4;
-            if (newlen < writer->min_length)
-                newlen = writer->min_length;
+        assert(!writer->readonly);
+        if (writer->overallocate
+            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
+            /* overallocate to limit the number of realloc() calls */
+            newlen += newlen / OVERALLOCATE_FACTOR;
         }
+        if (newlen < writer->min_length)
+            newlen = writer->min_length;
+
         writer->buffer = PyUnicode_New(newlen, maxchar);
         if (writer->buffer == NULL)
             return -1;
-        _PyUnicodeWriter_Update(writer);
-        return 0;
     }
-
-    if (newlen > writer->size) {
-        if (writer->overallocate) {
-            /* overallocate 25% to limit the number of resize */
-            if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
-                newlen += newlen / 4;
-            if (newlen < writer->min_length)
-                newlen = writer->min_length;
+    else if (newlen > writer->size) {
+        if (writer->overallocate
+            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
+            /* overallocate to limit the number of realloc() calls */
+            newlen += newlen / OVERALLOCATE_FACTOR;
         }
+        if (newlen < writer->min_length)
+            newlen = writer->min_length;
 
         if (maxchar > writer->maxchar || writer->readonly) {
             /* resize + widen */
@@ -12870,7 +13302,6 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                 return -1;
         }
         writer->buffer = newbuffer;
-        _PyUnicodeWriter_Update(writer);
     }
     else if (maxchar > writer->maxchar) {
         assert(!writer->readonly);
@@ -12881,12 +13312,30 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                       writer->buffer, 0, writer->pos);
         Py_DECREF(writer->buffer);
         writer->buffer = newbuffer;
-        _PyUnicodeWriter_Update(writer);
     }
+    _PyUnicodeWriter_Update(writer);
+    return 0;
+
+#undef OVERALLOCATE_FACTOR
+}
+
+Py_LOCAL_INLINE(int)
+_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    if (_PyUnicodeWriter_Prepare(writer, 1, ch) < 0)
+        return -1;
+    PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
+    writer->pos++;
     return 0;
 }
 
 int
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    return _PyUnicodeWriter_WriteCharInline(writer, ch);
+}
+
+int
 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
 {
     Py_UCS4 maxchar;
@@ -12900,11 +13349,10 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
     maxchar = PyUnicode_MAX_CHAR_VALUE(str);
     if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
         if (writer->buffer == NULL && !writer->overallocate) {
+            writer->readonly = 1;
             Py_INCREF(str);
             writer->buffer = str;
             _PyUnicodeWriter_Update(writer);
-            writer->readonly = 1;
-            writer->size = 0;
             writer->pos += len;
             return 0;
         }
@@ -12917,28 +13365,143 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
     return 0;
 }
 
+int
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
+                                Py_ssize_t start, Py_ssize_t end)
+{
+    Py_UCS4 maxchar;
+    Py_ssize_t len;
+
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+
+    assert(0 <= start);
+    assert(end <= PyUnicode_GET_LENGTH(str));
+    assert(start <= end);
+
+    if (end == 0)
+        return 0;
+
+    if (start == 0 && end == PyUnicode_GET_LENGTH(str))
+        return _PyUnicodeWriter_WriteStr(writer, str);
+
+    if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
+        maxchar = _PyUnicode_FindMaxChar(str, start, end);
+    else
+        maxchar = writer->maxchar;
+    len = end - start;
+
+    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0)
+        return -1;
+
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, start, len);
+    writer->pos += len;
+    return 0;
+}
+
+int
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+                                  const char *ascii, Py_ssize_t len)
+{
+    if (len == -1)
+        len = strlen(ascii);
+
+    assert(ucs1lib_find_max_char((Py_UCS1*)ascii, (Py_UCS1*)ascii + len) < 128);
+
+    if (writer->buffer == NULL && !writer->overallocate) {
+        PyObject *str;
+
+        str = _PyUnicode_FromASCII(ascii, len);
+        if (str == NULL)
+            return -1;
+
+        writer->readonly = 1;
+        writer->buffer = str;
+        _PyUnicodeWriter_Update(writer);
+        writer->pos += len;
+        return 0;
+    }
+
+    if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
+        return -1;
+
+    switch (writer->kind)
+    {
+    case PyUnicode_1BYTE_KIND:
+    {
+        const Py_UCS1 *str = (const Py_UCS1 *)ascii;
+        Py_UCS1 *data = writer->data;
+
+        Py_MEMCPY(data + writer->pos, str, len);
+        break;
+    }
+    case PyUnicode_2BYTE_KIND:
+    {
+        _PyUnicode_CONVERT_BYTES(
+            Py_UCS1, Py_UCS2,
+            ascii, ascii + len,
+            (Py_UCS2 *)writer->data + writer->pos);
+        break;
+    }
+    case PyUnicode_4BYTE_KIND:
+    {
+        _PyUnicode_CONVERT_BYTES(
+            Py_UCS1, Py_UCS4,
+            ascii, ascii + len,
+            (Py_UCS4 *)writer->data + writer->pos);
+        break;
+    }
+    default:
+        assert(0);
+    }
+
+    writer->pos += len;
+    return 0;
+}
+
+int
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
+                                   const char *str, Py_ssize_t len)
+{
+    Py_UCS4 maxchar;
+
+    maxchar = ucs1lib_find_max_char((Py_UCS1*)str, (Py_UCS1*)str + len);
+    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
+        return -1;
+    unicode_write_cstr(writer->buffer, writer->pos, str, len);
+    writer->pos += len;
+    return 0;
+}
+
 PyObject *
 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
 {
+    PyObject *str;
     if (writer->pos == 0) {
-        Py_XDECREF(writer->buffer);
+        Py_CLEAR(writer->buffer);
         _Py_RETURN_UNICODE_EMPTY();
     }
     if (writer->readonly) {
-        assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
-        return writer->buffer;
+        str = writer->buffer;
+        writer->buffer = NULL;
+        assert(PyUnicode_GET_LENGTH(str) == writer->pos);
+        return str;
     }
     if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) {
         PyObject *newbuffer;
         newbuffer = resize_compact(writer->buffer, writer->pos);
         if (newbuffer == NULL) {
             Py_DECREF(writer->buffer);
+            writer->buffer = NULL;
             return NULL;
         }
         writer->buffer = newbuffer;
     }
-    assert(_PyUnicode_CheckConsistency(writer->buffer, 1));
-    return unicode_result_ready(writer->buffer);
+    str = writer->buffer;
+    writer->buffer = NULL;
+    assert(_PyUnicode_CheckConsistency(str, 1));
+    return unicode_result_ready(str);
 }
 
 void
@@ -12973,7 +13536,7 @@ unicode__format__(PyObject* self, PyObject* args)
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
-    _PyUnicodeWriter_Init(&writer, 0);
+    _PyUnicodeWriter_Init(&writer);
     ret = _PyUnicode_FormatAdvancedWriter(&writer,
                                           self, format_spec, 0,
                                           PyUnicode_GET_LENGTH(format_spec));
@@ -13042,7 +13605,8 @@ static PyMethodDef unicode_methods[] = {
     {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},
     {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__},
     {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
-    {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__},
+    {"expandtabs", (PyCFunction) unicode_expandtabs,
+     METH_VARARGS | METH_KEYWORDS, expandtabs__doc__},
     {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
     {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__},
     {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
@@ -13054,7 +13618,8 @@ static PyMethodDef unicode_methods[] = {
     {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
     {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
     {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__},
-    {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS | METH_KEYWORDS, splitlines__doc__},
+    {"splitlines", (PyCFunction) unicode_splitlines,
+     METH_VARARGS | METH_KEYWORDS, splitlines__doc__},
     {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
     {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
     {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
@@ -13076,8 +13641,7 @@ static PyMethodDef unicode_methods[] = {
     {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
     {"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__},
     {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
-    {"maketrans", (PyCFunction) unicode_maketrans,
-     METH_VARARGS | METH_STATIC, maketrans__doc__},
+    UNICODE_MAKETRANS_METHODDEF
     {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__},
 #if 0
     /* These methods are just used for debugging the implementation. */
@@ -13192,16 +13756,39 @@ static PyMappingMethods unicode_as_mapping = {
 
 /* Helpers for PyUnicode_Format() */
 
+struct unicode_formatter_t {
+    PyObject *args;
+    int args_owned;
+    Py_ssize_t arglen, argidx;
+    PyObject *dict;
+
+    enum PyUnicode_Kind fmtkind;
+    Py_ssize_t fmtcnt, fmtpos;
+    void *fmtdata;
+    PyObject *fmtstr;
+
+    _PyUnicodeWriter writer;
+};
+
+struct unicode_format_arg_t {
+    Py_UCS4 ch;
+    int flags;
+    Py_ssize_t width;
+    int prec;
+    int sign;
+};
+
 static PyObject *
-getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
+unicode_format_getnextarg(struct unicode_formatter_t *ctx)
 {
-    Py_ssize_t argidx = *p_argidx;
-    if (argidx < arglen) {
-        (*p_argidx)++;
-        if (arglen < 0)
-            return args;
+    Py_ssize_t argidx = ctx->argidx;
+
+    if (argidx < ctx->arglen) {
+        ctx->argidx++;
+        if (ctx->arglen < 0)
+            return ctx->args;
         else
-            return PyTuple_GetItem(args, argidx);
+            return PyTuple_GetItem(ctx->args, argidx);
     }
     PyErr_SetString(PyExc_TypeError,
                     "not enough arguments for format string");
@@ -13210,33 +13797,42 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
 
 /* Returns a new reference to a PyUnicode object, or NULL on failure. */
 
+/* Format a float into the writer if the writer is not NULL, or into *p_output
+   otherwise.
+
+   Return 0 on success, raise an exception and return -1 on error. */
 static int
-formatfloat(PyObject *v, int flags, int prec, int type,
-            PyObject **p_output, _PyUnicodeWriter *writer)
+formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
+            PyObject **p_output,
+            _PyUnicodeWriter *writer)
 {
     char *p;
     double x;
     Py_ssize_t len;
+    int prec;
+    int dtoa_flags;
 
     x = PyFloat_AsDouble(v);
     if (x == -1.0 && PyErr_Occurred())
         return -1;
 
+    prec = arg->prec;
     if (prec < 0)
         prec = 6;
 
-    p = PyOS_double_to_string(x, type, prec,
-                              (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
+    if (arg->flags & F_ALT)
+        dtoa_flags = Py_DTSF_ALT;
+    else
+        dtoa_flags = 0;
+    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
     if (p == NULL)
         return -1;
     len = strlen(p);
     if (writer) {
-        if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) {
+        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
             PyMem_Free(p);
             return -1;
         }
-        unicode_write_cstr(writer->buffer, writer->pos, p, len);
-        writer->pos += len;
     }
     else
         *p_output = _PyUnicode_FromASCII(p, len);
@@ -13263,7 +13859,7 @@ formatfloat(PyObject *v, int flags, int prec, int type,
  * produce a '-' sign, but can for Python's unbounded ints.
  */
 static PyObject*
-formatlong(PyObject *val, int flags, int prec, int type)
+formatlong(PyObject *val, struct unicode_format_arg_t *arg)
 {
     PyObject *result = NULL;
     char *buf;
@@ -13273,6 +13869,8 @@ formatlong(PyObject *val, int flags, int prec, int type)
     Py_ssize_t llen;
     int numdigits;      /* len == numnondigits + numdigits */
     int numnondigits = 0;
+    int prec = arg->prec;
+    int type = arg->ch;
 
     /* Avoid exceeding SSIZE_T_MAX */
     if (prec > INT_MAX-3) {
@@ -13284,13 +13882,14 @@ formatlong(PyObject *val, int flags, int prec, int type)
     assert(PyLong_Check(val));
 
     switch (type) {
+    default:
+        assert(!"'type' not in [diuoxX]");
     case 'd':
+    case 'i':
     case 'u':
-        /* Special-case boolean: we want 0/1 */
-        if (PyBool_Check(val))
-            result = PyNumber_ToBase(val, 10);
-        else
-            result = Py_TYPE(val)->tp_str(val);
+        /* int and int subclasses should print numerically when a numeric
+           format code is used (see issue18780) */
+        result = PyNumber_ToBase(val, 10);
         break;
     case 'o':
         numnondigits = 2;
@@ -13301,8 +13900,6 @@ formatlong(PyObject *val, int flags, int prec, int type)
         numnondigits = 2;
         result = PyNumber_ToBase(val, 16);
         break;
-    default:
-        assert(!"'type' not in [duoxX]");
     }
     if (!result)
         return NULL;
@@ -13313,12 +13910,14 @@ formatlong(PyObject *val, int flags, int prec, int type)
 
     /* To modify the string in-place, there can only be one reference. */
     if (Py_REFCNT(result) != 1) {
+        Py_DECREF(result);
         PyErr_BadInternalCall();
         return NULL;
     }
     buf = PyUnicode_DATA(result);
     llen = PyUnicode_GET_LENGTH(result);
     if (llen > INT_MAX) {
+        Py_DECREF(result);
         PyErr_SetString(PyExc_ValueError,
                         "string too large in _PyBytes_FormatLong");
         return NULL;
@@ -13330,7 +13929,7 @@ formatlong(PyObject *val, int flags, int prec, int type)
     assert(numdigits > 0);
 
     /* Get rid of base marker unless F_ALT */
-    if (((flags & F_ALT) == 0 &&
+    if (((arg->flags & F_ALT) == 0 &&
         (type == 'o' || type == 'x' || type == 'X'))) {
         assert(buf[sign] == '0');
         assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
@@ -13375,15 +13974,121 @@ formatlong(PyObject *val, int flags, int prec, int type)
             if (buf[i] >= 'a' && buf[i] <= 'x')
                 buf[i] -= 'a'-'A';
     }
-    if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
+    if (!PyUnicode_Check(result)
+        || buf != PyUnicode_DATA(result)) {
         PyObject *unicode;
         unicode = _PyUnicode_FromASCII(buf, len);
         Py_DECREF(result);
         result = unicode;
     }
+    else if (len != PyUnicode_GET_LENGTH(result)) {
+        if (PyUnicode_Resize(&result, len) < 0)
+            Py_CLEAR(result);
+    }
     return result;
 }
 
+/* Format the number v as an integer for the conversion code in arg->ch.
+ * Return 1 if the number has been written directly into the writer,
+ *        0 if the result of formatlong() has been stored in *p_output,
+ *       -1 with an exception set on error. */
+static int
+mainformatlong(PyObject *v,
+               struct unicode_format_arg_t *arg,
+               PyObject **p_output,
+               _PyUnicodeWriter *writer)
+{
+    PyObject *iobj, *res;
+    char type = (char)arg->ch;
+
+    if (!PyNumber_Check(v))
+        goto wrongtype;
+
+    /* Non-int numbers are converted to an int first. For 'o', 'x' and 'X', */
+    /* PyNumber_Index() is tried; on failure we warn and use PyNumber_Long(). */
+    if (!PyLong_Check(v)) {
+        if (type == 'o' || type == 'x' || type == 'X') {
+            iobj = PyNumber_Index(v);
+            if (iobj == NULL) {
+                PyErr_Clear();  /* drop the PyNumber_Index() error */
+                if (PyErr_WarnEx(PyExc_DeprecationWarning,
+                                 "automatic int conversions have been deprecated",
+                                 1)) {
+                    return -1;  /* the warning was turned into an error */
+                }
+                iobj = PyNumber_Long(v);
+                if (iobj == NULL ) {
+                    if (PyErr_ExceptionMatches(PyExc_TypeError))
+                        goto wrongtype;
+                    return -1;  /* keep any non-TypeError exception */
+                }
+            }
+        }
+        else {
+            iobj = PyNumber_Long(v);  /* 'd', 'i', 'u': no warning */
+            if (iobj == NULL ) {
+                if (PyErr_ExceptionMatches(PyExc_TypeError))
+                    goto wrongtype;
+                return -1;  /* keep any non-TypeError exception */
+            }
+        }
+        assert(PyLong_Check(iobj));
+    }
+    else {
+        iobj = v;
+        Py_INCREF(iobj);  /* iobj is an owned reference on every path */
+    }
+
+    if (PyLong_CheckExact(v)
+        && arg->width == -1 && arg->prec == -1
+        && !(arg->flags & (F_SIGN | F_BLANK))
+        && type != 'X')
+    {
+        /* Fast path: v is an exact int (so iobj == v); write it directly */
+        int alternate = arg->flags & F_ALT;
+        int base;
+
+        switch(type)
+        {
+            default:  /* unexpected code: assert, then fall through to base 10 */
+                assert(0 && "'type' not in [diuoxX]");
+            case 'd':
+            case 'i':
+            case 'u':
+                base = 10;
+                break;
+            case 'o':
+                base = 8;
+                break;
+            case 'x':
+            case 'X':  /* excluded by the 'X' test in the guard above */
+                base = 16;
+                break;
+        }
+
+        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
+            Py_DECREF(iobj);
+            return -1;
+        }
+        Py_DECREF(iobj);
+        return 1;
+    }
+
+    res = formatlong(iobj, arg);  /* general path: width, precision, flags */
+    Py_DECREF(iobj);
+    if (res == NULL)
+        return -1;
+    *p_output = res;
+    return 0;
+
+wrongtype:  /* sets a TypeError naming the format code and the type of v */
+    PyErr_Format(PyExc_TypeError,
+            "%%%c format: a number is required, "
+            "not %.200s",
+            type, Py_TYPE(v)->tp_name);
+    return -1;
+}
+
 static Py_UCS4
 formatchar(PyObject *v)
 {
@@ -13395,8 +14100,30 @@ formatchar(PyObject *v)
         goto onError;
     }
     else {
-        /* Integer input truncated to a character */
+        PyObject *iobj = NULL;
         long x;
+        /* Non-int arguments are converted with PyNumber_Index(); if that */
+        /* fails, warn and fall back to PyNumber_Long() (deprecated path). */
+        if (!PyLong_Check(v)) {
+            iobj = PyNumber_Index(v);
+            if (iobj == NULL) {
+                PyErr_Clear();
+                if (PyErr_WarnEx(PyExc_DeprecationWarning,
+                                 "automatic int conversions have been deprecated",
+                                 1)) {
+                    return (Py_UCS4) -1;
+                }
+                iobj = PyNumber_Long(v);
+                if (iobj == NULL ) {
+                    if (PyErr_ExceptionMatches(PyExc_TypeError))
+                        goto onError;
+                    return (Py_UCS4) -1;
+                }
+            }
+            v = iobj;  /* borrowed alias; iobj keeps it alive until x is read */
+        }
+        /* Integer input truncated to a character */
         x = PyLong_AsLong(v);
+        Py_XDECREF(iobj);
         if (x == -1 && PyErr_Occurred())
             goto onError;
@@ -13416,540 +14143,588 @@ formatchar(PyObject *v)
     return (Py_UCS4) -1;
 }
 
-PyObject *
-PyUnicode_Format(PyObject *format, PyObject *args)
-{
-    Py_ssize_t fmtcnt, fmtpos, arglen, argidx;
-    int args_owned = 0;
-    PyObject *dict = NULL;
-    PyObject *temp = NULL;
-    PyObject *second = NULL;
-    PyObject *uformat;
-    void *fmt;
-    enum PyUnicode_Kind kind, fmtkind;
-    _PyUnicodeWriter writer;
-    Py_ssize_t sublen;
-    Py_UCS4 maxchar;
+/* Parse options of an argument: flags, width, precision.
+   Handle also "%(name)" syntax.
 
-    if (format == NULL || args == NULL) {
-        PyErr_BadInternalCall();
-        return NULL;
-    }
-    uformat = PyUnicode_FromObject(format);
-    if (uformat == NULL)
-        return NULL;
-    if (PyUnicode_READY(uformat) == -1) {
-        Py_DECREF(uformat);
-        return NULL;
-    }
+   Return 0 on success: arg->ch then holds the conversion character and
+   arg->flags, arg->width and arg->prec hold the parsed options.
+   Raise an exception and return -1 on error. */
+static int
+unicode_format_arg_parse(struct unicode_formatter_t *ctx,
+                         struct unicode_format_arg_t *arg)
+{
+#define FORMAT_READ(ctx) \
+        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
 
-    fmt = PyUnicode_DATA(uformat);
-    fmtkind = PyUnicode_KIND(uformat);
-    fmtcnt = PyUnicode_GET_LENGTH(uformat);
-    fmtpos = 0;
+    PyObject *v;
 
-    _PyUnicodeWriter_Init(&writer, fmtcnt + 100);
+    if (arg->ch == '(') {
+        /* Get argument value from a dictionary. Example: "%(name)s". */
+        Py_ssize_t keystart;
+        Py_ssize_t keylen;
+        PyObject *key;
+        int pcount = 1;
 
-    if (PyTuple_Check(args)) {
-        arglen = PyTuple_Size(args);
-        argidx = 0;
-    }
-    else {
-        arglen = -1;
-        argidx = -2;
+        if (ctx->dict == NULL) {
+            PyErr_SetString(PyExc_TypeError,
+                            "format requires a mapping");
+            return -1;
+        }
+        ++ctx->fmtpos;
+        --ctx->fmtcnt;
+        keystart = ctx->fmtpos;
+        /* Skip over balanced parentheses */
+        while (pcount > 0 && --ctx->fmtcnt >= 0) {
+            arg->ch = FORMAT_READ(ctx);
+            if (arg->ch == ')')
+                --pcount;
+            else if (arg->ch == '(')
+                ++pcount;
+            ctx->fmtpos++;
+        }
+        keylen = ctx->fmtpos - keystart - 1;
+        if (ctx->fmtcnt < 0 || pcount > 0) {
+            PyErr_SetString(PyExc_ValueError,
+                            "incomplete format key");
+            return -1;
+        }
+        key = PyUnicode_Substring(ctx->fmtstr,
+                                  keystart, keystart + keylen);
+        if (key == NULL)
+            return -1;
+        if (ctx->args_owned) {
+            Py_DECREF(ctx->args);
+            ctx->args_owned = 0;
+        }
+        ctx->args = PyObject_GetItem(ctx->dict, key);
+        Py_DECREF(key);
+        if (ctx->args == NULL)
+            return -1;
+        ctx->args_owned = 1;
+        ctx->arglen = -1;
+        ctx->argidx = -2;
+    }
+
+    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
+    while (--ctx->fmtcnt >= 0) {
+        arg->ch = FORMAT_READ(ctx);
+        ctx->fmtpos++;
+        switch (arg->ch) {
+        case '-': arg->flags |= F_LJUST; continue;
+        case '+': arg->flags |= F_SIGN; continue;
+        case ' ': arg->flags |= F_BLANK; continue;
+        case '#': arg->flags |= F_ALT; continue;
+        case '0': arg->flags |= F_ZERO; continue;
+        }
+        break;
     }
-    if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args))
-        dict = args;
-
-    while (--fmtcnt >= 0) {
-        if (PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') {
-            Py_ssize_t nonfmtpos;
-            nonfmtpos = fmtpos++;
-            while (fmtcnt >= 0 &&
-                   PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') {
-                fmtpos++;
-                fmtcnt--;
-            }
-            if (fmtcnt < 0)
-                fmtpos--;
-            sublen = fmtpos - nonfmtpos;
-            maxchar = _PyUnicode_FindMaxChar(uformat,
-                                             nonfmtpos, nonfmtpos + sublen);
-            if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1)
-                goto onError;
 
-            _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
-                                          uformat, nonfmtpos, sublen);
-            writer.pos += sublen;
+    /* Parse width. Example: "%10s" => width=10 */
+    if (arg->ch == '*') {
+        v = unicode_format_getnextarg(ctx);
+        if (v == NULL)
+            return -1;
+        if (!PyLong_Check(v)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "* wants int");
+            return -1;
         }
-        else {
-            /* Got a format specifier */
-            int flags = 0;
-            Py_ssize_t width = -1;
-            int prec = -1;
-            Py_UCS4 c = '\0';
-            Py_UCS4 fill;
-            int sign;
-            Py_UCS4 signchar;
-            int isnumok;
-            PyObject *v = NULL;
-            void *pbuf = NULL;
-            Py_ssize_t pindex, len;
-            Py_UCS4 bufmaxchar;
-            Py_ssize_t buflen;
-
-            fmtpos++;
-            c = PyUnicode_READ(fmtkind, fmt, fmtpos);
-            if (c == '(') {
-                Py_ssize_t keystart;
-                Py_ssize_t keylen;
-                PyObject *key;
-                int pcount = 1;
-
-                if (dict == NULL) {
-                    PyErr_SetString(PyExc_TypeError,
-                                    "format requires a mapping");
-                    goto onError;
-                }
-                ++fmtpos;
-                --fmtcnt;
-                keystart = fmtpos;
-                /* Skip over balanced parentheses */
-                while (pcount > 0 && --fmtcnt >= 0) {
-                    c = PyUnicode_READ(fmtkind, fmt, fmtpos);
-                    if (c == ')')
-                        --pcount;
-                    else if (c == '(')
-                        ++pcount;
-                    fmtpos++;
-                }
-                keylen = fmtpos - keystart - 1;
-                if (fmtcnt < 0 || pcount > 0) {
-                    PyErr_SetString(PyExc_ValueError,
-                                    "incomplete format key");
-                    goto onError;
-                }
-                key = PyUnicode_Substring(uformat,
-                                          keystart, keystart + keylen);
-                if (key == NULL)
-                    goto onError;
-                if (args_owned) {
-                    Py_DECREF(args);
-                    args_owned = 0;
-                }
-                args = PyObject_GetItem(dict, key);
-                Py_DECREF(key);
-                if (args == NULL) {
-                    goto onError;
-                }
-                args_owned = 1;
-                arglen = -1;
-                argidx = -2;
-            }
-            while (--fmtcnt >= 0) {
-                c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                switch (c) {
-                case '-': flags |= F_LJUST; continue;
-                case '+': flags |= F_SIGN; continue;
-                case ' ': flags |= F_BLANK; continue;
-                case '#': flags |= F_ALT; continue;
-                case '0': flags |= F_ZERO; continue;
-                }
+        arg->width = PyLong_AsSsize_t(v);
+        if (arg->width == -1 && PyErr_Occurred())
+            return -1;
+        if (arg->width < 0) {
+            arg->flags |= F_LJUST;
+            arg->width = -arg->width;
+        }
+        if (--ctx->fmtcnt >= 0) {
+            arg->ch = FORMAT_READ(ctx);
+            ctx->fmtpos++;
+        }
+    }
+    else if (arg->ch >= '0' && arg->ch <= '9') {
+        arg->width = arg->ch - '0';
+        while (--ctx->fmtcnt >= 0) {
+            arg->ch = FORMAT_READ(ctx);
+            ctx->fmtpos++;
+            if (arg->ch < '0' || arg->ch > '9')
                 break;
+            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
+               mixing signed and unsigned comparison. Since arg->ch is between
+               '0' and '9', casting to int is safe. */
+            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
+                PyErr_SetString(PyExc_ValueError,
+                                "width too big");
+                return -1;
             }
-            if (c == '*') {
-                v = getnextarg(args, arglen, &argidx);
-                if (v == NULL)
-                    goto onError;
-                if (!PyLong_Check(v)) {
-                    PyErr_SetString(PyExc_TypeError,
-                                    "* wants int");
-                    goto onError;
-                }
-                width = PyLong_AsSsize_t(v);
-                if (width == -1 && PyErr_Occurred())
-                    goto onError;
-                if (width < 0) {
-                    flags |= F_LJUST;
-                    width = -width;
-                }
-                if (--fmtcnt >= 0)
-                    c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-            }
-            else if (c >= '0' && c <= '9') {
-                width = c - '0';
-                while (--fmtcnt >= 0) {
-                    c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                    if (c < '0' || c > '9')
-                        break;
-                    /* Since c is unsigned, the RHS would end up as unsigned,
-                       mixing signed and unsigned comparison. Since c is between
-                       '0' and '9', casting to int is safe. */
-                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
-                        PyErr_SetString(PyExc_ValueError,
-                                        "width too big");
-                        goto onError;
-                    }
-                    width = width*10 + (c - '0');
-                }
+            arg->width = arg->width*10 + (arg->ch - '0');
+        }
+    }
+
+    /* Parse precision. Example: "%.3f" => prec=3 */
+    if (arg->ch == '.') {
+        arg->prec = 0;
+        if (--ctx->fmtcnt >= 0) {
+            arg->ch = FORMAT_READ(ctx);
+            ctx->fmtpos++;
+        }
+        if (arg->ch == '*') {
+            v = unicode_format_getnextarg(ctx);
+            if (v == NULL)
+                return -1;
+            if (!PyLong_Check(v)) {
+                PyErr_SetString(PyExc_TypeError,
+                                "* wants int");
+                return -1;
             }
-            if (c == '.') {
-                prec = 0;
-                if (--fmtcnt >= 0)
-                    c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                if (c == '*') {
-                    v = getnextarg(args, arglen, &argidx);
-                    if (v == NULL)
-                        goto onError;
-                    if (!PyLong_Check(v)) {
-                        PyErr_SetString(PyExc_TypeError,
-                                        "* wants int");
-                        goto onError;
-                    }
-                    prec = _PyLong_AsInt(v);
-                    if (prec == -1 && PyErr_Occurred())
-                        goto onError;
-                    if (prec < 0)
-                        prec = 0;
-                    if (--fmtcnt >= 0)
-                        c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                }
-                else if (c >= '0' && c <= '9') {
-                    prec = c - '0';
-                    while (--fmtcnt >= 0) {
-                        c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                        if (c < '0' || c > '9')
-                            break;
-                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
-                            PyErr_SetString(PyExc_ValueError,
-                                            "prec too big");
-                            goto onError;
-                        }
-                        prec = prec*10 + (c - '0');
-                    }
-                }
-            } /* prec */
-            if (fmtcnt >= 0) {
-                if (c == 'h' || c == 'l' || c == 'L') {
-                    if (--fmtcnt >= 0)
-                        c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
+            arg->prec = _PyLong_AsInt(v);
+            if (arg->prec == -1 && PyErr_Occurred())
+                return -1;
+            if (arg->prec < 0)
+                arg->prec = 0;
+            if (--ctx->fmtcnt >= 0) {
+                arg->ch = FORMAT_READ(ctx);
+                ctx->fmtpos++;
+            }
+        }
+        else if (arg->ch >= '0' && arg->ch <= '9') {
+            arg->prec = arg->ch - '0';
+            while (--ctx->fmtcnt >= 0) {
+                arg->ch = FORMAT_READ(ctx);
+                ctx->fmtpos++;
+                if (arg->ch < '0' || arg->ch > '9')
+                    break;
+                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "precision too big");
+                    return -1;
                 }
+                arg->prec = arg->prec*10 + (arg->ch - '0');
             }
-            if (fmtcnt < 0) {
-                PyErr_SetString(PyExc_ValueError,
-                                "incomplete format");
-                goto onError;
-            }
-            if (fmtcnt == 0)
-                writer.overallocate = 0;
+        }
+    }
 
-            if (c == '%') {
-                if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1)
-                    goto onError;
-                PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '%');
-                writer.pos += 1;
-                continue;
+    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
+    if (ctx->fmtcnt >= 0) {
+        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
+            if (--ctx->fmtcnt >= 0) {
+                arg->ch = FORMAT_READ(ctx);
+                ctx->fmtpos++;
             }
+        }
+    }
+    if (ctx->fmtcnt < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "incomplete format");
+        return -1;
+    }
+    return 0;
 
-            v = getnextarg(args, arglen, &argidx);
-            if (v == NULL)
-                goto onError;
+#undef FORMAT_READ
+}
 
-            sign = 0;
-            signchar = '\0';
-            fill = ' ';
-            switch (c) {
-
-            case 's':
-            case 'r':
-            case 'a':
-                if (PyLong_CheckExact(v) && width == -1 && prec == -1) {
-                    /* Fast path */
-                    if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
-                        goto onError;
-                    goto nextarg;
-                }
+/* Format one argument. Supported conversion specifiers:
 
-                if (PyUnicode_CheckExact(v) && c == 's') {
-                    temp = v;
-                    Py_INCREF(temp);
-                }
-                else {
-                    if (c == 's')
-                        temp = PyObject_Str(v);
-                    else if (c == 'r')
-                        temp = PyObject_Repr(v);
-                    else
-                        temp = PyObject_ASCII(v);
-                }
-                break;
+   - "s", "r", "a": any type
+   - "i", "d", "u": int or float
+   - "o", "x", "X": int
+   - "e", "E", "f", "F", "g", "G": float
+   - "c": int or str (1 character)
 
-            case 'i':
-            case 'd':
-            case 'u':
-            case 'o':
-            case 'x':
-            case 'X':
-                if (PyLong_CheckExact(v)
-                    && width == -1 && prec == -1
-                    && !(flags & (F_SIGN | F_BLANK)))
-                {
-                    /* Fast path */
-                    switch(c)
-                    {
-                    case 'd':
-                    case 'i':
-                    case 'u':
-                        if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
-                            goto onError;
-                        goto nextarg;
-                    case 'x':
-                        if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
-                            goto onError;
-                        goto nextarg;
-                    case 'o':
-                        if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
-                            goto onError;
-                        goto nextarg;
-                    default:
-                        break;
-                    }
-                }
+   When possible, the output is written directly into the Unicode writer
+   (ctx->writer). A string is created when padding is required.
 
-                isnumok = 0;
-                if (PyNumber_Check(v)) {
-                    PyObject *iobj=NULL;
+   Return 0 if the argument has been formatted into *p_str,
+          1 if the argument has been written into ctx->writer,
+         -1 on error. */
+static int
+unicode_format_arg_format(struct unicode_formatter_t *ctx,
+                          struct unicode_format_arg_t *arg,
+                          PyObject **p_str)
+{
+    PyObject *v;
+    _PyUnicodeWriter *writer = &ctx->writer;
 
-                    if (PyLong_Check(v)) {
-                        iobj = v;
-                        Py_INCREF(iobj);
-                    }
-                    else {
-                        iobj = PyNumber_Long(v);
-                    }
-                    if (iobj!=NULL) {
-                        if (PyLong_Check(iobj)) {
-                            isnumok = 1;
-                            sign = 1;
-                            temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c));
-                            Py_DECREF(iobj);
-                        }
-                        else {
-                            Py_DECREF(iobj);
-                        }
-                    }
-                }
-                if (!isnumok) {
-                    PyErr_Format(PyExc_TypeError,
-                                 "%%%c format: a number is required, "
-                                 "not %.200s", (char)c, Py_TYPE(v)->tp_name);
-                    goto onError;
-                }
-                if (flags & F_ZERO)
-                    fill = '0';
-                break;
+    if (ctx->fmtcnt == 0)
+        ctx->writer.overallocate = 0;
 
-            case 'e':
-            case 'E':
-            case 'f':
-            case 'F':
-            case 'g':
-            case 'G':
-                if (width == -1 && prec == -1
-                    && !(flags & (F_SIGN | F_BLANK)))
-                {
-                    /* Fast path */
-                    if (formatfloat(v, flags, prec, c, NULL, &writer) == -1)
-                        goto onError;
-                    goto nextarg;
-                }
+    if (arg->ch == '%') {
+        if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
+            return -1;
+        return 1;
+    }
 
-                sign = 1;
-                if (flags & F_ZERO)
-                    fill = '0';
-                if (formatfloat(v, flags, prec, c, &temp, NULL) == -1)
-                    temp = NULL;
-                break;
+    v = unicode_format_getnextarg(ctx);
+    if (v == NULL)
+        return -1;
 
-            case 'c':
-            {
-                Py_UCS4 ch = formatchar(v);
-                if (ch == (Py_UCS4) -1)
-                    goto onError;
-                if (width == -1 && prec == -1) {
-                    /* Fast path */
-                    if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1)
-                        goto onError;
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
-                    writer.pos += 1;
-                    goto nextarg;
-                }
-                temp = PyUnicode_FromOrdinal(ch);
-                break;
-            }
 
-            default:
-                PyErr_Format(PyExc_ValueError,
-                             "unsupported format character '%c' (0x%x) "
-                             "at index %zd",
-                             (31<=c && c<=126) ? (char)c : '?',
-                             (int)c,
-                             fmtpos - 1);
-                goto onError;
-            }
-            if (temp == NULL)
-                goto onError;
-            assert (PyUnicode_Check(temp));
+    switch (arg->ch) {
+    case 's':
+    case 'r':
+    case 'a':
+        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
+            /* Fast path */
+            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
+                return -1;
+            return 1;
+        }
 
-            if (width == -1 && prec == -1
-                && !(flags & (F_SIGN | F_BLANK)))
-            {
-                /* Fast path */
-                if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1)
-                    goto onError;
-                goto nextarg;
-            }
+        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
+            *p_str = v;
+            Py_INCREF(*p_str);
+        }
+        else {
+            if (arg->ch == 's')
+                *p_str = PyObject_Str(v);
+            else if (arg->ch == 'r')
+                *p_str = PyObject_Repr(v);
+            else
+                *p_str = PyObject_ASCII(v);
+        }
+        break;
 
-            if (PyUnicode_READY(temp) == -1) {
-                Py_CLEAR(temp);
-                goto onError;
-            }
-            kind = PyUnicode_KIND(temp);
-            pbuf = PyUnicode_DATA(temp);
-            len = PyUnicode_GET_LENGTH(temp);
+    case 'i':
+    case 'd':
+    case 'u':
+    case 'o':
+    case 'x':
+    case 'X':
+    {
+        int ret = mainformatlong(v, arg, p_str, writer);
+        if (ret != 0)
+            return ret;
+        arg->sign = 1;
+        break;
+    }
 
-            if (c == 's' || c == 'r' || c == 'a') {
-                if (prec >= 0 && len > prec)
-                    len = prec;
-            }
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+        if (arg->width == -1 && arg->prec == -1
+            && !(arg->flags & (F_SIGN | F_BLANK)))
+        {
+            /* Fast path */
+            if (formatfloat(v, arg, NULL, writer) == -1)
+                return -1;
+            return 1;
+        }
 
-            /* pbuf is initialized here. */
-            pindex = 0;
-            if (sign) {
-                Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
-                if (ch == '-' || ch == '+') {
-                    signchar = ch;
-                    len--;
-                    pindex++;
-                }
-                else if (flags & F_SIGN)
-                    signchar = '+';
-                else if (flags & F_BLANK)
-                    signchar = ' ';
-                else
-                    sign = 0;
-            }
-            if (width < len)
-                width = len;
-
-            /* Compute the length and maximum character of the
-               written characters */
-            bufmaxchar = 127;
-            if (!(flags & F_LJUST)) {
-                if (sign) {
-                    if ((width-1) > len)
-                        bufmaxchar = Py_MAX(bufmaxchar, fill);
-                }
-                else {
-                    if (width > len)
-                        bufmaxchar = Py_MAX(bufmaxchar, fill);
-                }
-            }
-            maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
-            bufmaxchar = Py_MAX(bufmaxchar, maxchar);
+        arg->sign = 1;
+        if (formatfloat(v, arg, p_str, NULL) == -1)
+            return -1;
+        break;
 
-            buflen = width;
-            if (sign && len == width)
-                buflen++;
+    case 'c':
+    {
+        Py_UCS4 ch = formatchar(v);
+        if (ch == (Py_UCS4) -1)
+            return -1;
+        if (arg->width == -1 && arg->prec == -1) {
+            /* Fast path */
+            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
+                return -1;
+            return 1;
+        }
+        *p_str = PyUnicode_FromOrdinal(ch);
+        break;
+    }
 
-            if (_PyUnicodeWriter_Prepare(&writer, buflen, bufmaxchar) == -1)
-                goto onError;
+    default:
+        PyErr_Format(PyExc_ValueError,
+                     "unsupported format character '%c' (0x%x) "
+                     "at index %zd",
+                     (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?',
+                     (int)arg->ch,
+                     ctx->fmtpos - 1);
+        return -1;
+    }
+    if (*p_str == NULL)
+        return -1;
+    assert (PyUnicode_Check(*p_str));
+    return 0;
+}
 
-            /* Write characters */
-            if (sign) {
-                if (fill != ' ') {
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
-                    writer.pos += 1;
-                }
-                if (width > len)
-                    width--;
-            }
-            if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
-                assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
-                assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c);
-                if (fill != ' ') {
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
-                    writer.pos += 2;
-                    pindex += 2;
-                }
-                width -= 2;
-                if (width < 0)
-                    width = 0;
-                len -= 2;
-            }
-            if (width > len && !(flags & F_LJUST)) {
-                sublen = width - len;
-                FILL(writer.kind, writer.data, fill, writer.pos, sublen);
-                writer.pos += sublen;
-                width = len;
-            }
-            if (fill == ' ') {
-                if (sign) {
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
-                    writer.pos += 1;
-                }
-                if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
-                    assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
-                    assert(PyUnicode_READ(kind, pbuf, pindex+1) == c);
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
-                    writer.pos += 2;
-                    pindex += 2;
-                }
-            }
+static int
+unicode_format_arg_output(struct unicode_formatter_t *ctx,
+                          struct unicode_format_arg_t *arg,
+                          PyObject *str)
+{
+    Py_ssize_t len;
+    enum PyUnicode_Kind kind;
+    void *pbuf;
+    Py_ssize_t pindex;
+    Py_UCS4 signchar;
+    Py_ssize_t buflen;
+    Py_UCS4 maxchar;
+    Py_ssize_t sublen;
+    _PyUnicodeWriter *writer = &ctx->writer;
+    Py_UCS4 fill;
+
+    fill = ' ';
+    if (arg->sign && arg->flags & F_ZERO)
+        fill = '0';
+
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+
+    len = PyUnicode_GET_LENGTH(str);
+    if ((arg->width == -1 || arg->width <= len)
+        && (arg->prec == -1 || arg->prec >= len)
+        && !(arg->flags & (F_SIGN | F_BLANK)))
+    {
+        /* Fast path */
+        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
+            return -1;
+        return 0;
+    }
 
-            if (len) {
-                _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
-                                              temp, pindex, len);
-                writer.pos += len;
+    /* Truncate the string for "s", "r" and "a" formats
+       if the precision is set */
+    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
+        if (arg->prec >= 0 && len > arg->prec)
+            len = arg->prec;
+    }
+
+    /* Adjust sign and width */
+    kind = PyUnicode_KIND(str);
+    pbuf = PyUnicode_DATA(str);
+    pindex = 0;
+    signchar = '\0';
+    if (arg->sign) {
+        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
+        if (ch == '-' || ch == '+') {
+            signchar = ch;
+            len--;
+            pindex++;
+        }
+        else if (arg->flags & F_SIGN)
+            signchar = '+';
+        else if (arg->flags & F_BLANK)
+            signchar = ' ';
+        else
+            arg->sign = 0;
+    }
+    if (arg->width < len)
+        arg->width = len;
+
+    /* Prepare the writer */
+    maxchar = writer->maxchar;
+    if (!(arg->flags & F_LJUST)) {
+        if (arg->sign) {
+            if ((arg->width-1) > len)
+                maxchar = Py_MAX(maxchar, fill);
+        }
+        else {
+            if (arg->width > len)
+                maxchar = Py_MAX(maxchar, fill);
+        }
+    }
+    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
+        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
+        maxchar = Py_MAX(maxchar, strmaxchar);
+    }
+
+    buflen = arg->width;
+    if (arg->sign && len == arg->width)
+        buflen++;
+    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
+        return -1;
+
+    /* Write the sign now when padding with zeros; reserve a column for it */
+    if (arg->sign) {
+        if (fill != ' ') {
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
+            writer->pos += 1;
+        }
+        if (arg->width > len)
+            arg->width--;
+    }
+
+    /* Handle the numeric prefix for "x", "X" and "o" formats if the
+       alternate form is used (for example "0x" for the "%#x" format):
+       write it now when padding with zeros, and exclude it from width/len. */
+    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
+        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
+        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
+        if (fill != ' ') {
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
+            writer->pos += 2;
+            pindex += 2;
+        }
+        arg->width -= 2;
+        if (arg->width < 0)
+            arg->width = 0;
+        len -= 2;
+    }
+
+    /* Pad left with the fill character if needed */
+    if (arg->width > len && !(arg->flags & F_LJUST)) {
+        sublen = arg->width - len;
+        FILL(writer->kind, writer->data, fill, writer->pos, sublen);
+        writer->pos += sublen;
+        arg->width = len;
+    }
+
+    /* If padding with spaces: write sign if needed and/or numeric prefix if
+       the alternate form is used */
+    if (fill == ' ') {
+        if (arg->sign) {
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
+            writer->pos += 1;
+        }
+        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
+            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
+            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
+            writer->pos += 2;
+            pindex += 2;
+        }
+    }
+
+    /* Write characters */
+    if (len) {
+        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                      str, pindex, len);
+        writer->pos += len;
+    }
+
+    /* Pad right with spaces if needed */
+    if (arg->width > len) {
+        sublen = arg->width - len;
+        FILL(writer->kind, writer->data, ' ', writer->pos, sublen);
+        writer->pos += sublen;
+    }
+    return 0;
+}
+
+/* Helper of PyUnicode_Format(): format one arg.
+   Return 0 on success, raise an exception and return -1 on error. */
+static int
+unicode_format_arg(struct unicode_formatter_t *ctx)
+{
+    struct unicode_format_arg_t arg;
+    PyObject *str;
+    int ret;
+
+    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
+    arg.flags = 0;
+    arg.width = -1;
+    arg.prec = -1;
+    arg.sign = 0;
+    str = NULL;
+
+    ret = unicode_format_arg_parse(ctx, &arg);
+    if (ret == -1)
+        return -1;
+
+    ret = unicode_format_arg_format(ctx, &arg, &str);
+    if (ret == -1)
+        return -1;
+
+    if (ret != 1) {
+        ret = unicode_format_arg_output(ctx, &arg, str);
+        Py_DECREF(str);
+        if (ret == -1)
+            return -1;
+    }
+
+    if (ctx->dict && (ctx->argidx < ctx->arglen) && arg.ch != '%') {
+        PyErr_SetString(PyExc_TypeError,
+                        "not all arguments converted during string formatting");
+        return -1;
+    }
+    return 0;
+}
+
+PyObject *
+PyUnicode_Format(PyObject *format, PyObject *args)
+{
+    struct unicode_formatter_t ctx;
+
+    if (format == NULL || args == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    ctx.fmtstr = PyUnicode_FromObject(format);
+    if (ctx.fmtstr == NULL)
+        return NULL;
+    if (PyUnicode_READY(ctx.fmtstr) == -1) {
+        Py_DECREF(ctx.fmtstr);
+        return NULL;
+    }
+    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
+    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
+    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
+    ctx.fmtpos = 0;
+
+    _PyUnicodeWriter_Init(&ctx.writer);
+    ctx.writer.min_length = ctx.fmtcnt + 100;
+    ctx.writer.overallocate = 1;
+
+    if (PyTuple_Check(args)) {
+        ctx.arglen = PyTuple_Size(args);
+        ctx.argidx = 0;
+    }
+    else {
+        ctx.arglen = -1;
+        ctx.argidx = -2;
+    }
+    ctx.args_owned = 0;
+    if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args))
+        ctx.dict = args;
+    else
+        ctx.dict = NULL;
+    ctx.args = args;
+
+    while (--ctx.fmtcnt >= 0) {
+        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
+            Py_ssize_t nonfmtpos;
+
+            nonfmtpos = ctx.fmtpos++;
+            while (ctx.fmtcnt >= 0 &&
+                   PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
+                ctx.fmtpos++;
+                ctx.fmtcnt--;
             }
-            if (width > len) {
-                sublen = width - len;
-                FILL(writer.kind, writer.data, ' ', writer.pos, sublen);
-                writer.pos += sublen;
+            if (ctx.fmtcnt < 0) {
+                ctx.fmtpos--;
+                ctx.writer.overallocate = 0;
             }
 
-nextarg:
-            if (dict && (argidx < arglen) && c != '%') {
-                PyErr_SetString(PyExc_TypeError,
-                                "not all arguments converted during string formatting");
+            if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
+                                                nonfmtpos, ctx.fmtpos) < 0)
                 goto onError;
-            }
-            Py_CLEAR(temp);
-        } /* '%' */
-    } /* until end */
-    if (argidx < arglen && !dict) {
+        }
+        else {
+            ctx.fmtpos++;
+            if (unicode_format_arg(&ctx) == -1)
+                goto onError;
+        }
+    }
+
+    if (ctx.argidx < ctx.arglen && !ctx.dict) {
         PyErr_SetString(PyExc_TypeError,
                         "not all arguments converted during string formatting");
         goto onError;
     }
 
-    if (args_owned) {
-        Py_DECREF(args);
+    if (ctx.args_owned) {
+        Py_DECREF(ctx.args);
     }
-    Py_DECREF(uformat);
-    Py_XDECREF(temp);
-    Py_XDECREF(second);
-    return _PyUnicodeWriter_Finish(&writer);
+    Py_DECREF(ctx.fmtstr);
+    return _PyUnicodeWriter_Finish(&ctx.writer);
 
   onError:
-    Py_DECREF(uformat);
-    Py_XDECREF(temp);
-    Py_XDECREF(second);
-    _PyUnicodeWriter_Dealloc(&writer);
-    if (args_owned) {
-        Py_DECREF(args);
+    Py_DECREF(ctx.fmtstr);
+    _PyUnicodeWriter_Dealloc(&ctx.writer);
+    if (ctx.args_owned) {
+        Py_DECREF(ctx.args);
     }
     return NULL;
 }
@@ -14209,7 +14984,7 @@ _PyUnicode_Fini(void)
 void
 PyUnicode_InternInPlace(PyObject **p)
 {
-    register PyObject *s = *p;
+    PyObject *s = *p;
     PyObject *t;
 #ifdef Py_DEBUG
     assert(s != NULL);
@@ -14238,12 +15013,12 @@ PyUnicode_InternInPlace(PyObject **p)
     t = PyDict_GetItem(interned, s);
     Py_END_ALLOW_RECURSION
 
-        if (t) {
-            Py_INCREF(t);
-            Py_DECREF(*p);
-            *p = t;
-            return;
-        }
+    if (t) {
+        Py_INCREF(t);
+        Py_DECREF(*p);
+        *p = t;
+        return;
+    }
 
     PyThreadState_GET()->recursion_critical = 1;
     if (PyDict_SetItem(interned, s, s) < 0) {
@@ -14542,7 +15317,7 @@ Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
 int
 Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
 {
-    register Py_UNICODE u1, u2;
+    Py_UNICODE u1, u2;
     for (; n != 0; n--) {
         u1 = *s1;
         u2 = *s2;