Diffstat (limited to 'numpy/core')
-rw-r--r--  numpy/core/defchararray.py                    19
-rw-r--r--  numpy/core/include/numpy/arrayscalars.h        8
-rw-r--r--  numpy/core/src/common/ucsnarrow.c            116
-rw-r--r--  numpy/core/src/common/ucsnarrow.h              6
-rw-r--r--  numpy/core/src/multiarray/arraytypes.c.src    58
-rw-r--r--  numpy/core/src/multiarray/buffer.c             5
-rw-r--r--  numpy/core/src/multiarray/common.c            32
-rw-r--r--  numpy/core/src/multiarray/scalarapi.c         70
-rw-r--r--  numpy/core/src/multiarray/scalartypes.c.src   66
-rw-r--r--  numpy/core/tests/test_multiarray.py           28
-rw-r--r--  numpy/core/tests/test_scalarbuffer.py         35
11 files changed, 175 insertions, 268 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 942a698a9..b22d6b85e 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -2679,25 +2679,6 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
itemsize = len(obj)
shape = len(obj) // itemsize
- if unicode:
- if sys.maxunicode == 0xffff:
- # On a narrow Python build, the buffer for Unicode
- # strings is UCS2, which doesn't match the buffer for
- # NumPy Unicode types, which is ALWAYS UCS4.
- # Therefore, we need to convert the buffer. On Python
- # 2.6 and later, we can use the utf_32 codec. Earlier
- # versions don't have that codec, so we convert to a
- # numerical array that matches the input buffer, and
- # then use NumPy to convert it to UCS4. All of this
- # should happen in native endianness.
- obj = obj.encode('utf_32')
- else:
- obj = str(obj)
- else:
- # Let the default Unicode -> string encoding (if any) take
- # precedence.
- obj = bytes(obj)
-
return chararray(shape, itemsize=itemsize, unicode=unicode,
buffer=obj, order=order)
diff --git a/numpy/core/include/numpy/arrayscalars.h b/numpy/core/include/numpy/arrayscalars.h
index 64450e713..42a0df76a 100644
--- a/numpy/core/include/numpy/arrayscalars.h
+++ b/numpy/core/include/numpy/arrayscalars.h
@@ -135,7 +135,13 @@ typedef struct {
} PyScalarObject;
#define PyStringScalarObject PyStringObject
-#define PyUnicodeScalarObject PyUnicodeObject
+#define PyStringScalarObject PyStringObject
+typedef struct {
+ /* note that the PyObject_HEAD macro lives right here */
+ PyUnicodeObject base;
+ Py_UCS4 *obval;
+} PyUnicodeScalarObject;
+
typedef struct {
PyObject_VAR_HEAD
diff --git a/numpy/core/src/common/ucsnarrow.c b/numpy/core/src/common/ucsnarrow.c
index 946a72257..3ef5d6878 100644
--- a/numpy/core/src/common/ucsnarrow.c
+++ b/numpy/core/src/common/ucsnarrow.c
@@ -16,76 +16,12 @@
#include "ctors.h"
/*
- * Functions only needed on narrow builds of Python for converting back and
- * forth between the NumPy Unicode data-type (always 4-bytes) and the
- * Python Unicode scalar (2-bytes on a narrow build).
- */
-
-/*
- * The ucs2 buffer must be large enough to hold 2*ucs4length characters
- * due to the use of surrogate pairs.
+ * This file originally contained functions only needed on narrow builds of
+ * Python for converting back and forth between the NumPy Unicode data-type
+ * (always 4-bytes) and the Python Unicode scalar (2-bytes on a narrow build).
*
- * The return value is the number of ucs2 bytes used-up which
- * is ucs4length + number of surrogate pairs found.
- *
- * Values above 0xffff are converted to surrogate pairs.
+ * This "narrow" interface is now deprecated in python and unused in NumPy.
*/
-NPY_NO_EXPORT int
-PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, npy_ucs4 const *ucs4, int ucs4length)
-{
- int i;
- int numucs2 = 0;
- npy_ucs4 chr;
- for (i = 0; i < ucs4length; i++) {
- chr = *ucs4++;
- if (chr > 0xffff) {
- numucs2++;
- chr -= 0x10000L;
- *ucs2++ = 0xD800 + (Py_UNICODE) (chr >> 10);
- *ucs2++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
- }
- else {
- *ucs2++ = (Py_UNICODE) chr;
- }
- numucs2++;
- }
- return numucs2;
-}
-
-
-/*
- * This converts a UCS2 buffer of the given length to UCS4 buffer.
- * It converts up to ucs4len characters of UCS2
- *
- * It returns the number of characters converted which can
- * be less than ucs2len if there are surrogate pairs in ucs2.
- *
- * The return value is the actual size of the used part of the ucs4 buffer.
- */
-NPY_NO_EXPORT int
-PyUCS2Buffer_AsUCS4(Py_UNICODE const *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len)
-{
- int i;
- npy_ucs4 chr;
- Py_UNICODE ch;
- int numchars=0;
-
- for (i = 0; (i < ucs2len) && (numchars < ucs4len); i++) {
- ch = *ucs2++;
- if (ch >= 0xd800 && ch <= 0xdfff) {
- /* surrogate pair */
- chr = ((npy_ucs4)(ch-0xd800)) << 10;
- chr += *ucs2++ + 0x2400; /* -0xdc00 + 0x10000 */
- i++;
- }
- else {
- chr = (npy_ucs4) ch;
- }
- *ucs4++ = chr;
- numchars++;
- }
- return numchars;
-}
/*
* Returns a PyUnicodeObject initialized from a buffer containing
@@ -112,14 +48,13 @@ PyUnicode_FromUCS4(char const *src_char, Py_ssize_t size, int swap, int align)
Py_ssize_t ucs4len = size / sizeof(npy_ucs4);
npy_ucs4 const *src = (npy_ucs4 const *)src_char;
npy_ucs4 *buf = NULL;
- PyUnicodeObject *ret;
/* swap and align if needed */
if (swap || align) {
buf = (npy_ucs4 *)malloc(size);
if (buf == NULL) {
PyErr_NoMemory();
- goto fail;
+ return NULL;
}
memcpy(buf, src, size);
if (swap) {
@@ -132,43 +67,8 @@ PyUnicode_FromUCS4(char const *src_char, Py_ssize_t size, int swap, int align)
while (ucs4len > 0 && src[ucs4len - 1] == 0) {
ucs4len--;
}
-
- /* produce PyUnicode object */
-#ifdef Py_UNICODE_WIDE
- {
- ret = (PyUnicodeObject *)PyUnicode_FromUnicode((Py_UNICODE const*)src,
- (Py_ssize_t) ucs4len);
- if (ret == NULL) {
- goto fail;
- }
- }
-#else
- {
- Py_ssize_t tmpsiz = 2 * sizeof(Py_UNICODE) * ucs4len;
- Py_ssize_t ucs2len;
- Py_UNICODE *tmp;
-
- if ((tmp = (Py_UNICODE *)malloc(tmpsiz)) == NULL) {
- PyErr_NoMemory();
- goto fail;
- }
- ucs2len = PyUCS2Buffer_FromUCS4(tmp, src, ucs4len);
- ret = (PyUnicodeObject *)PyUnicode_FromUnicode(tmp, (Py_ssize_t) ucs2len);
- free(tmp);
- if (ret == NULL) {
- goto fail;
- }
- }
-#endif
-
- if (buf) {
- free(buf);
- }
+ PyUnicodeObject *ret = (PyUnicodeObject *)PyUnicode_FromKindAndData(
+ PyUnicode_4BYTE_KIND, src, ucs4len);
+ free(buf);
return ret;
-
-fail:
- if (buf) {
- free(buf);
- }
- return NULL;
}
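
For readers unfamiliar with the PEP 393 API: the slimmed-down PyUnicode_FromUCS4 above now leans entirely on PyUnicode_FromKindAndData. A minimal standalone sketch of the same idea (not part of the patch; the helper name is made up):

    #include <Python.h>

    /* Build a str from a raw UCS4 buffer, e.g. one element of a NumPy 'U'
     * array.  One PyUnicode_FromKindAndData call replaces the old
     * PyUCS2Buffer_FromUCS4 + PyUnicode_FromUnicode pair and behaves the
     * same on every CPython 3.3+ build. */
    static PyObject *
    str_from_ucs4(const Py_UCS4 *src, Py_ssize_t ucs4len)
    {
        /* fixed-width elements are NUL-padded; trim the padding first */
        while (ucs4len > 0 && src[ucs4len - 1] == 0) {
            ucs4len--;
        }
        return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, src, ucs4len);
    }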
diff --git a/numpy/core/src/common/ucsnarrow.h b/numpy/core/src/common/ucsnarrow.h
index fe31a5e25..c811e1f2c 100644
--- a/numpy/core/src/common/ucsnarrow.h
+++ b/numpy/core/src/common/ucsnarrow.h
@@ -1,12 +1,6 @@
#ifndef _NPY_UCSNARROW_H_
#define _NPY_UCSNARROW_H_
-NPY_NO_EXPORT int
-PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs4length);
-
-NPY_NO_EXPORT int
-PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len);
-
NPY_NO_EXPORT PyUnicodeObject *
PyUnicode_FromUCS4(char *src, Py_ssize_t size, int swap, int align);
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index ce288d62e..c16e0f311 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -450,12 +450,6 @@ static int
UNICODE_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
- PyObject *temp;
- Py_UNICODE *ptr;
- int datalen;
-#ifndef Py_UNICODE_WIDE
- char *buffer;
-#endif
if (PyArray_IsZeroDim(op)) {
return convert_to_scalar_and_retry(op, ov, vap, UNICODE_setitem);
@@ -466,6 +460,8 @@ UNICODE_setitem(PyObject *op, void *ov, void *vap)
"setting an array element with a sequence");
return -1;
}
+
+ PyObject *temp;
if (PyBytes_Check(op)) {
/* Try to decode from ASCII */
temp = PyUnicode_FromEncodedObject(op, "ASCII", "strict");
@@ -476,18 +472,27 @@ UNICODE_setitem(PyObject *op, void *ov, void *vap)
else if ((temp=PyObject_Str(op)) == NULL) {
return -1;
}
- ptr = PyUnicode_AS_UNICODE(temp);
- if ((ptr == NULL) || (PyErr_Occurred())) {
+
+ /* truncate if needed */
+ Py_ssize_t max_len = PyArray_DESCR(ap)->elsize >> 2;
+ Py_ssize_t actual_len = PyUnicode_GetLength(temp);
+ if (actual_len < 0) {
Py_DECREF(temp);
return -1;
}
- datalen = PyUnicode_GET_DATA_SIZE(temp);
+ if (actual_len > max_len) {
+ Py_SETREF(temp, PyUnicode_Substring(temp, 0, max_len));
+ if (temp == NULL) {
+ return -1;
+ }
+ actual_len = max_len;
+ }
-#ifdef Py_UNICODE_WIDE
- memcpy(ov, ptr, PyArray_MIN(PyArray_DESCR(ap)->elsize, datalen));
-#else
+ Py_ssize_t num_bytes = actual_len * 4;
+
+ char *buffer;
if (!PyArray_ISALIGNED(ap)) {
- buffer = PyArray_malloc(PyArray_DESCR(ap)->elsize);
+ buffer = PyArray_malloc(num_bytes);
if (buffer == NULL) {
Py_DECREF(temp);
PyErr_NoMemory();
@@ -497,20 +502,23 @@ UNICODE_setitem(PyObject *op, void *ov, void *vap)
else {
buffer = ov;
}
- datalen = PyUCS2Buffer_AsUCS4(ptr, (npy_ucs4 *)buffer,
- datalen >> 1, PyArray_DESCR(ap)->elsize >> 2);
- datalen <<= 2;
+ if (PyUnicode_AsUCS4(temp, (Py_UCS4 *)buffer, actual_len, 0) == NULL) {
+ PyArray_free(buffer);
+ Py_DECREF(temp);
+ return -1;
+ }
+
if (!PyArray_ISALIGNED(ap)) {
- memcpy(ov, buffer, datalen);
+ memcpy(ov, buffer, num_bytes);
PyArray_free(buffer);
}
-#endif
+
/* Fill in the rest of the space with 0 */
- if (PyArray_DESCR(ap)->elsize > datalen) {
- memset((char*)ov + datalen, 0, (PyArray_DESCR(ap)->elsize - datalen));
+ if (PyArray_DESCR(ap)->elsize > num_bytes) {
+ memset((char*)ov + num_bytes, 0, (PyArray_DESCR(ap)->elsize - num_bytes));
}
if (PyArray_ISBYTESWAPPED(ap)) {
- byte_swap_vector(ov, PyArray_DESCR(ap)->elsize >> 2, 4);
+ byte_swap_vector(ov, actual_len, 4);
}
Py_DECREF(temp);
return 0;
@@ -2650,12 +2658,6 @@ STRING_nonzero (char *ip, PyArrayObject *ap)
return nonz;
}
-#ifdef Py_UNICODE_WIDE
-#define PyArray_UCS4_ISSPACE Py_UNICODE_ISSPACE
-#else
-#define PyArray_UCS4_ISSPACE(ch) Py_STRING_ISSPACE((char)ch)
-#endif
-
static npy_bool
UNICODE_nonzero (npy_ucs4 *ip, PyArrayObject *ap)
{
@@ -2681,7 +2683,7 @@ UNICODE_nonzero (npy_ucs4 *ip, PyArrayObject *ap)
if (*ip == '\0') {
seen_null = NPY_TRUE;
}
- else if (seen_null || !PyArray_UCS4_ISSPACE(*ip)) {
+ else if (seen_null || !Py_UNICODE_ISSPACE(*ip)) {
nonz = NPY_TRUE;
break;
}
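
The new truncation logic in UNICODE_setitem boils down to the pattern sketched below (copy_truncated_ucs4 is an illustrative helper, not a NumPy function):

    #include <Python.h>

    /* Copy at most max_len code points of `str` into a caller-provided UCS4
     * buffer.  Truncation goes through PyUnicode_Substring, which counts
     * code points, so a surrogate pair can never be cut in half the way a
     * raw UCS2 copy could. */
    static Py_ssize_t
    copy_truncated_ucs4(PyObject *str, Py_UCS4 *out, Py_ssize_t max_len)
    {
        Py_ssize_t len = PyUnicode_GetLength(str);
        if (len < 0) {
            return -1;
        }
        Py_INCREF(str);
        if (len > max_len) {
            Py_SETREF(str, PyUnicode_Substring(str, 0, max_len));
            if (str == NULL) {
                return -1;
            }
            len = max_len;
        }
        /* exact fit: `out` must hold at least `len` Py_UCS4 values */
        if (PyUnicode_AsUCS4(str, out, len, 0) == NULL) {
            Py_DECREF(str);
            return -1;
        }
        Py_DECREF(str);
        return len;
    }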
diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c
index 576186362..9a1f7b230 100644
--- a/numpy/core/src/multiarray/buffer.c
+++ b/numpy/core/src/multiarray/buffer.c
@@ -832,11 +832,6 @@ gentype_getbuffer(PyObject *self, Py_buffer *view, int flags)
descr = PyArray_DescrFromScalar(self);
view->buf = (void *)scalar_value(self, descr);
elsize = descr->elsize;
-#ifndef Py_UNICODE_WIDE
- if (descr->type_num == NPY_UNICODE) {
- elsize >>= 1;
- }
-#endif
view->len = elsize;
if (PyArray_IsScalar(self, Datetime) || PyArray_IsScalar(self, Timedelta)) {
elsize = 1; /* descr->elsize,char is 8,'M', but we return 1,'B' */
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 3ee2cc6c6..0150ae10e 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -130,27 +130,34 @@ PyArray_DTypeFromObjectStringDiscovery(
PyObject *obj, PyArray_Descr *last_dtype, int string_type)
{
int itemsize;
- PyObject *temp;
if (string_type == NPY_STRING) {
- if ((temp = PyObject_Str(obj)) == NULL) {
+ PyObject *temp = PyObject_Str(obj);
+ if (temp == NULL) {
return NULL;
}
+ /* assume that when we do the encoding elsewhere we'll use ASCII */
itemsize = PyUnicode_GetLength(temp);
+ Py_DECREF(temp);
+ if (itemsize < 0) {
+ return NULL;
+ }
}
else if (string_type == NPY_UNICODE) {
- if ((temp = PyObject_Str(obj)) == NULL) {
+ PyObject *temp = PyObject_Str(obj);
+ if (temp == NULL) {
return NULL;
}
- itemsize = PyUnicode_GET_DATA_SIZE(temp);
-#ifndef Py_UNICODE_WIDE
- itemsize <<= 1;
-#endif
+ itemsize = PyUnicode_GetLength(temp);
+ Py_DECREF(temp);
+ if (itemsize < 0) {
+ return NULL;
+ }
+ itemsize *= 4; /* convert UCS4 codepoints to bytes */
}
else {
return NULL;
}
- Py_DECREF(temp);
if (last_dtype != NULL &&
last_dtype->type_num == string_type &&
last_dtype->elsize >= itemsize) {
@@ -258,10 +265,11 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
/* Check if it's a Unicode string */
if (PyUnicode_Check(obj)) {
- int itemsize = PyUnicode_GET_DATA_SIZE(obj);
-#ifndef Py_UNICODE_WIDE
- itemsize <<= 1;
-#endif
+ int itemsize = PyUnicode_GetLength(obj);
+ if (itemsize < 0) {
+ goto fail;
+ }
+ itemsize *= 4;
/*
* If it's already a big enough unicode object,
diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c
index 5c4332364..6d3276e18 100644
--- a/numpy/core/src/multiarray/scalarapi.c
+++ b/numpy/core/src/multiarray/scalarapi.c
@@ -71,7 +71,16 @@ scalar_value(PyObject *scalar, PyArray_Descr *descr)
case NPY_STRING:
return (void *)PyString_AS_STRING(scalar);
case NPY_UNICODE:
- return (void *)PyUnicode_AS_DATA(scalar);
+ /* lazy initialization, to reduce the memory used by string scalars */
+ if (PyArrayScalar_VAL(scalar, Unicode) == NULL) {
+ Py_UCS4 *raw_data = PyUnicode_AsUCS4Copy(scalar);
+ if (raw_data == NULL) {
+ return NULL;
+ }
+ PyArrayScalar_VAL(scalar, Unicode) = raw_data;
+ return (void *)raw_data;
+ }
+ return PyArrayScalar_VAL(scalar, Unicode);
case NPY_VOID:
/* Note: no & needed here, so can't use CASE */
return PyArrayScalar_VAL(scalar, Void);
@@ -319,21 +328,10 @@ PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode)
memptr = scalar_value(scalar, typecode);
-#ifndef Py_UNICODE_WIDE
- if (typecode->type_num == NPY_UNICODE) {
- PyUCS2Buffer_AsUCS4((Py_UNICODE *)memptr,
- (npy_ucs4 *)PyArray_DATA(r),
- PyUnicode_GET_SIZE(scalar),
- PyArray_ITEMSIZE(r) >> 2);
- }
- else
-#endif
- {
- memcpy(PyArray_DATA(r), memptr, PyArray_ITEMSIZE(r));
- if (PyDataType_FLAGCHK(typecode, NPY_ITEM_HASOBJECT)) {
- /* Need to INCREF just the PyObject portion */
- PyArray_Item_INCREF(memptr, typecode);
- }
+ memcpy(PyArray_DATA(r), memptr, PyArray_ITEMSIZE(r));
+ if (PyDataType_FLAGCHK(typecode, NPY_ITEM_HASOBJECT)) {
+ /* Need to INCREF just the PyObject portion */
+ PyArray_Item_INCREF(memptr, typecode);
}
finish:
@@ -568,10 +566,7 @@ PyArray_DescrFromScalar(PyObject *sc)
descr->elsize = PyString_GET_SIZE(sc);
}
else if (type_num == NPY_UNICODE) {
- descr->elsize = PyUnicode_GET_DATA_SIZE(sc);
-#ifndef Py_UNICODE_WIDE
- descr->elsize <<= 1;
-#endif
+ descr->elsize = PyUnicode_GET_LENGTH(sc) * 4;
}
else {
PyArray_Descr *dtype;
@@ -654,23 +649,30 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base)
}
}
if (type_num == NPY_UNICODE) {
- PyObject *u, *args;
- int byteorder;
-
-#if NPY_BYTE_ORDER == NPY_LITTLE_ENDIAN
- byteorder = -1;
-#elif NPY_BYTE_ORDER == NPY_BIG_ENDIAN
- byteorder = +1;
-#else
- #error Endianness undefined ?
-#endif
- if (swap) byteorder *= -1;
-
- u = PyUnicode_DecodeUTF32(data, itemsize, NULL, &byteorder);
+ /* we need the full string length here, else copyswap will write too
+ many bytes */
+ void *buff = PyArray_malloc(descr->elsize);
+ if (buff == NULL) {
+ return PyErr_NoMemory();
+ }
+ /* copyswap needs an array object, but only actually cares about the
+ * dtype
+ */
+ PyArrayObject_fields dummy_arr;
+ if (base == NULL) {
+ dummy_arr.descr = descr;
+ base = (PyObject *)&dummy_arr;
+ }
+ copyswap(buff, data, swap, base);
+
+ /* truncation occurs here */
+ PyObject *u = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buff, itemsize / 4);
+ PyArray_free(buff);
if (u == NULL) {
return NULL;
}
- args = Py_BuildValue("(O)", u);
+
+ PyObject *args = Py_BuildValue("(O)", u);
if (args == NULL) {
Py_DECREF(u);
return NULL;
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 7657e39ee..eafa13ff2 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -345,6 +345,10 @@ format_@name@(@type@ val, npy_bool scientific,
* over-ride repr and str of array-scalar strings and unicode to
* remove NULL bytes and then call the corresponding functions
* of string and unicode.
+ *
+ * FIXME:
+ * is this really a good idea?
+ * stop using Py_UNICODE here.
*/
/**begin repeat
@@ -1094,11 +1098,6 @@ gentype_itemsize_get(PyObject *self)
typecode = PyArray_DescrFromScalar(self);
elsize = typecode->elsize;
-#ifndef Py_UNICODE_WIDE
- if (typecode->type_num == NPY_UNICODE) {
- elsize >>= 1;
- }
-#endif
ret = PyInt_FromLong((long) elsize);
Py_DECREF(typecode);
return ret;
@@ -1658,12 +1657,7 @@ gentype_reduce(PyObject *self, PyObject *NPY_UNUSED(args))
return NULL;
}
- if (PyArray_IsScalar(self, Unicode)) {
- /* Unicode on Python 3 does not expose the buffer interface */
- buffer = PyUnicode_AS_DATA(self);
- buflen = PyUnicode_GET_DATA_SIZE(self);
- }
- else if (PyObject_GetBuffer(self, &view, PyBUF_SIMPLE) >= 0) {
+ if (PyObject_GetBuffer(self, &view, PyBUF_SIMPLE) >= 0) {
buffer = view.buf;
buflen = view.len;
/*
@@ -1718,48 +1712,13 @@ gentype_reduce(PyObject *self, PyObject *NPY_UNUSED(args))
PyTuple_SET_ITEM(ret, 1, tup);
}
else {
-#ifndef Py_UNICODE_WIDE
- /*
- * We need to expand the buffer so that we always write
- * UCS4 to disk for pickle of unicode scalars.
- *
- * This could be in a unicode_reduce function, but
- * that would require re-factoring.
- */
- int alloc = 0;
- char *tmp;
- int newlen;
-
- if (PyArray_IsScalar(self, Unicode)) {
- tmp = PyArray_malloc(buflen*2);
- if (tmp == NULL) {
- Py_DECREF(ret);
- return PyErr_NoMemory();
- }
- alloc = 1;
- newlen = PyUCS2Buffer_AsUCS4((Py_UNICODE *)buffer,
- (npy_ucs4 *)tmp,
- buflen / 2, buflen / 2);
- buflen = newlen*4;
- buffer = tmp;
- }
-#endif
mod = PyBytes_FromStringAndSize(buffer, buflen);
if (mod == NULL) {
Py_DECREF(ret);
-#ifndef Py_UNICODE_WIDE
- ret = NULL;
- goto fail;
-#else
return NULL;
-#endif
}
PyTuple_SET_ITEM(ret, 1,
Py_BuildValue("NN", obj, mod));
-#ifndef Py_UNICODE_WIDE
-fail:
- if (alloc) PyArray_free((char *)buffer);
-#endif
}
return ret;
}
@@ -2409,6 +2368,15 @@ object_arrtype_dealloc(PyObject *v)
Py_TYPE(v)->tp_free(v);
}
+static void
+unicode_arrtype_dealloc(PyObject *v)
+{
+ /* note: may be null if it was never requested */
+ PyMem_Free(PyArrayScalar_VAL(v, Unicode));
+ /* delegate to the base class */
+ PyUnicode_Type.tp_dealloc(v);
+}
+
/**begin repeat
* #name = byte, short, int, long, longlong, ubyte, ushort, uint, ulong,
* ulonglong, half, float, double, longdouble, cfloat, cdouble,
@@ -2444,6 +2412,9 @@ static PyObject *
PyErr_Clear();
}
else {
+#if defined(_@TYPE@_IS_UNICODE)
+ PyArrayScalar_VAL(from_superclass, Unicode) = NULL;
+#endif
return from_superclass;
}
#endif
@@ -3667,6 +3638,9 @@ initialize_numeric_types(void)
/**end repeat**/
+ PyUnicodeArrType_Type.tp_dealloc = unicode_arrtype_dealloc;
+ PyUnicodeArrType_Type.tp_as_buffer = &gentype_as_buffer;
+
/**begin repeat
* #name = bool, byte, short, ubyte, ushort, uint, ulong, ulonglong,
* half, float, longdouble, cfloat, clongdouble, void, object,
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index ad38911cb..13244f3ba 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -7854,6 +7854,34 @@ class TestBytestringArrayNonzero:
assert_(a)
+class TestUnicodeEncoding:
+ """
+ Tests for encoding related bugs, such as UCS2 vs UCS4, round-tripping
+ issues, etc
+ """
+ def test_round_trip(self):
+ """ Tests that GETITEM, SETITEM, and PyArray_Scalar roundtrip """
+ # gh-15363
+ arr = np.zeros(shape=(), dtype="U1")
+ for i in range(1, sys.maxunicode + 1):
+ expected = chr(i)
+ arr[()] = expected
+ assert arr[()] == expected
+ assert arr.item() == expected
+
+ def test_assign_scalar(self):
+ # gh-3258
+ l = np.array(['aa', 'bb'])
+ l[:] = np.unicode_('cc')
+ assert_equal(l, ['cc', 'cc'])
+
+ def test_fill_scalar(self):
+ # gh-7227
+ l = np.array(['aa', 'bb'])
+ l.fill(np.unicode_('cc'))
+ assert_equal(l, ['cc', 'cc'])
+
+
class TestUnicodeArrayNonzero:
def test_empty_ustring_array_is_falsey(self):
diff --git a/numpy/core/tests/test_scalarbuffer.py b/numpy/core/tests/test_scalarbuffer.py
index b8c6dd4aa..b1c1bbbb1 100644
--- a/numpy/core/tests/test_scalarbuffer.py
+++ b/numpy/core/tests/test_scalarbuffer.py
@@ -76,27 +76,44 @@ class TestScalarPEP3118:
assert_equal(mv_x.itemsize, mv_a.itemsize)
assert_equal(mv_x.format, mv_a.format)
+ def _as_dict(self, m):
+ return dict(strides=m.strides, shape=m.shape, itemsize=m.itemsize,
+ ndim=m.ndim, format=m.format)
+
def test_datetime_memoryview(self):
# gh-11656
# Values verified with v1.13.3, shape is not () as in test_scalar_dim
- def as_dict(m):
- return dict(strides=m.strides, shape=m.shape, itemsize=m.itemsize,
- ndim=m.ndim, format=m.format)
dt1 = np.datetime64('2016-01-01')
dt2 = np.datetime64('2017-01-01')
- expected = {'strides': (1,), 'itemsize': 1, 'ndim': 1,
- 'shape': (8,), 'format': 'B'}
+ expected = dict(strides=(1,), itemsize=1, ndim=1, shape=(8,),
+ format='B')
v = memoryview(dt1)
- res = as_dict(v)
- assert_equal(res, expected)
+ assert self._as_dict(v) == expected
v = memoryview(dt2 - dt1)
- res = as_dict(v)
- assert_equal(res, expected)
+ assert self._as_dict(v) == expected
dt = np.dtype([('a', 'uint16'), ('b', 'M8[s]')])
a = np.empty(1, dt)
# Fails to create a PEP 3118 valid buffer
assert_raises((ValueError, BufferError), memoryview, a[0])
+ @pytest.mark.parametrize('s', [
+ pytest.param("\x32\x32", id="ascii"),
+ pytest.param("\uFE0F\uFE0F", id="basic multilingual"),
+ pytest.param("\U0001f4bb\U0001f4bb", id="non-BMP"),
+ ])
+ def test_str_ucs4(self, s):
+ s = np.str_(s) # only our subclass implements the buffer protocol
+
+ # all the same, characters always encode as ucs4
+ expected = dict(strides=(), itemsize=8, ndim=0, shape=(), format='2w')
+
+ v = memoryview(s)
+ assert self._as_dict(v) == expected
+
+ # integers of the platform-appropriate endianness
+ code_points = np.frombuffer(v, dtype='i4')
+
+ assert_equal(code_points, [ord(c) for c in s])
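
As a closing illustration, a small self-contained C program (a sketch only: it assumes an embedded CPython 3 build compiled with the usual python3-config flags, nothing NumPy-specific) exercising the same UCS4 round trip that test_round_trip checks from Python:

    #include <Python.h>
    #include <stdio.h>

    int
    main(void)
    {
        Py_Initialize();

        /* one BMP and one non-BMP character, as UCS4 code points */
        Py_UCS4 data[2] = {0xFE0F, 0x1F4BB};

        PyObject *u = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, data, 2);
        if (u == NULL) {
            PyErr_Print();
            return 1;
        }

        /* back to UCS4: both directions survive non-BMP characters */
        Py_UCS4 *copy = PyUnicode_AsUCS4Copy(u);
        if (copy == NULL) {
            PyErr_Print();
            return 1;
        }
        printf("len=%zd first=%#x second=%#x\n",
               PyUnicode_GetLength(u), (unsigned)copy[0], (unsigned)copy[1]);

        PyMem_Free(copy);
        Py_DECREF(u);
        Py_FinalizeEx();
        return 0;
    }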