Merge pull request #15385 from eric-wieser/fix-unicode-ucs2

BUG, MAINT: Stop using the error-prone deprecated Py_UNICODE apis
author: Sebastian Berg <sebastian@sipsolutions.net> 2020-02-13 16:17:15 -0800
committer: GitHub <noreply@github.com> 2020-02-13 16:17:15 -0800
commit: 1f9ab28a9ee5a0cda956da35d0c77a3a4c4bcd55 (patch)
tree: 24e7752be50437d9dcedafcecbebdefb01e53a9f /numpy/core/tests
parent: 491f41acd2c6a0e7886662a2448b3d245ae432a9 (diff)
parent: d0b7b6638fe7496d25a488a179d79879748075fa (diff)
download: numpy-1f9ab28a9ee5a0cda956da35d0c77a3a4c4bcd55.tar.gz
2 files changed, 54 insertions, 9 deletions
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index ad38911cb..13244f3ba 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -7854,6 +7854,34 @@ class TestBytestringArrayNonzero:
         assert_(a)
 
 
+class TestUnicodeEncoding:
+    """
+    Tests for encoding related bugs, such as UCS2 vs UCS4, round-tripping
+    issues, etc
+    """
+    def test_round_trip(self):
+        """ Tests that GETITEM, SETITEM, and PyArray_Scalar roundtrip """
+        # gh-15363
+        arr = np.zeros(shape=(), dtype="U1")
+        for i in range(1, sys.maxunicode + 1):
+            expected = chr(i)
+            arr[()] = expected
+            assert arr[()] == expected
+            assert arr.item() == expected
+
+    def test_assign_scalar(self):
+        # gh-3258
+        l = np.array(['aa', 'bb'])
+        l[:] = np.unicode_('cc')
+        assert_equal(l, ['cc', 'cc'])
+
+    def test_fill_scalar(self):
+        # gh-7227
+        l = np.array(['aa', 'bb'])
+        l.fill(np.unicode_('cc'))
+        assert_equal(l, ['cc', 'cc'])
+
+
 class TestUnicodeArrayNonzero:
 
     def test_empty_ustring_array_is_falsey(self):
diff --git a/numpy/core/tests/test_scalarbuffer.py b/numpy/core/tests/test_scalarbuffer.py
index b8c6dd4aa..b1c1bbbb1 100644
--- a/numpy/core/tests/test_scalarbuffer.py
+++ b/numpy/core/tests/test_scalarbuffer.py
@@ -76,27 +76,44 @@ class TestScalarPEP3118:
         assert_equal(mv_x.itemsize, mv_a.itemsize)
         assert_equal(mv_x.format, mv_a.format)
 
+    def _as_dict(self, m):
+        return dict(strides=m.strides, shape=m.shape, itemsize=m.itemsize,
+                    ndim=m.ndim, format=m.format)
+
     def test_datetime_memoryview(self):
         # gh-11656
         # Values verified with v1.13.3, shape is not () as in test_scalar_dim
-        def as_dict(m):
-            return dict(strides=m.strides, shape=m.shape, itemsize=m.itemsize,
-                        ndim=m.ndim, format=m.format)
 
         dt1 = np.datetime64('2016-01-01')
         dt2 = np.datetime64('2017-01-01')
-        expected = {'strides': (1,), 'itemsize': 1, 'ndim': 1,
-                    'shape': (8,), 'format': 'B'}
+        expected = dict(strides=(1,), itemsize=1, ndim=1, shape=(8,),
+                        format='B')
         v = memoryview(dt1)
-        res = as_dict(v)
-        assert_equal(res, expected)
+        assert self._as_dict(v) == expected
 
         v = memoryview(dt2 - dt1)
-        res = as_dict(v)
-        assert_equal(res, expected)
+        assert self._as_dict(v) == expected
 
         dt = np.dtype([('a', 'uint16'), ('b', 'M8[s]')])
         a = np.empty(1, dt)
         # Fails to create a PEP 3118 valid buffer
         assert_raises((ValueError, BufferError), memoryview, a[0])
 
+    @pytest.mark.parametrize('s', [
+        pytest.param("\x32\x32", id="ascii"),
+        pytest.param("\uFE0F\uFE0F", id="basic multilingual"),
+        pytest.param("\U0001f4bb\U0001f4bb", id="non-BMP"),
+    ])
+    def test_str_ucs4(self, s):
+        s = np.str_(s)  # only our subclass implements the buffer protocol
+
+        # all the same, characters always encode as ucs4
+        expected = dict(strides=(), itemsize=8, ndim=0, shape=(), format='2w')
+
+        v = memoryview(s)
+        assert self._as_dict(v) == expected
+
+        # integers of the platform-appropriate endianness
+        code_points = np.frombuffer(v, dtype='i4')
+
+        assert_equal(code_points, [ord(c) for c in s])
author	Sebastian Berg <sebastian@sipsolutions.net>	2020-02-13 16:17:15 -0800
committer	GitHub <noreply@github.com>	2020-02-13 16:17:15 -0800
commit	1f9ab28a9ee5a0cda956da35d0c77a3a4c4bcd55 (patch)
tree	24e7752be50437d9dcedafcecbebdefb01e53a9f /numpy/core/tests
parent	491f41acd2c6a0e7886662a2448b3d245ae432a9 (diff)
parent	d0b7b6638fe7496d25a488a179d79879748075fa (diff)
download	numpy-1f9ab28a9ee5a0cda956da35d0c77a3a4c4bcd55.tar.gz