Merge pull request #18116 from seberg/futurewarn-string-promotion

DEP: Deprecate promotion of numbers and bool to string
author: Matti Picus <matti.picus@gmail.com> 2021-01-26 08:49:45 +0200
committer: GitHub <noreply@github.com> 2021-01-26 08:49:45 +0200
commit: af51d6615c37f43b6d842b72a4b02dc14e024f3e (patch)
tree: 76344e5ea87f8e0db649e2fc2ad163ce6f51588b
parent: c90cb814567b4f798e884b773fe96e42d8aa63de (diff)
parent: 38bda3ce9e6dc075548f378806488ad152c2e46c (diff)
download: numpy-af51d6615c37f43b6d842b72a4b02dc14e024f3e.tar.gz
10 files changed, 182 insertions, 43 deletions
diff --git a/doc/release/upcoming_changes/18116.future.rst b/doc/release/upcoming_changes/18116.future.rst
new file mode 100644
index 000000000..1341d022f
--- /dev/null
+++ b/doc/release/upcoming_changes/18116.future.rst
@@ -0,0 +1,29 @@
+Promotion of strings with numbers and bools is deprecated
+---------------------------------------------------------
+Any promotion of numbers and strings is deprecated and will
+give a ``FutureWarning``. The main affected functionalities
+are:
+
+* `numpy.promote_types` and `numpy.result_type` which will raise
+  an error in this case in the future.
+* `numpy.concatenate` will raise an error when concatenating a string
+  and numeric array. You can use ``dtype="S"`` to explicitly request
+  a string result.
+* `numpy.array` and related functions will start returning ``object``
+  arrays because these functions use ``object`` as a fallback when
+  no common dtype can be found. (In this case, turning the
+  ``FutureWarning`` into an error will unfortunately already give
+  the new behaviour.)
+
+This will mainly affect code such as::
+
+    np.asarray(['string', 0])
+
+and::
+
+    np.concatenate((['string'], [0]))
+
+in both cases adding ``dtype="U"`` or ``dtype="S"`` will give the
+previous (string) result.
+
+Comparisons, universal functions, and casting are not affected by this.
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index 2931977c2..b2f36d794 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -407,6 +407,19 @@ string_unicode_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
         Py_INCREF(Py_NotImplemented);
         return (PyArray_DTypeMeta *)Py_NotImplemented;
     }
+    if (other->type_num != NPY_STRING && other->type_num != NPY_UNICODE) {
+        /* Deprecated 2020-12-19, NumPy 1.21. */
+        if (DEPRECATE_FUTUREWARNING(
+                "Promotion of numbers and bools to strings is deprecated. "
+                "In the future, code such as `np.concatenate((['string'], [0]))` "
+                "will raise an error, while `np.asarray(['string', 0])` will "
+                "return an array with `dtype=object`.  To avoid the warning "
+                "while retaining a string result use `dtype='U'` (or 'S').  "
+                "To get an array of Python objects use `dtype=object`. "
+                "(Warning added in NumPy 1.21)") < 0) {
+            return NULL;
+        }
+    }
     /*
      * The builtin types are ordered by complexity (aside from object) here.
      * Arguably, we should not consider numbers and strings "common", but
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index be48be079..c46346118 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -111,14 +111,18 @@ raise_no_loop_found_error(
         return -1;
     }
     for (i = 0; i < ufunc->nargs; ++i) {
-        Py_INCREF(dtypes[i]);
-        PyTuple_SET_ITEM(dtypes_tup, i, (PyObject *)dtypes[i]);
+        PyObject *tmp = Py_None;
+        if (dtypes[i] != NULL) {
+            tmp = (PyObject *)dtypes[i];
+        }
+        Py_INCREF(tmp);
+        PyTuple_SET_ITEM(dtypes_tup, i, tmp);
     }
 
     /* produce an error object */
     exc_value = PyTuple_Pack(2, ufunc, dtypes_tup);
     Py_DECREF(dtypes_tup);
-    if (exc_value == NULL){
+    if (exc_value == NULL) {
         return -1;
     }
     PyErr_SetObject(exc_type, exc_value);
@@ -329,10 +333,23 @@ PyUFunc_SimpleBinaryComparisonTypeResolver(PyUFuncObject *ufunc,
     }
 
     if (type_tup == NULL) {
-        /* Input types are the result type */
-        out_dtypes[0] = PyArray_ResultType(2, operands, 0, NULL);
-        if (out_dtypes[0] == NULL) {
-            return -1;
+        /*
+         * DEPRECATED NumPy 1.21, 2020-12.
+         * This check is required to avoid the FutureWarning that
+         * ResultType will give for number->string promotions.
+         * (We never supported flexible dtypes here.)
+         */
+        if (!PyArray_ISFLEXIBLE(operands[0]) &&
+                !PyArray_ISFLEXIBLE(operands[1])) {
+            out_dtypes[0] = PyArray_ResultType(2, operands, 0, NULL);
+            if (out_dtypes[0] == NULL) {
+                return -1;
+            }
+        }
+        else {
+            /* Not doing anything will lead to a loop not found error. */
+            out_dtypes[0] = PyArray_DESCR(operands[0]);
+            Py_INCREF(out_dtypes[0]);
         }
         out_dtypes[1] = out_dtypes[0];
         Py_INCREF(out_dtypes[1]);
@@ -488,6 +505,30 @@ PyUFunc_SimpleUniformOperationTypeResolver(
             out_dtypes[0] = ensure_dtype_nbo(PyArray_DESCR(operands[0]));
         }
         else {
+            int iop;
+            npy_bool has_flexible = 0;
+            npy_bool has_object = 0;
+            for (iop = 0; iop < ufunc->nin; iop++) {
+                if (PyArray_ISOBJECT(operands[iop])) {
+                    has_object = 1;
+                }
+                if (PyArray_ISFLEXIBLE(operands[iop])) {
+                    has_flexible = 1;
+                }
+            }
+            if (NPY_UNLIKELY(has_flexible && !has_object)) {
+                /*
+                 * DEPRECATED NumPy 1.21, 2020-12.
+                 * This check is required to avoid the FutureWarning that
+                 * ResultType will give for number->string promotions.
+                 * (We never supported flexible dtypes here.)
+                 */
+                for (iop = 0; iop < ufunc->nin; iop++) {
+                    out_dtypes[iop] = PyArray_DESCR(operands[iop]);
+                    Py_INCREF(out_dtypes[iop]);
+                }
+                return raise_no_loop_found_error(ufunc, out_dtypes);
+            }
             out_dtypes[0] = PyArray_ResultType(ufunc->nin, operands, 0, NULL);
         }
         if (out_dtypes[0] == NULL) {
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index 8f709dbe1..45c792ad2 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -234,6 +234,7 @@ class TestScalarDiscovery:
 
     # Additionally to string this test also runs into a corner case
     # with datetime promotion (the difference is the promotion order).
+    @pytest.mark.filterwarnings("ignore:Promotion of numbers:FutureWarning")
     def test_scalar_promotion(self):
         for sc1, sc2 in product(scalar_instances(), scalar_instances()):
             sc1, sc2 = sc1.values[0], sc2.values[0]
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 5498e1cf9..53441d9fe 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -1100,3 +1100,41 @@ class TestNoseDecoratorsDeprecated(_DeprecationTestCase):
                 count += 1
             assert_(count == 3)
         self.assert_deprecated(_test_parametrize)
+
+
+class TestStringPromotion(_DeprecationTestCase):
+    # Deprecated 2020-12-19, NumPy 1.21
+    warning_cls = FutureWarning
+    message = "Promotion of numbers and bools to strings is deprecated."
+
+    @pytest.mark.parametrize("dtype", "?bhilqpBHILQPefdgFDG")
+    @pytest.mark.parametrize("string_dt", ["S", "U"])
+    def test_deprecated(self, dtype, string_dt):
+        self.assert_deprecated(lambda: np.promote_types(dtype, string_dt))
+
+        # concatenate has to be able to promote to find the result dtype:
+        arr1 = np.ones(3, dtype=dtype)
+        arr2 = np.ones(3, dtype=string_dt)
+        self.assert_deprecated(lambda: np.concatenate((arr1, arr2), axis=0))
+        self.assert_deprecated(lambda: np.concatenate((arr1, arr2), axis=None))
+
+        # coercing to an array is similar, but will fall-back to `object`
+        # (when raising the FutureWarning, this already happens)
+        self.assert_deprecated(lambda: np.array([arr1[0], arr2[0]]),
+                               exceptions=())
+
+    @pytest.mark.parametrize("dtype", "?bhilqpBHILQPefdgFDG")
+    @pytest.mark.parametrize("string_dt", ["S", "U"])
+    def test_not_deprecated(self, dtype, string_dt):
+        # The ufunc type resolvers run into this, but giving a futurewarning
+        # here is unnecessary (it ends up as an error anyway), so test that
+        # no warning is given:
+        arr1 = np.ones(3, dtype=dtype)
+        arr2 = np.ones(3, dtype=string_dt)
+
+        # Adding two arrays uses result_type normally, which would fail:
+        with pytest.raises(TypeError):
+            self.assert_not_deprecated(lambda: arr1 + arr2)
+        # np.equal uses a different type resolver:
+        with pytest.raises(TypeError):
+            self.assert_not_deprecated(lambda: np.equal(arr1, arr2))
diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py
index 1b6fd21e1..449a01d21 100644
--- a/numpy/core/tests/test_half.py
+++ b/numpy/core/tests/test_half.py
@@ -71,8 +71,10 @@ class TestHalf:
     def test_half_conversion_to_string(self, string_dt):
         # Currently uses S/U32 (which is sufficient for float32)
         expected_dt = np.dtype(f"{string_dt}32")
-        assert np.promote_types(np.float16, string_dt) == expected_dt
-        assert np.promote_types(string_dt, np.float16) == expected_dt
+        with pytest.warns(FutureWarning):
+            assert np.promote_types(np.float16, string_dt) == expected_dt
+        with pytest.warns(FutureWarning):
+            assert np.promote_types(string_dt, np.float16) == expected_dt
 
         arr = np.ones(3, dtype=np.float16).astype(string_dt)
         assert arr.dtype == expected_dt
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 280874d21..f8b388b6f 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -847,10 +847,12 @@ class TestTypes:
         assert_equal(np.promote_types('<i8', '<i8'), np.dtype('i8'))
         assert_equal(np.promote_types('>i8', '>i8'), np.dtype('i8'))
 
-        assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U21'))
-        assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U21'))
-        assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U21'))
-        assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U21'))
+        with pytest.warns(FutureWarning,
+                match="Promotion of numbers and bools to strings"):
+            assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U21'))
+            assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U21'))
+            assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U21'))
+            assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U21'))
 
         assert_equal(np.promote_types('<S5', '<U8'), np.dtype('U8'))
         assert_equal(np.promote_types('>S5', '>U8'), np.dtype('U8'))
@@ -897,32 +899,38 @@ class TestTypes:
             promote_types = np.promote_types
 
         S = string_dtype
-        # Promote numeric with unsized string:
-        assert_equal(promote_types('bool', S), np.dtype(S+'5'))
-        assert_equal(promote_types('b', S), np.dtype(S+'4'))
-        assert_equal(promote_types('u1', S), np.dtype(S+'3'))
-        assert_equal(promote_types('u2', S), np.dtype(S+'5'))
-        assert_equal(promote_types('u4', S), np.dtype(S+'10'))
-        assert_equal(promote_types('u8', S), np.dtype(S+'20'))
-        assert_equal(promote_types('i1', S), np.dtype(S+'4'))
-        assert_equal(promote_types('i2', S), np.dtype(S+'6'))
-        assert_equal(promote_types('i4', S), np.dtype(S+'11'))
-        assert_equal(promote_types('i8', S), np.dtype(S+'21'))
-        # Promote numeric with sized string:
-        assert_equal(promote_types('bool', S+'1'), np.dtype(S+'5'))
-        assert_equal(promote_types('bool', S+'30'), np.dtype(S+'30'))
-        assert_equal(promote_types('b', S+'1'), np.dtype(S+'4'))
-        assert_equal(promote_types('b', S+'30'), np.dtype(S+'30'))
-        assert_equal(promote_types('u1', S+'1'), np.dtype(S+'3'))
-        assert_equal(promote_types('u1', S+'30'), np.dtype(S+'30'))
-        assert_equal(promote_types('u2', S+'1'), np.dtype(S+'5'))
-        assert_equal(promote_types('u2', S+'30'), np.dtype(S+'30'))
-        assert_equal(promote_types('u4', S+'1'), np.dtype(S+'10'))
-        assert_equal(promote_types('u4', S+'30'), np.dtype(S+'30'))
-        assert_equal(promote_types('u8', S+'1'), np.dtype(S+'20'))
-        assert_equal(promote_types('u8', S+'30'), np.dtype(S+'30'))
-        # Promote with object:
-        assert_equal(promote_types('O', S+'30'), np.dtype('O'))
+        
+        with pytest.warns(FutureWarning,
+                match="Promotion of numbers and bools to strings") as record:
+            # Promote numeric with unsized string:
+            assert_equal(promote_types('bool', S), np.dtype(S+'5'))
+            assert_equal(promote_types('b', S), np.dtype(S+'4'))
+            assert_equal(promote_types('u1', S), np.dtype(S+'3'))
+            assert_equal(promote_types('u2', S), np.dtype(S+'5'))
+            assert_equal(promote_types('u4', S), np.dtype(S+'10'))
+            assert_equal(promote_types('u8', S), np.dtype(S+'20'))
+            assert_equal(promote_types('i1', S), np.dtype(S+'4'))
+            assert_equal(promote_types('i2', S), np.dtype(S+'6'))
+            assert_equal(promote_types('i4', S), np.dtype(S+'11'))
+            assert_equal(promote_types('i8', S), np.dtype(S+'21'))
+            # Promote numeric with sized string:
+            assert_equal(promote_types('bool', S+'1'), np.dtype(S+'5'))
+            assert_equal(promote_types('bool', S+'30'), np.dtype(S+'30'))
+            assert_equal(promote_types('b', S+'1'), np.dtype(S+'4'))
+            assert_equal(promote_types('b', S+'30'), np.dtype(S+'30'))
+            assert_equal(promote_types('u1', S+'1'), np.dtype(S+'3'))
+            assert_equal(promote_types('u1', S+'30'), np.dtype(S+'30'))
+            assert_equal(promote_types('u2', S+'1'), np.dtype(S+'5'))
+            assert_equal(promote_types('u2', S+'30'), np.dtype(S+'30'))
+            assert_equal(promote_types('u4', S+'1'), np.dtype(S+'10'))
+            assert_equal(promote_types('u4', S+'30'), np.dtype(S+'30'))
+            assert_equal(promote_types('u8', S+'1'), np.dtype(S+'20'))
+            assert_equal(promote_types('u8', S+'30'), np.dtype(S+'30'))
+            # Promote with object:
+            assert_equal(promote_types('O', S+'30'), np.dtype('O'))
+
+        assert len(record) == 22  # each string promotion gave one warning
+
 
     @pytest.mark.parametrize(["dtype1", "dtype2"],
             [[np.dtype("V6"), np.dtype("V10")],
@@ -972,6 +980,7 @@ class TestTypes:
             assert res.isnative
 
     @pytest.mark.slow
+    @pytest.mark.filterwarnings('ignore:Promotion of numbers:FutureWarning')
     @pytest.mark.parametrize(["dtype1", "dtype2"],
             itertools.product(
                 list(np.typecodes["All"]) +
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index 831e48e8b..5faa9923c 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -782,7 +782,9 @@ class TestRegression:
         # Ticket #514
         s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
         t = []
-        np.hstack((t, s))
+        with pytest.warns(FutureWarning,
+                match="Promotion of numbers and bools to strings"):
+            np.hstack((t, s))
 
     def test_arr_transpose(self):
         # Ticket #516
diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index 9922c9173..a0c72f9d0 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -256,7 +256,7 @@ class TestConcatenate:
         r = np.concatenate((a, b), axis=None)
         assert_equal(r.size, a.size + len(b))
         assert_equal(r.dtype, a.dtype)
-        r = np.concatenate((a, b, c), axis=None)
+        r = np.concatenate((a, b, c), axis=None, dtype="U")
         d = array(['0.0', '1.0', '2.0', '3.0',
                    '0', '1', '2', 'x'])
         assert_array_equal(r, d)
@@ -377,7 +377,8 @@ class TestConcatenate:
         # Note that U0 and S0 should be deprecated eventually and changed to
         # actually give the empty string result (together with `np.array`)
         res = np.concatenate(arrs, axis=axis, dtype=string_dt, casting="unsafe")
-        assert res.dtype == np.promote_types("d", string_dt)
+        # The actual dtype should be identical to a cast (of a double array):
+        assert res.dtype == np.array(1.).astype(string_dt).dtype
 
     @pytest.mark.parametrize("axis", [None, 0])
     def test_string_dtype_does_not_inspect(self, axis):
diff --git a/numpy/lib/tests/test_regression.py b/numpy/lib/tests/test_regression.py
index 55df2a675..94fac7ef0 100644
--- a/numpy/lib/tests/test_regression.py
+++ b/numpy/lib/tests/test_regression.py
@@ -1,3 +1,5 @@
+import pytest
+
 import os
 
 import numpy as np
@@ -62,7 +64,8 @@ class TestRegression:
     def test_mem_string_concat(self):
         # Ticket #469
         x = np.array([])
-        np.append(x, 'asdasd\tasdasd')
+        with pytest.warns(FutureWarning):
+            np.append(x, 'asdasd\tasdasd')
 
     def test_poly_div(self):
         # Ticket #553
author	Matti Picus <matti.picus@gmail.com>	2021-01-26 08:49:45 +0200
committer	GitHub <noreply@github.com>	2021-01-26 08:49:45 +0200
commit	af51d6615c37f43b6d842b72a4b02dc14e024f3e (patch)
tree	76344e5ea87f8e0db649e2fc2ad163ce6f51588b
parent	c90cb814567b4f798e884b773fe96e42d8aa63de (diff)
parent	38bda3ce9e6dc075548f378806488ad152c2e46c (diff)
download	numpy-af51d6615c37f43b6d842b72a4b02dc14e024f3e.tar.gz