diff options
author | Matti Picus <matti.picus@gmail.com> | 2021-01-26 08:49:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-26 08:49:45 +0200 |
commit | af51d6615c37f43b6d842b72a4b02dc14e024f3e (patch) | |
tree | 76344e5ea87f8e0db649e2fc2ad163ce6f51588b | |
parent | c90cb814567b4f798e884b773fe96e42d8aa63de (diff) | |
parent | 38bda3ce9e6dc075548f378806488ad152c2e46c (diff) | |
download | numpy-af51d6615c37f43b6d842b72a4b02dc14e024f3e.tar.gz |
Merge pull request #18116 from seberg/futurewarn-string-promotion
DEP: Deprecate promotion of numbers and bool to string
-rw-r--r-- | doc/release/upcoming_changes/18116.future.rst | 29 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dtypemeta.c | 13 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_type_resolution.c | 55 | ||||
-rw-r--r-- | numpy/core/tests/test_array_coercion.py | 1 | ||||
-rw-r--r-- | numpy/core/tests/test_deprecations.py | 38 | ||||
-rw-r--r-- | numpy/core/tests/test_half.py | 6 | ||||
-rw-r--r-- | numpy/core/tests/test_numeric.py | 69 | ||||
-rw-r--r-- | numpy/core/tests/test_regression.py | 4 | ||||
-rw-r--r-- | numpy/core/tests/test_shape_base.py | 5 | ||||
-rw-r--r-- | numpy/lib/tests/test_regression.py | 5 |
10 files changed, 182 insertions, 43 deletions
diff --git a/doc/release/upcoming_changes/18116.future.rst b/doc/release/upcoming_changes/18116.future.rst new file mode 100644 index 000000000..1341d022f --- /dev/null +++ b/doc/release/upcoming_changes/18116.future.rst @@ -0,0 +1,29 @@ +Promotion of strings with numbers and bools is deprecated +--------------------------------------------------------- +Any promotion of numbers and strings is deprecated and will +give a ``FutureWarning``. The main affected functionalities +are: + +* `numpy.promote_types` and `numpy.result_type` which will raise + an error in this case in the future. +* `numpy.concatenate` will raise an error when concatenating a string + and numeric array. You can use ``dtype="S"`` to explicitly request + a string result. +* `numpy.array` and related functions will start returning ``object`` + arrays because these functions use ``object`` as a fallback when + no common dtype can be found. (In this case setting the + ``FutureWarning`` to be raised will unfortunately lead to the new + behaviour) + +This will mainly affect code such as:: + + np.asarray(['string', 0]) + +and:: + + np.concatenate((['string'], [0])) + +in both cases adding ``dtype="U"`` or ``dtype="S"`` will give the +previous (string) result. + +Comparisons, universal functions, and casting are not affected by this. diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index 2931977c2..b2f36d794 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -407,6 +407,19 @@ string_unicode_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) Py_INCREF(Py_NotImplemented); return (PyArray_DTypeMeta *)Py_NotImplemented; } + if (other->type_num != NPY_STRING && other->type_num != NPY_UNICODE) { + /* Deprecated 2020-12-19, NumPy 1.21. */ + if (DEPRECATE_FUTUREWARNING( + "Promotion of numbers and bools to strings is deprecated. 
" + "In the future, code such as `np.concatenate((['string'], [0]))` " + "will raise an error, while `np.asarray(['string', 0])` will " + "return an array with `dtype=object`. To avoid the warning " + "while retaining a string result use `dtype='U'` (or 'S'). " + "To get an array of Python objects use `dtype=object`. " + "(Warning added in NumPy 1.21)") < 0) { + return NULL; + } + } /* * The builtin types are ordered by complexity (aside from object) here. * Arguably, we should not consider numbers and strings "common", but diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index be48be079..c46346118 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -111,14 +111,18 @@ raise_no_loop_found_error( return -1; } for (i = 0; i < ufunc->nargs; ++i) { - Py_INCREF(dtypes[i]); - PyTuple_SET_ITEM(dtypes_tup, i, (PyObject *)dtypes[i]); + PyObject *tmp = Py_None; + if (dtypes[i] != NULL) { + tmp = (PyObject *)dtypes[i]; + } + Py_INCREF(tmp); + PyTuple_SET_ITEM(dtypes_tup, i, tmp); } /* produce an error object */ exc_value = PyTuple_Pack(2, ufunc, dtypes_tup); Py_DECREF(dtypes_tup); - if (exc_value == NULL){ + if (exc_value == NULL) { return -1; } PyErr_SetObject(exc_type, exc_value); @@ -329,10 +333,23 @@ PyUFunc_SimpleBinaryComparisonTypeResolver(PyUFuncObject *ufunc, } if (type_tup == NULL) { - /* Input types are the result type */ - out_dtypes[0] = PyArray_ResultType(2, operands, 0, NULL); - if (out_dtypes[0] == NULL) { - return -1; + /* + * DEPRECATED NumPy 1.20, 2020-12. + * This check is required to avoid the FutureWarning that + * ResultType will give for number->string promotions. + * (We never supported flexible dtypes here.) 
+ */ + if (!PyArray_ISFLEXIBLE(operands[0]) && + !PyArray_ISFLEXIBLE(operands[1])) { + out_dtypes[0] = PyArray_ResultType(2, operands, 0, NULL); + if (out_dtypes[0] == NULL) { + return -1; + } + } + else { + /* Not doing anything will lead to a loop no found error. */ + out_dtypes[0] = PyArray_DESCR(operands[0]); + Py_INCREF(out_dtypes[0]); } out_dtypes[1] = out_dtypes[0]; Py_INCREF(out_dtypes[1]); @@ -488,6 +505,30 @@ PyUFunc_SimpleUniformOperationTypeResolver( out_dtypes[0] = ensure_dtype_nbo(PyArray_DESCR(operands[0])); } else { + int iop; + npy_bool has_flexible = 0; + npy_bool has_object = 0; + for (iop = 0; iop < ufunc->nin; iop++) { + if (PyArray_ISOBJECT(operands[iop])) { + has_object = 1; + } + if (PyArray_ISFLEXIBLE(operands[iop])) { + has_flexible = 1; + } + } + if (NPY_UNLIKELY(has_flexible && !has_object)) { + /* + * DEPRECATED NumPy 1.20, 2020-12. + * This check is required to avoid the FutureWarning that + * ResultType will give for number->string promotions. + * (We never supported flexible dtypes here.) + */ + for (iop = 0; iop < ufunc->nin; iop++) { + out_dtypes[iop] = PyArray_DESCR(operands[iop]); + Py_INCREF(out_dtypes[iop]); + } + return raise_no_loop_found_error(ufunc, out_dtypes); + } out_dtypes[0] = PyArray_ResultType(ufunc->nin, operands, 0, NULL); } if (out_dtypes[0] == NULL) { diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py index 8f709dbe1..45c792ad2 100644 --- a/numpy/core/tests/test_array_coercion.py +++ b/numpy/core/tests/test_array_coercion.py @@ -234,6 +234,7 @@ class TestScalarDiscovery: # Additionally to string this test also runs into a corner case # with datetime promotion (the difference is the promotion order). 
+ @pytest.mark.filterwarnings("ignore:Promotion of numbers:FutureWarning") def test_scalar_promotion(self): for sc1, sc2 in product(scalar_instances(), scalar_instances()): sc1, sc2 = sc1.values[0], sc2.values[0] diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 5498e1cf9..53441d9fe 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -1100,3 +1100,41 @@ class TestNoseDecoratorsDeprecated(_DeprecationTestCase): count += 1 assert_(count == 3) self.assert_deprecated(_test_parametrize) + + +class TestStringPromotion(_DeprecationTestCase): + # Deprecated 2020-12-19, NumPy 1.21 + warning_cls = FutureWarning + message = "Promotion of numbers and bools to strings is deprecated." + + @pytest.mark.parametrize("dtype", "?bhilqpBHILQPefdgFDG") + @pytest.mark.parametrize("string_dt", ["S", "U"]) + def test_deprecated(self, dtype, string_dt): + self.assert_deprecated(lambda: np.promote_types(dtype, string_dt)) + + # concatenate has to be able to promote to find the result dtype: + arr1 = np.ones(3, dtype=dtype) + arr2 = np.ones(3, dtype=string_dt) + self.assert_deprecated(lambda: np.concatenate((arr1, arr2), axis=0)) + self.assert_deprecated(lambda: np.concatenate((arr1, arr2), axis=None)) + + # coercing to an array is similar, but will fall-back to `object` + # (when raising the FutureWarning, this already happens) + self.assert_deprecated(lambda: np.array([arr1[0], arr2[0]]), + exceptions=()) + + @pytest.mark.parametrize("dtype", "?bhilqpBHILQPefdgFDG") + @pytest.mark.parametrize("string_dt", ["S", "U"]) + def test_not_deprecated(self, dtype, string_dt): + # The ufunc type resolvers run into this, but giving a futurewarning + # here is unnecessary (it ends up as an error anyway), so test that + # no warning is given: + arr1 = np.ones(3, dtype=dtype) + arr2 = np.ones(3, dtype=string_dt) + + # Adding two arrays uses result_type normally, which would fail: + with pytest.raises(TypeError): 
+ self.assert_not_deprecated(lambda: arr1 + arr2) + # np.equal uses a different type resolver: + with pytest.raises(TypeError): + self.assert_not_deprecated(lambda: np.equal(arr1, arr2)) diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py index 1b6fd21e1..449a01d21 100644 --- a/numpy/core/tests/test_half.py +++ b/numpy/core/tests/test_half.py @@ -71,8 +71,10 @@ class TestHalf: def test_half_conversion_to_string(self, string_dt): # Currently uses S/U32 (which is sufficient for float32) expected_dt = np.dtype(f"{string_dt}32") - assert np.promote_types(np.float16, string_dt) == expected_dt - assert np.promote_types(string_dt, np.float16) == expected_dt + with pytest.warns(FutureWarning): + assert np.promote_types(np.float16, string_dt) == expected_dt + with pytest.warns(FutureWarning): + assert np.promote_types(string_dt, np.float16) == expected_dt arr = np.ones(3, dtype=np.float16).astype(string_dt) assert arr.dtype == expected_dt diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 280874d21..f8b388b6f 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -847,10 +847,12 @@ class TestTypes: assert_equal(np.promote_types('<i8', '<i8'), np.dtype('i8')) assert_equal(np.promote_types('>i8', '>i8'), np.dtype('i8')) - assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U21')) - assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U21')) - assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U21')) - assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U21')) + with pytest.warns(FutureWarning, + match="Promotion of numbers and bools to strings"): + assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U21')) + assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U21')) + assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U21')) + assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U21')) assert_equal(np.promote_types('<S5', '<U8'), np.dtype('U8')) 
assert_equal(np.promote_types('>S5', '>U8'), np.dtype('U8')) @@ -897,32 +899,38 @@ class TestTypes: promote_types = np.promote_types S = string_dtype - # Promote numeric with unsized string: - assert_equal(promote_types('bool', S), np.dtype(S+'5')) - assert_equal(promote_types('b', S), np.dtype(S+'4')) - assert_equal(promote_types('u1', S), np.dtype(S+'3')) - assert_equal(promote_types('u2', S), np.dtype(S+'5')) - assert_equal(promote_types('u4', S), np.dtype(S+'10')) - assert_equal(promote_types('u8', S), np.dtype(S+'20')) - assert_equal(promote_types('i1', S), np.dtype(S+'4')) - assert_equal(promote_types('i2', S), np.dtype(S+'6')) - assert_equal(promote_types('i4', S), np.dtype(S+'11')) - assert_equal(promote_types('i8', S), np.dtype(S+'21')) - # Promote numeric with sized string: - assert_equal(promote_types('bool', S+'1'), np.dtype(S+'5')) - assert_equal(promote_types('bool', S+'30'), np.dtype(S+'30')) - assert_equal(promote_types('b', S+'1'), np.dtype(S+'4')) - assert_equal(promote_types('b', S+'30'), np.dtype(S+'30')) - assert_equal(promote_types('u1', S+'1'), np.dtype(S+'3')) - assert_equal(promote_types('u1', S+'30'), np.dtype(S+'30')) - assert_equal(promote_types('u2', S+'1'), np.dtype(S+'5')) - assert_equal(promote_types('u2', S+'30'), np.dtype(S+'30')) - assert_equal(promote_types('u4', S+'1'), np.dtype(S+'10')) - assert_equal(promote_types('u4', S+'30'), np.dtype(S+'30')) - assert_equal(promote_types('u8', S+'1'), np.dtype(S+'20')) - assert_equal(promote_types('u8', S+'30'), np.dtype(S+'30')) - # Promote with object: - assert_equal(promote_types('O', S+'30'), np.dtype('O')) + + with pytest.warns(FutureWarning, + match="Promotion of numbers and bools to strings") as record: + # Promote numeric with unsized string: + assert_equal(promote_types('bool', S), np.dtype(S+'5')) + assert_equal(promote_types('b', S), np.dtype(S+'4')) + assert_equal(promote_types('u1', S), np.dtype(S+'3')) + assert_equal(promote_types('u2', S), np.dtype(S+'5')) + 
assert_equal(promote_types('u4', S), np.dtype(S+'10')) + assert_equal(promote_types('u8', S), np.dtype(S+'20')) + assert_equal(promote_types('i1', S), np.dtype(S+'4')) + assert_equal(promote_types('i2', S), np.dtype(S+'6')) + assert_equal(promote_types('i4', S), np.dtype(S+'11')) + assert_equal(promote_types('i8', S), np.dtype(S+'21')) + # Promote numeric with sized string: + assert_equal(promote_types('bool', S+'1'), np.dtype(S+'5')) + assert_equal(promote_types('bool', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('b', S+'1'), np.dtype(S+'4')) + assert_equal(promote_types('b', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('u1', S+'1'), np.dtype(S+'3')) + assert_equal(promote_types('u1', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('u2', S+'1'), np.dtype(S+'5')) + assert_equal(promote_types('u2', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('u4', S+'1'), np.dtype(S+'10')) + assert_equal(promote_types('u4', S+'30'), np.dtype(S+'30')) + assert_equal(promote_types('u8', S+'1'), np.dtype(S+'20')) + assert_equal(promote_types('u8', S+'30'), np.dtype(S+'30')) + # Promote with object: + assert_equal(promote_types('O', S+'30'), np.dtype('O')) + + assert len(record) == 22 # each string promotion gave one warning + @pytest.mark.parametrize(["dtype1", "dtype2"], [[np.dtype("V6"), np.dtype("V10")], @@ -972,6 +980,7 @@ class TestTypes: assert res.isnative @pytest.mark.slow + @pytest.mark.filterwarnings('ignore:Promotion of numbers:FutureWarning') @pytest.mark.parametrize(["dtype1", "dtype2"], itertools.product( list(np.typecodes["All"]) + diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index 831e48e8b..5faa9923c 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -782,7 +782,9 @@ class TestRegression: # Ticket #514 s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" t = [] - np.hstack((t, s)) + with pytest.warns(FutureWarning, + match="Promotion of numbers and 
bools to strings"): + np.hstack((t, s)) def test_arr_transpose(self): # Ticket #516 diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py index 9922c9173..a0c72f9d0 100644 --- a/numpy/core/tests/test_shape_base.py +++ b/numpy/core/tests/test_shape_base.py @@ -256,7 +256,7 @@ class TestConcatenate: r = np.concatenate((a, b), axis=None) assert_equal(r.size, a.size + len(b)) assert_equal(r.dtype, a.dtype) - r = np.concatenate((a, b, c), axis=None) + r = np.concatenate((a, b, c), axis=None, dtype="U") d = array(['0.0', '1.0', '2.0', '3.0', '0', '1', '2', 'x']) assert_array_equal(r, d) @@ -377,7 +377,8 @@ class TestConcatenate: # Note that U0 and S0 should be deprecated eventually and changed to # actually give the empty string result (together with `np.array`) res = np.concatenate(arrs, axis=axis, dtype=string_dt, casting="unsafe") - assert res.dtype == np.promote_types("d", string_dt) + # The actual dtype should be identical to a cast (of a double array): + assert res.dtype == np.array(1.).astype(string_dt).dtype @pytest.mark.parametrize("axis", [None, 0]) def test_string_dtype_does_not_inspect(self, axis): diff --git a/numpy/lib/tests/test_regression.py b/numpy/lib/tests/test_regression.py index 55df2a675..94fac7ef0 100644 --- a/numpy/lib/tests/test_regression.py +++ b/numpy/lib/tests/test_regression.py @@ -1,3 +1,5 @@ +import pytest + import os import numpy as np @@ -62,7 +64,8 @@ class TestRegression: def test_mem_string_concat(self): # Ticket #469 x = np.array([]) - np.append(x, 'asdasd\tasdasd') + with pytest.warns(FutureWarning): + np.append(x, 'asdasd\tasdasd') def test_poly_div(self): # Ticket #553 |