diff options
author | Matti Picus <matti.picus@gmail.com> | 2020-10-07 21:53:15 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-07 21:53:15 +0300 |
commit | 382758355998951cea2b9f6ad1fb83e7dc4c3a02 (patch) | |
tree | 938956ddaf3f844f38841e1d633f7b2773fedc44 | |
parent | 18af0e10878fe49a893fd576317dabd424c7ca16 (diff) | |
parent | e31ae7ff70c72dec2cf7ba3bd817982ad1b68713 (diff) | |
download | numpy-382758355998951cea2b9f6ad1fb83e7dc4c3a02.tar.gz |
Merge pull request #17419 from seberg/deprecate-coercion-to-subarray-dtype
DEP: Deprecate coercion to subarray dtypes
-rw-r--r-- | doc/release/upcoming_changes/17419.deprecation.rst | 24 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 202 | ||||
-rw-r--r-- | numpy/core/src/multiarray/methods.c | 14 | ||||
-rw-r--r-- | numpy/core/tests/test_deprecations.py | 41 |
4 files changed, 281 insertions, 0 deletions
diff --git a/doc/release/upcoming_changes/17419.deprecation.rst b/doc/release/upcoming_changes/17419.deprecation.rst new file mode 100644 index 000000000..fcab3a8ad --- /dev/null +++ b/doc/release/upcoming_changes/17419.deprecation.rst @@ -0,0 +1,24 @@ +Arrays cannot use subarray dtypes +--------------------------------- +Array creation and casting using ``np.array(obj, dtype)`` +and ``arr.astype(dtype)`` will no longer support a ``dtype`` +that is a subarray dtype such as ``np.dtype("(2)i,")``. + +For such a ``dtype`` the following behaviour occurs currently:: + + res = np.array(obj, dtype) + + res.dtype is not dtype + res.dtype is dtype.base + res.shape[-dtype.ndim:] == dtype.shape + +The shape of the dtype is absorbed into the shape of the array. +This leads to inconsistencies when ``obj`` is: + +* a scalar, such as ``np.array(1, dtype="(2)i")`` +* an array, such as ``np.array(np.array([1]), dtype="(2)i")`` + +In most cases the work-around is to pass the output dtype +(``dtype.base``) directly and possibly check +``res.shape[-dtype.ndim:] == dtype.shape``. If this is insufficient, +please open an issue on the NumPy issue tracker. diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index b09ec9f8e..05e45fbf5 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1368,6 +1368,160 @@ PyArray_GetArrayParamsFromObject(PyObject *NPY_UNUSED(op), } +/* + * This function is a legacy implementation to retain subarray dtype + * behaviour in array coercion. The behaviour here makes sense if tuples + * of matching dimensionality are being coerced. Due to the difficulty + * that the result is ill-defined for lists of array-likes, this is deprecated. + * + * WARNING: Do not use this function, it exists purely to support a deprecated + * code path. 
+ */ +static int +setArrayFromSequence(PyArrayObject *a, PyObject *s, + int dim, PyArrayObject * dst) +{ + Py_ssize_t i, slen; + int res = -1; + + /* first recursion, view equal destination */ + if (dst == NULL) + dst = a; + + /* + * This code is to ensure that the sequence access below will + * return a lower-dimensional sequence. + */ + + /* INCREF on entry DECREF on exit */ + Py_INCREF(s); + + PyObject *seq = NULL; + + if (PyArray_Check(s)) { + if (!(PyArray_CheckExact(s))) { + /* + * make sure a base-class array is used so that the dimensionality + * reduction assumption is correct. + */ + /* This will DECREF(s) if replaced */ + s = PyArray_EnsureArray(s); + if (s == NULL) { + goto fail; + } + } + + /* dst points to correct array subsection */ + if (PyArray_CopyInto(dst, (PyArrayObject *)s) < 0) { + goto fail; + } + + Py_DECREF(s); + return 0; + } + + if (dim > PyArray_NDIM(a)) { + PyErr_Format(PyExc_ValueError, + "setArrayFromSequence: sequence/array dimensions mismatch."); + goto fail; + } + + /* Try __array__ before using s as a sequence */ + PyObject *tmp = _array_from_array_like(s, NULL, 0, NULL); + if (tmp == NULL) { + goto fail; + } + else if (tmp == Py_NotImplemented) { + Py_DECREF(tmp); + } + else { + int r = PyArray_CopyInto(dst, (PyArrayObject *)tmp); + Py_DECREF(tmp); + if (r < 0) { + goto fail; + } + Py_DECREF(s); + return 0; + } + + seq = PySequence_Fast(s, "Could not convert object to sequence"); + if (seq == NULL) { + goto fail; + } + slen = PySequence_Fast_GET_SIZE(seq); + + /* + * Either the dimensions match, or the sequence has length 1 and can + * be broadcast to the destination. 
+ */ + if (slen != PyArray_DIMS(a)[dim] && slen != 1) { + PyErr_Format(PyExc_ValueError, + "cannot copy sequence with size %zd to array axis " + "with dimension %" NPY_INTP_FMT, slen, PyArray_DIMS(a)[dim]); + goto fail; + } + + /* Broadcast the one element from the sequence to all the outputs */ + if (slen == 1) { + PyObject *o = PySequence_Fast_GET_ITEM(seq, 0); + npy_intp alen = PyArray_DIM(a, dim); + + for (i = 0; i < alen; i++) { + if ((PyArray_NDIM(a) - dim) > 1) { + PyArrayObject * tmp = + (PyArrayObject *)array_item_asarray(dst, i); + if (tmp == NULL) { + goto fail; + } + + res = setArrayFromSequence(a, o, dim+1, tmp); + Py_DECREF(tmp); + } + else { + char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]); + res = PyArray_SETITEM(dst, b, o); + } + if (res < 0) { + goto fail; + } + } + } + /* Copy element by element */ + else { + for (i = 0; i < slen; i++) { + PyObject * o = PySequence_Fast_GET_ITEM(seq, i); + if ((PyArray_NDIM(a) - dim) > 1) { + PyArrayObject * tmp = + (PyArrayObject *)array_item_asarray(dst, i); + if (tmp == NULL) { + goto fail; + } + + res = setArrayFromSequence(a, o, dim+1, tmp); + Py_DECREF(tmp); + } + else { + char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]); + res = PyArray_SETITEM(dst, b, o); + } + if (res < 0) { + goto fail; + } + } + } + + Py_DECREF(seq); + Py_DECREF(s); + return 0; + + fail: + Py_XDECREF(seq); + Py_XDECREF(s); /* s is NULL if PyArray_EnsureArray() failed */ + return -1; /* res may hold a stale 0 from an earlier iteration */ +} + + + /*NUMPY_API * Does not check for NPY_ARRAY_ENSURECOPY and NPY_ARRAY_NOTSWAPPED in flags * Steals a reference to newtype --- which can be NULL @@ -1408,6 +1562,54 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, if (ndim < 0) { return NULL; } + + if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) { + /* + * When a subarray dtype was passed in, its dimensions are absorbed + * into the array dimension (causing a dimension mismatch). 
+ * We can't reasonably handle this because of inconsistencies in + * how it was handled (depending on nested list vs. embedded array-likes). + * So we give a deprecation warning and fall back to legacy code. + */ + ret = (PyArrayObject *)PyArray_NewFromDescr( + &PyArray_Type, dtype, ndim, dims, NULL, NULL, + flags&NPY_ARRAY_F_CONTIGUOUS, NULL); + if (ret == NULL) { + npy_free_coercion_cache(cache); + return NULL; + } + assert(PyArray_NDIM(ret) != ndim); + + if (cache == NULL) { + /* This is a single item. Sets only first subarray element. */ + assert(ndim == 0); + if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) { + Py_DECREF(ret); + return NULL; + } + } + else { + npy_free_coercion_cache(cache); + if (setArrayFromSequence(ret, op, 0, NULL) < 0) { + Py_DECREF(ret); + return NULL; + } + } + /* NumPy 1.20, 2020-10-01 */ + if (DEPRECATE( + "using a dtype with a subarray field is deprecated. " + "This can lead to inconsistent behaviour due to the resulting " + "dtype being different from the input dtype. " + "You may try to use `dtype=dtype.base`, which should give the " + "same result for most inputs, but does not guarantee the " + "output dimensions to match the subarray ones. " + "(Deprecated NumPy 1.20)")) { + Py_DECREF(ret); + return NULL; + } + return (PyObject *)ret; + } + if (dtype == NULL) { dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE); } diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index f7cb2185b..e4421b41b 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -844,6 +844,20 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds) if (ret == NULL) { return NULL; } + /* NumPy 1.20, 2020-10-01 */ + if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE( + "using a dtype with a subarray field is deprecated. " + "This can lead to inconsistent behaviour due to the resulting " + "dtype being different from the input dtype. 
" + "You may try to use `dtype=dtype.base`, which should give the " + "same result for most inputs, but does not guarantee the " + "output dimensions to match the subarray ones. " + "For `arr.astype()` the old, surprising, behaviour can be " + "retained using `res = np.empty(arr.shape, dtype)` followed " + "by `res[...] = arr`. (Deprecated NumPy 1.20)")) { + Py_DECREF(ret); + return NULL; + } if (PyArray_CopyInto(ret, self) < 0) { Py_DECREF(ret); diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 17391e80c..91fd95d4d 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -81,6 +81,8 @@ class _DeprecationTestCase: kwargs : dict Keyword arguments for `function` """ + __tracebackhide__ = True # Hide traceback for py.test + # reset the log self.log[:] = [] @@ -728,3 +730,42 @@ class FlatteningConcatenateUnsafeCast(_DeprecationTestCase): np.concatenate(([0.], [1.]), out=np.empty(2, dtype=np.int64), casting="same_kind") + +class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase): + message = "using a dtype with a subarray field is deprecated" + + @pytest.mark.parametrize(["obj", "dtype"], + [([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'), + (["1", "2"], "(2)i,")]) + def test_deprecated_sequence(self, obj, dtype): + dtype = np.dtype(dtype) + self.assert_deprecated(lambda: np.array(obj, dtype=dtype)) + with pytest.warns(DeprecationWarning): + res = np.array(obj, dtype=dtype) + + # Using `arr.astype(subarray_dtype)` is also deprecated, because + # it uses broadcasting instead of casting each element. 
+ self.assert_deprecated(lambda: res.astype(dtype)) + expected = np.empty(len(obj), dtype=dtype) + for i in range(len(expected)): + expected[i] = obj[i] + + assert_array_equal(res, expected) + + def test_deprecated_array(self): + # Arrays are more complex, since they "broadcast" on success: + arr = np.array([1, 2]) + self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,")) + with pytest.warns(DeprecationWarning): + res = np.array(arr, dtype="(2)i,") + + assert_array_equal(res, [[1, 2], [1, 2]]) + + def test_not_deprecated(self): + # These error paths are not deprecated, the tests should be retained + # when the deprecation is finalized. + arr = np.arange(5 * 2).reshape(5, 2) + with pytest.raises(ValueError): + arr.astype("(2,2)f") + with pytest.raises(ValueError): + np.array(arr, dtype="(2,2)f") |