diff options
| -rw-r--r-- | doc/release/upcoming_changes/17419.deprecation.rst | 24 | ||||
| -rw-r--r-- | doc/release/upcoming_changes/17596.future.rst | 30 | ||||
| -rw-r--r-- | numpy/core/src/multiarray/ctors.c | 111 | ||||
| -rw-r--r-- | numpy/core/src/multiarray/methods.c | 21 | ||||
| -rw-r--r-- | numpy/core/tests/test_deprecations.py | 56 | ||||
| -rw-r--r-- | numpy/core/tests/test_dtype.py | 18 |
6 files changed, 165 insertions, 95 deletions
diff --git a/doc/release/upcoming_changes/17419.deprecation.rst b/doc/release/upcoming_changes/17419.deprecation.rst deleted file mode 100644 index fcab3a8ad..000000000 --- a/doc/release/upcoming_changes/17419.deprecation.rst +++ /dev/null @@ -1,24 +0,0 @@ -Arrays cannot be using subarray dtypes --------------------------------------- -Array creation and casting using ``np.array(obj, dtype)`` -and ``arr.astype(dtype)`` will not support ``dtype`` -to be a subarray dtype such as ``np.dtype("(2)i,")``. - -For such a ``dtype`` the following behaviour occurs currently:: - - res = np.array(obj, dtype) - - res.dtype is not dtype - res.dtype is dtype.base - res.shape[-dtype.ndim:] == dtype.shape - -The shape of the dtype is included into the array. -This leads to inconsistencies when ``obj`` is: - -* a scalar, such as ``np.array(1, dtype="(2)i")`` -* an array, such as ``np.array(np.array([1]), dtype="(2)i")`` - -In most cases the work-around is to pass the output dtype directly -and possibly check ``res.shape[-dtype.ndim:] == dtype.shape``. -If this is insufficient, please open an issue on the NumPy issue -tracker. diff --git a/doc/release/upcoming_changes/17596.future.rst b/doc/release/upcoming_changes/17596.future.rst new file mode 100644 index 000000000..6e697c8d1 --- /dev/null +++ b/doc/release/upcoming_changes/17596.future.rst @@ -0,0 +1,30 @@ +Arrays cannot use subarray dtypes +-------------------------------------- +Array creation and casting using ``np.array(arr, dtype)`` +and ``arr.astype(dtype)`` will use different logic when ``dtype`` +is a subarray dtype such as ``np.dtype("(2)i,")``. + +For such a ``dtype`` the following behaviour is true:: + + res = np.array(arr, dtype) + + res.dtype is not dtype + res.dtype is dtype.base + res.shape == arr.shape + dtype.shape + +But ``res`` is filled using the logic:: + + res = np.empty(arr.shape + dtype.shape, dtype=dtype.base) + res[...] = arr + +which uses incorrect broadcasting (and often leads to an error). 
+In the future, this will instead cast each element individually, +leading to the same result as:: + + res = np.array(arr, dtype=np.dtype([("f", dtype)]))["f"] + +This can normally be used to opt in to the new behaviour. + +This change does not affect ``np.array(list, dtype="(2)i,")`` unless the +``list`` itself includes at least one array. In particular, the behaviour +is unchanged for a list of tuples. diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 05e45fbf5..ff262369b 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1565,49 +1565,66 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) { /* - * When a subarray dtype was passed in, its dimensions are absorbed - * into the array dimension (causing a dimension mismatch). - * We can't reasonably handle this because of inconsistencies in - * how it was handled (depending on nested list vs. embed array-likes). - * So we give a deprecation warning and fall back to legacy code. + * When a subarray dtype was passed in, its dimensions are appended + * to the array dimension (causing a dimension mismatch). + * There is a problem with that, because if we coerce from non-arrays + * we do this correctly by element (as defined by tuples), but for + * arrays we first append the dimensions and then assign to the base + * dtype, which causes the problem. + * + * Thus, we check if there is an array included, in that case we + * give a FutureWarning. + * When the warning is removed, PyArray_Pack will have to ensure + * that it does not append the dimensions when creating the + * subarrays to assign `arr[0] = obj[0]`. 
*/ - ret = (PyArrayObject *)PyArray_NewFromDescr( - &PyArray_Type, dtype, ndim, dims, NULL, NULL, - flags&NPY_ARRAY_F_CONTIGUOUS, NULL); - if (ret == NULL) { - npy_free_coercion_cache(cache); - return NULL; + int includes_array = 0; + if (cache != NULL) { + /* This is not ideal, but it is a pretty special case */ + coercion_cache_obj *next = cache; + while (next != NULL) { + if (!next->sequence) { + includes_array = 1; + break; + } + next = next->next; + } } - assert(PyArray_NDIM(ret) != ndim); + if (includes_array) { + npy_free_coercion_cache(cache); - if (cache == NULL) { - /* This is a single item. Sets only first subarray element. */ - assert(ndim == 0); - if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) { + ret = (PyArrayObject *) PyArray_NewFromDescr( + &PyArray_Type, dtype, ndim, dims, NULL, NULL, + flags & NPY_ARRAY_F_CONTIGUOUS, NULL); + if (ret == NULL) { + return NULL; + } + assert(PyArray_NDIM(ret) != ndim); + + /* NumPy 1.20, 2020-10-01 */ + if (DEPRECATE_FUTUREWARNING( + "creating an array with a subarray dtype will behave " + "differently when the `np.array()` (or `asarray`, etc.) " + "call includes an array or array object.\n" + "If you are converting a single array or a list of arrays," + "you can opt-in to the future behaviour using:\n" + " np.array(arr, dtype=np.dtype(['f', dtype]))['f']\n" + " np.array([arr1, arr2], dtype=np.dtype(['f', dtype]))['f']\n" + "\n" + "By including a new field and indexing it after the " + "conversion.\n" + "This may lead to a different result or to current failures " + "succeeding. (FutureWarning since NumPy 1.20)") < 0) { Py_DECREF(ret); return NULL; } - } - else { - npy_free_coercion_cache(cache); + if (setArrayFromSequence(ret, op, 0, NULL) < 0) { Py_DECREF(ret); return NULL; } + return (PyObject *)ret; } - /* NumPy 1.20, 2020-10-01 */ - if (DEPRECATE( - "using a dtype with a subarray field is deprecated. 
" - "This can lead to inconsistent behaviour due to the resulting " - "dtype being different from the input dtype. " - "You may try to use `dtype=dtype.base`, which should give the " - "same result for most inputs, but does not guarantee the " - "output dimensions to match the subarray ones. " - "(Deprecated NumPy 1.20)")) { - Py_DECREF(ret); - return NULL; - } - return (PyObject *)ret; } if (dtype == NULL) { @@ -1700,26 +1717,52 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, } /* Create a new array and copy the data */ + Py_INCREF(dtype); /* hold on in case of a subarray that is replaced */ ret = (PyArrayObject *)PyArray_NewFromDescr( &PyArray_Type, dtype, ndim, dims, NULL, NULL, flags&NPY_ARRAY_F_CONTIGUOUS, NULL); if (ret == NULL) { npy_free_coercion_cache(cache); + Py_DECREF(dtype); return NULL; } + if (ndim == PyArray_NDIM(ret)) { + /* + * Appending of dimensions did not occur, so use the actual dtype + * below. This is relevant for S0 or U0 which can be replaced with + * S1 or U1, although that should likely change. + */ + Py_SETREF(dtype, PyArray_DESCR(ret)); + Py_INCREF(dtype); + } + if (cache == NULL) { /* This is a single item. Set it directly. 
*/ assert(ndim == 0); - if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) { + if (PyArray_Pack(dtype, PyArray_BYTES(ret), op) < 0) { + Py_DECREF(dtype); Py_DECREF(ret); return NULL; } + Py_DECREF(dtype); return (PyObject *)ret; } assert(ndim != 0); assert(op == cache->converted_obj); - if (PyArray_AssignFromCache(ret, cache) < 0) { + + /* Decrease the number of dimensions to the detected ones */ + int out_ndim = PyArray_NDIM(ret); + PyArray_Descr *out_descr = PyArray_DESCR(ret); + ((PyArrayObject_fields *)ret)->nd = ndim; + ((PyArrayObject_fields *)ret)->descr = dtype; + + int success = PyArray_AssignFromCache(ret, cache); + + ((PyArrayObject_fields *)ret)->nd = out_ndim; + ((PyArrayObject_fields *)ret)->descr = out_descr; + Py_DECREF(dtype); + if (success < 0) { Py_DECREF(ret); return NULL; } diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index e4421b41b..084f7cee1 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -845,16 +845,17 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds) return NULL; } /* NumPy 1.20, 2020-10-01 */ - if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE( - "using a dtype with a subarray field is deprecated. " - "This can lead to inconsistent behaviour due to the resulting " - "dtype being different from the input dtype. " - "You may try to use `dtype=dtype.base`, which should give the " - "same result for most inputs, but does not guarantee the " - "output dimensions to match the subarray ones. " - "For `arr.astype()` the old, surprising, behaviour can be " - "retained using `res = np.empty(arr.shape, dtype)` followed" - "by `res[...] = arr`. 
(Deprecated NumPy 1.20)")) { + if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && + DEPRECATE_FUTUREWARNING( + "casting an array to a subarray dtype " + "will not using broadcasting in the future, but cast each " + "element to the new dtype and then append the dtype's shape " + "to the new array. You can opt-in to the new behaviour, by " + "additional field to the cast: " + "`arr.astype(np.dtype([('f', dtype)]))['f']`.\n" + "This may lead to a different result or to current failures " + "succeeding. " + "(FutureWarning since NumPy 1.20)") < 0) { Py_DECREF(ret); return NULL; } diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 91fd95d4d..380b78f67 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -732,40 +732,42 @@ class FlatteningConcatenateUnsafeCast(_DeprecationTestCase): class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase): - message = "using a dtype with a subarray field is deprecated" - - @pytest.mark.parametrize(["obj", "dtype"], - [([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'), - (["1", "2"], "(2)i,")]) - def test_deprecated_sequence(self, obj, dtype): - dtype = np.dtype(dtype) - self.assert_deprecated(lambda: np.array(obj, dtype=dtype)) - with pytest.warns(DeprecationWarning): - res = np.array(obj, dtype=dtype) - - # Using `arr.astype(subarray_dtype)` is also deprecated, because - # it uses broadcasting instead of casting each element. 
- self.assert_deprecated(lambda: res.astype(dtype)) - expected = np.empty(len(obj), dtype=dtype) - for i in range(len(expected)): - expected[i] = obj[i] - - assert_array_equal(res, expected) + warning_cls = FutureWarning + message = "(creating|casting) an array (with|to) a subarray dtype" def test_deprecated_array(self): # Arrays are more complex, since they "broadcast" on success: arr = np.array([1, 2]) + + self.assert_deprecated(lambda: arr.astype("(2)i,")) + with pytest.warns(FutureWarning): + res = arr.astype("(2)i,") + + assert_array_equal(res, [[1, 2], [1, 2]]) + self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,")) - with pytest.warns(DeprecationWarning): + with pytest.warns(FutureWarning): res = np.array(arr, dtype="(2)i,") assert_array_equal(res, [[1, 2], [1, 2]]) - def test_not_deprecated(self): - # These error paths are not deprecated, the tests should be retained - # when the deprecation is finalized. + with pytest.warns(FutureWarning): + res = np.array([[(1,), (2,)], arr], dtype="(2)i,") + + assert_array_equal(res, [[[1, 1], [2, 2]], [[1, 2], [1, 2]]]) + + def test_deprecated_and_error(self): + # These error paths do not give a warning, but will succeed in the + # future. 
arr = np.arange(5 * 2).reshape(5, 2) - with pytest.raises(ValueError): - arr.astype("(2,2)f") - with pytest.raises(ValueError): - np.array(arr, dtype="(2,2)f") + def check(): + with pytest.raises(ValueError): + arr.astype("(2,2)f") + + self.assert_deprecated(check) + + def check(): + with pytest.raises(ValueError): + np.array(arr, dtype="(2,2)f") + + self.assert_deprecated(check) diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index ba5069024..1b2b85cc1 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -314,6 +314,24 @@ class TestRecord: 'formats':['i1', 'O'], 'offsets':[np.dtype('intp').itemsize, 0]}) + @pytest.mark.parametrize(["obj", "dtype", "expected"], + [([], ("(2)f4,"), np.empty((0, 2), dtype="f4")), + (3, "(3)f4,", [3, 3, 3]), + (np.float64(2), "(2)f4,", [2, 2]), + ([((0, 1), (1, 2)), ((2,),)], '(2,2)f4', None), + (["1", "2"], "(2)i,", None)]) + def test_subarray_list(self, obj, dtype, expected): + dtype = np.dtype(dtype) + res = np.array(obj, dtype=dtype) + + if expected is None: + # iterate the 1-d list to fill the array + expected = np.empty(len(obj), dtype=dtype) + for i in range(len(expected)): + expected[i] = obj[i] + + assert_array_equal(res, expected) + def test_comma_datetime(self): dt = np.dtype('M8[D],datetime64[Y],i8') assert_equal(dt, np.dtype([('f0', 'M8[D]'), |
