-rw-r--r--  doc/release/upcoming_changes/17419.deprecation.rst   24
-rw-r--r--  doc/release/upcoming_changes/17596.future.rst        30
-rw-r--r--  numpy/core/src/multiarray/ctors.c                   111
-rw-r--r--  numpy/core/src/multiarray/methods.c                  21
-rw-r--r--  numpy/core/tests/test_deprecations.py                56
-rw-r--r--  numpy/core/tests/test_dtype.py                       18
6 files changed, 165 insertions(+), 95 deletions(-)
diff --git a/doc/release/upcoming_changes/17419.deprecation.rst b/doc/release/upcoming_changes/17419.deprecation.rst
deleted file mode 100644
index fcab3a8ad..000000000
--- a/doc/release/upcoming_changes/17419.deprecation.rst
+++ /dev/null
@@ -1,24 +0,0 @@
-Arrays cannot be using subarray dtypes
---------------------------------------
-Array creation and casting using ``np.array(obj, dtype)``
-and ``arr.astype(dtype)`` will not support ``dtype``
-to be a subarray dtype such as ``np.dtype("(2)i,")``.
-
-For such a ``dtype`` the following behaviour occurs currently::
-
- res = np.array(obj, dtype)
-
- res.dtype is not dtype
- res.dtype is dtype.base
- res.shape[-dtype.ndim:] == dtype.shape
-
-The shape of the dtype is included into the array.
-This leads to inconsistencies when ``obj`` is:
-
-* a scalar, such as ``np.array(1, dtype="(2)i")``
-* an array, such as ``np.array(np.array([1]), dtype="(2)i")``
-
-In most cases the work-around is to pass the output dtype directly
-and possibly check ``res.shape[-dtype.ndim:] == dtype.shape``.
-If this is insufficient, please open an issue on the NumPy issue
-tracker.
diff --git a/doc/release/upcoming_changes/17596.future.rst b/doc/release/upcoming_changes/17596.future.rst
new file mode 100644
index 000000000..6e697c8d1
--- /dev/null
+++ b/doc/release/upcoming_changes/17596.future.rst
@@ -0,0 +1,30 @@
+Array creation and casting with subarray dtypes will change
+------------------------------------------------------------
+Array creation and casting using ``np.array(arr, dtype)``
+and ``arr.astype(dtype)`` will use different logic when ``dtype``
+is a subarray dtype such as ``np.dtype("(2)i,")``.
+
+For such a ``dtype`` the following behaviour currently holds::
+
+ res = np.array(arr, dtype)
+
+ res.dtype is not dtype
+ res.dtype is dtype.base
+ res.shape == arr.shape + dtype.shape
+
+But ``res`` is filled using the logic::
+
+ res = np.empty(arr.shape + dtype.shape, dtype=dtype.base)
+ res[...] = arr
+
+which uses incorrect broadcasting (and often leads to an error).
+In the future, this will instead cast each element individually,
+leading to the same result as::
+
+ res = np.array(arr, dtype=np.dtype([("f", dtype)]))["f"]
+
+This construction can already be used to opt in to the new behaviour.
+
+This change does not affect ``np.array(list, dtype="(2)i,")`` unless the
+``list`` itself includes at least one array. In particular, the behaviour
+is unchanged for a list of tuples.
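A minimal Python sketch of the behaviour this note describes, using the same ``"(2)i,"`` dtype; the values in the comments are the ones exercised by the tests updated in this change, and the opt-in spelling is the one suggested above:

    import warnings
    import numpy as np

    dtype = np.dtype("(2)i,")        # subarray dtype: integer base, shape (2,)
    arr = np.array([1, 2])

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)  # warning added by this change
        res = np.array(arr, dtype=dtype)

    # Current behaviour: the subarray shape is appended and ``arr`` is
    # broadcast into the result.
    assert res.dtype == dtype.base
    assert res.shape == arr.shape + dtype.shape   # (2, 2)
    assert res.tolist() == [[1, 2], [1, 2]]

    # Suggested opt-in to the future, element-wise behaviour (wrap the
    # dtype in a structured field and index it afterwards):
    #     np.array(arr, dtype=np.dtype([("f", dtype)]))["f"]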
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 05e45fbf5..ff262369b 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1565,49 +1565,66 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) {
/*
- * When a subarray dtype was passed in, its dimensions are absorbed
- * into the array dimension (causing a dimension mismatch).
- * We can't reasonably handle this because of inconsistencies in
- * how it was handled (depending on nested list vs. embed array-likes).
- * So we give a deprecation warning and fall back to legacy code.
+ * When a subarray dtype was passed in, its dimensions are appended
+ * to the array dimensions (causing a dimension mismatch).
+ * This is a problem because coercion from non-arrays (i.e. nested
+ * sequences of tuples) handles this correctly per element, while for
+ * arrays we first append the dimensions and then assign to the base
+ * dtype, which broadcasts the input and causes the mismatch.
+ *
+ * Thus, we check whether an array is included; in that case we
+ * give a FutureWarning.
+ * When the warning is removed, PyArray_Pack will have to ensure
+ * that it does not append the dimensions when creating the
+ * subarrays to assign `arr[0] = obj[0]`.
*/
- ret = (PyArrayObject *)PyArray_NewFromDescr(
- &PyArray_Type, dtype, ndim, dims, NULL, NULL,
- flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
- if (ret == NULL) {
- npy_free_coercion_cache(cache);
- return NULL;
+ int includes_array = 0;
+ if (cache != NULL) {
+ /* This is not ideal, but it is a pretty special case */
+ coercion_cache_obj *next = cache;
+ while (next != NULL) {
+ if (!next->sequence) {
+ includes_array = 1;
+ break;
+ }
+ next = next->next;
+ }
}
- assert(PyArray_NDIM(ret) != ndim);
+ if (includes_array) {
+ npy_free_coercion_cache(cache);
- if (cache == NULL) {
- /* This is a single item. Sets only first subarray element. */
- assert(ndim == 0);
- if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
+ ret = (PyArrayObject *) PyArray_NewFromDescr(
+ &PyArray_Type, dtype, ndim, dims, NULL, NULL,
+ flags & NPY_ARRAY_F_CONTIGUOUS, NULL);
+ if (ret == NULL) {
+ return NULL;
+ }
+ assert(PyArray_NDIM(ret) != ndim);
+
+ /* NumPy 1.20, 2020-10-01 */
+ if (DEPRECATE_FUTUREWARNING(
+ "creating an array with a subarray dtype will behave "
+ "differently when the `np.array()` (or `asarray`, etc.) "
+ "call includes an array or array object.\n"
+ "If you are converting a single array or a list of arrays,"
+ "you can opt-in to the future behaviour using:\n"
+ " np.array(arr, dtype=np.dtype(['f', dtype]))['f']\n"
+ " np.array([arr1, arr2], dtype=np.dtype(['f', dtype]))['f']\n"
+ "\n"
+ "By including a new field and indexing it after the "
+ "conversion.\n"
+ "This may lead to a different result or to current failures "
+ "succeeding. (FutureWarning since NumPy 1.20)") < 0) {
Py_DECREF(ret);
return NULL;
}
- }
- else {
- npy_free_coercion_cache(cache);
+
if (setArrayFromSequence(ret, op, 0, NULL) < 0) {
Py_DECREF(ret);
return NULL;
}
+ return (PyObject *)ret;
}
- /* NumPy 1.20, 2020-10-01 */
- if (DEPRECATE(
- "using a dtype with a subarray field is deprecated. "
- "This can lead to inconsistent behaviour due to the resulting "
- "dtype being different from the input dtype. "
- "You may try to use `dtype=dtype.base`, which should give the "
- "same result for most inputs, but does not guarantee the "
- "output dimensions to match the subarray ones. "
- "(Deprecated NumPy 1.20)")) {
- Py_DECREF(ret);
- return NULL;
- }
- return (PyObject *)ret;
}
if (dtype == NULL) {
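A short sketch of which calls reach the new warning branch above: the cache walk flags any non-sequence (i.e. array-like) entry, so a plain nested list of tuples keeps the old, silent per-element path, while any included array triggers the FutureWarning (the ``warns_future`` helper exists only for this illustration):

    import warnings
    import numpy as np

    dtype = np.dtype("(2)i,")
    arr = np.array([1, 2])

    def warns_future(call):
        """Run ``call`` and report whether it emitted a FutureWarning."""
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            call()
        return any(issubclass(w.category, FutureWarning) for w in caught)

    # No array anywhere in the input: no warning, legacy element-wise fill.
    assert not warns_future(lambda: np.array([(1,), (2,)], dtype=dtype))

    # An array, on its own or nested in a list, triggers the FutureWarning.
    assert warns_future(lambda: np.array(arr, dtype=dtype))
    assert warns_future(lambda: np.array([[(1,), (2,)], arr], dtype=dtype))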
@@ -1700,26 +1717,52 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
}
/* Create a new array and copy the data */
+ Py_INCREF(dtype); /* hold on in case of a subarray that is replaced */
ret = (PyArrayObject *)PyArray_NewFromDescr(
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
if (ret == NULL) {
npy_free_coercion_cache(cache);
+ Py_DECREF(dtype);
return NULL;
}
+ if (ndim == PyArray_NDIM(ret)) {
+ /*
+ * Appending of dimensions did not occur, so use the actual dtype
+ * below. This is relevant for S0 or U0 which can be replaced with
+ * S1 or U1, although that should likely change.
+ */
+ Py_SETREF(dtype, PyArray_DESCR(ret));
+ Py_INCREF(dtype);
+ }
+
if (cache == NULL) {
/* This is a single item. Set it directly. */
assert(ndim == 0);
- if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
+ if (PyArray_Pack(dtype, PyArray_BYTES(ret), op) < 0) {
+ Py_DECREF(dtype);
Py_DECREF(ret);
return NULL;
}
+ Py_DECREF(dtype);
return (PyObject *)ret;
}
assert(ndim != 0);
assert(op == cache->converted_obj);
- if (PyArray_AssignFromCache(ret, cache) < 0) {
+
+ /* Decrease the number of dimensions to the detected ones */
+ int out_ndim = PyArray_NDIM(ret);
+ PyArray_Descr *out_descr = PyArray_DESCR(ret);
+ ((PyArrayObject_fields *)ret)->nd = ndim;
+ ((PyArrayObject_fields *)ret)->descr = dtype;
+
+ int success = PyArray_AssignFromCache(ret, cache);
+
+ ((PyArrayObject_fields *)ret)->nd = out_ndim;
+ ((PyArrayObject_fields *)ret)->descr = out_descr;
+ Py_DECREF(dtype);
+ if (success < 0) {
Py_DECREF(ret);
return NULL;
}
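The hunk above makes the non-warning path assign through the subarray dtype itself, by temporarily lowering the result's dimensions around ``PyArray_AssignFromCache``. A small sketch of the resulting element-wise filling, using the cases covered by the new ``test_subarray_list`` further down:

    import numpy as np

    # Single-item path: a scalar fills every element of the subarray.
    assert np.array(3, dtype="(3)f4,").tolist() == [3, 3, 3]

    # Sequence path without arrays: each item is assigned to one subarray
    # (and broadcast within it), instead of broadcasting the whole input.
    res = np.array(["1", "2"], dtype="(2)i,")
    assert res.tolist() == [[1, 1], [2, 2]]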
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index e4421b41b..084f7cee1 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -845,16 +845,17 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
return NULL;
}
/* NumPy 1.20, 2020-10-01 */
- if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE(
- "using a dtype with a subarray field is deprecated. "
- "This can lead to inconsistent behaviour due to the resulting "
- "dtype being different from the input dtype. "
- "You may try to use `dtype=dtype.base`, which should give the "
- "same result for most inputs, but does not guarantee the "
- "output dimensions to match the subarray ones. "
- "For `arr.astype()` the old, surprising, behaviour can be "
- "retained using `res = np.empty(arr.shape, dtype)` followed"
- "by `res[...] = arr`. (Deprecated NumPy 1.20)")) {
+ if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) &&
+ DEPRECATE_FUTUREWARNING(
+ "casting an array to a subarray dtype "
+ "will not using broadcasting in the future, but cast each "
+ "element to the new dtype and then append the dtype's shape "
+ "to the new array. You can opt-in to the new behaviour, by "
+ "additional field to the cast: "
+ "`arr.astype(np.dtype([('f', dtype)]))['f']`.\n"
+ "This may lead to a different result or to current failures "
+ "succeeding. "
+ "(FutureWarning since NumPy 1.20)") < 0) {
Py_DECREF(ret);
return NULL;
}
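To complement the warning text above, a small ``astype`` sketch; the broadcast result and the error case are the ones checked by the updated tests, while the opt-in spelling is the one the warning suggests (its exact result is not asserted here):

    import warnings
    import numpy as np

    arr = np.array([1, 2])

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        res = arr.astype("(2)i,")

    # Current behaviour: the subarray shape is appended and ``arr`` is
    # broadcast into the (2, 2) result.
    assert res.tolist() == [[1, 2], [1, 2]]

    # Opt-in spelling suggested by the warning (cast via a structured
    # field, then drop the field again):
    #     arr.astype(np.dtype([("f", "(2)i,")]))["f"]

    # When broadcasting is impossible, the current behaviour is still an
    # error, now preceded by the FutureWarning.
    big = np.arange(5 * 2).reshape(5, 2)
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            big.astype("(2,2)f")
    except ValueError:
        pass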
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 91fd95d4d..380b78f67 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -732,40 +732,42 @@ class FlatteningConcatenateUnsafeCast(_DeprecationTestCase):
class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
- message = "using a dtype with a subarray field is deprecated"
-
- @pytest.mark.parametrize(["obj", "dtype"],
- [([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'),
- (["1", "2"], "(2)i,")])
- def test_deprecated_sequence(self, obj, dtype):
- dtype = np.dtype(dtype)
- self.assert_deprecated(lambda: np.array(obj, dtype=dtype))
- with pytest.warns(DeprecationWarning):
- res = np.array(obj, dtype=dtype)
-
- # Using `arr.astype(subarray_dtype)` is also deprecated, because
- # it uses broadcasting instead of casting each element.
- self.assert_deprecated(lambda: res.astype(dtype))
- expected = np.empty(len(obj), dtype=dtype)
- for i in range(len(expected)):
- expected[i] = obj[i]
-
- assert_array_equal(res, expected)
+ warning_cls = FutureWarning
+ message = "(creating|casting) an array (with|to) a subarray dtype"
def test_deprecated_array(self):
# Arrays are more complex, since they "broadcast" on success:
arr = np.array([1, 2])
+
+ self.assert_deprecated(lambda: arr.astype("(2)i,"))
+ with pytest.warns(FutureWarning):
+ res = arr.astype("(2)i,")
+
+ assert_array_equal(res, [[1, 2], [1, 2]])
+
self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,"))
- with pytest.warns(DeprecationWarning):
+ with pytest.warns(FutureWarning):
res = np.array(arr, dtype="(2)i,")
assert_array_equal(res, [[1, 2], [1, 2]])
- def test_not_deprecated(self):
- # These error paths are not deprecated, the tests should be retained
- # when the deprecation is finalized.
+ with pytest.warns(FutureWarning):
+ res = np.array([[(1,), (2,)], arr], dtype="(2)i,")
+
+ assert_array_equal(res, [[[1, 1], [2, 2]], [[1, 2], [1, 2]]])
+
+ def test_deprecated_and_error(self):
+ # These paths give the FutureWarning but currently raise an error;
+ # they are expected to succeed in the future.
arr = np.arange(5 * 2).reshape(5, 2)
- with pytest.raises(ValueError):
- arr.astype("(2,2)f")
- with pytest.raises(ValueError):
- np.array(arr, dtype="(2,2)f")
+ def check():
+ with pytest.raises(ValueError):
+ arr.astype("(2,2)f")
+
+ self.assert_deprecated(check)
+
+ def check():
+ with pytest.raises(ValueError):
+ np.array(arr, dtype="(2,2)f")
+
+ self.assert_deprecated(check)
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index ba5069024..1b2b85cc1 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -314,6 +314,24 @@ class TestRecord:
'formats':['i1', 'O'],
'offsets':[np.dtype('intp').itemsize, 0]})
+ @pytest.mark.parametrize(["obj", "dtype", "expected"],
+ [([], ("(2)f4,"), np.empty((0, 2), dtype="f4")),
+ (3, "(3)f4,", [3, 3, 3]),
+ (np.float64(2), "(2)f4,", [2, 2]),
+ ([((0, 1), (1, 2)), ((2,),)], '(2,2)f4', None),
+ (["1", "2"], "(2)i,", None)])
+ def test_subarray_list(self, obj, dtype, expected):
+ dtype = np.dtype(dtype)
+ res = np.array(obj, dtype=dtype)
+
+ if expected is None:
+ # iterate the 1-d list to fill the array
+ expected = np.empty(len(obj), dtype=dtype)
+ for i in range(len(expected)):
+ expected[i] = obj[i]
+
+ assert_array_equal(res, expected)
+
def test_comma_datetime(self):
dt = np.dtype('M8[D],datetime64[Y],i8')
assert_equal(dt, np.dtype([('f0', 'M8[D]'),