summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorSebastian Berg <sebastian@sipsolutions.net>2020-10-20 16:35:09 -0500
committerSebastian Berg <sebastian@sipsolutions.net>2020-10-21 12:39:22 -0500
commitfcc394033c5902bbd8104694c5f7d33a8b5eb99f (patch)
tree15b261f6358faf801d1b80eca24e3955d438269a /numpy
parent32f1359fc2d11014b240dee2270acea1784a34bb (diff)
downloadnumpy-fcc394033c5902bbd8104694c5f7d33a8b5eb99f.tar.gz
DEP,BUG: Coercion/cast of array to a subarray dtype will be fixed
This currently appends the subarray dtype dimensions first and then tries to assign to the result array which uses incorrect broadcasting (broadcasting against the subarray dimensions instead of repeating each element according to the subarray dimensions). This also fixes the python scalar pathway `np.array(2, dtype="(2)f4,")` which previously only filled the first value. I consider that a clear bug fix. Closes gh-17511
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/src/multiarray/ctors.c111
-rw-r--r--numpy/core/src/multiarray/methods.c21
-rw-r--r--numpy/core/tests/test_deprecations.py56
-rw-r--r--numpy/core/tests/test_dtype.py18
4 files changed, 135 insertions, 71 deletions
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 05e45fbf5..ff262369b 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1565,49 +1565,66 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) {
/*
- * When a subarray dtype was passed in, its dimensions are absorbed
- * into the array dimension (causing a dimension mismatch).
- * We can't reasonably handle this because of inconsistencies in
- * how it was handled (depending on nested list vs. embed array-likes).
- * So we give a deprecation warning and fall back to legacy code.
+ * When a subarray dtype was passed in, its dimensions are appended
+ * to the array dimension (causing a dimension mismatch).
+ * There is a problem with that, because if we coerce from non-arrays
+ * we do this correctly by element (as defined by tuples), but for
+ * arrays we first append the dimensions and then assign to the base
+ * dtype, which causes the problem.
+ *
+ * Thus, we check if there is an array included, in that case we
+ * give a FutureWarning.
+ * When the warning is removed, PyArray_Pack will have to ensure
+ * that it does not append the dimensions when creating the
+ * subarrays to assign `arr[0] = obj[0]`.
*/
- ret = (PyArrayObject *)PyArray_NewFromDescr(
- &PyArray_Type, dtype, ndim, dims, NULL, NULL,
- flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
- if (ret == NULL) {
- npy_free_coercion_cache(cache);
- return NULL;
+ int includes_array = 0;
+ if (cache != NULL) {
+ /* This is not ideal, but it is a pretty special case */
+ coercion_cache_obj *next = cache;
+ while (next != NULL) {
+ if (!next->sequence) {
+ includes_array = 1;
+ break;
+ }
+ next = next->next;
+ }
}
- assert(PyArray_NDIM(ret) != ndim);
+ if (includes_array) {
+ npy_free_coercion_cache(cache);
- if (cache == NULL) {
- /* This is a single item. Sets only first subarray element. */
- assert(ndim == 0);
- if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
+ ret = (PyArrayObject *) PyArray_NewFromDescr(
+ &PyArray_Type, dtype, ndim, dims, NULL, NULL,
+ flags & NPY_ARRAY_F_CONTIGUOUS, NULL);
+ if (ret == NULL) {
+ return NULL;
+ }
+ assert(PyArray_NDIM(ret) != ndim);
+
+ /* NumPy 1.20, 2020-10-01 */
+ if (DEPRECATE_FUTUREWARNING(
+ "creating an array with a subarray dtype will behave "
+ "differently when the `np.array()` (or `asarray`, etc.) "
+ "call includes an array or array object.\n"
+ "If you are converting a single array or a list of arrays,"
+ "you can opt-in to the future behaviour using:\n"
+ " np.array(arr, dtype=np.dtype(['f', dtype]))['f']\n"
+ " np.array([arr1, arr2], dtype=np.dtype(['f', dtype]))['f']\n"
+ "\n"
+ "By including a new field and indexing it after the "
+ "conversion.\n"
+ "This may lead to a different result or to current failures "
+ "succeeding. (FutureWarning since NumPy 1.20)") < 0) {
Py_DECREF(ret);
return NULL;
}
- }
- else {
- npy_free_coercion_cache(cache);
+
if (setArrayFromSequence(ret, op, 0, NULL) < 0) {
Py_DECREF(ret);
return NULL;
}
+ return (PyObject *)ret;
}
- /* NumPy 1.20, 2020-10-01 */
- if (DEPRECATE(
- "using a dtype with a subarray field is deprecated. "
- "This can lead to inconsistent behaviour due to the resulting "
- "dtype being different from the input dtype. "
- "You may try to use `dtype=dtype.base`, which should give the "
- "same result for most inputs, but does not guarantee the "
- "output dimensions to match the subarray ones. "
- "(Deprecated NumPy 1.20)")) {
- Py_DECREF(ret);
- return NULL;
- }
- return (PyObject *)ret;
}
if (dtype == NULL) {
@@ -1700,26 +1717,52 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
}
/* Create a new array and copy the data */
+ Py_INCREF(dtype); /* hold on in case of a subarray that is replaced */
ret = (PyArrayObject *)PyArray_NewFromDescr(
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
if (ret == NULL) {
npy_free_coercion_cache(cache);
+ Py_DECREF(dtype);
return NULL;
}
+ if (ndim == PyArray_NDIM(ret)) {
+ /*
+ * Appending of dimensions did not occur, so use the actual dtype
+ * below. This is relevant for S0 or U0 which can be replaced with
+ * S1 or U1, although that should likely change.
+ */
+ Py_SETREF(dtype, PyArray_DESCR(ret));
+ Py_INCREF(dtype);
+ }
+
if (cache == NULL) {
/* This is a single item. Set it directly. */
assert(ndim == 0);
- if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
+ if (PyArray_Pack(dtype, PyArray_BYTES(ret), op) < 0) {
+ Py_DECREF(dtype);
Py_DECREF(ret);
return NULL;
}
+ Py_DECREF(dtype);
return (PyObject *)ret;
}
assert(ndim != 0);
assert(op == cache->converted_obj);
- if (PyArray_AssignFromCache(ret, cache) < 0) {
+
+ /* Decrease the number of dimensions to the detected ones */
+ int out_ndim = PyArray_NDIM(ret);
+ PyArray_Descr *out_descr = PyArray_DESCR(ret);
+ ((PyArrayObject_fields *)ret)->nd = ndim;
+ ((PyArrayObject_fields *)ret)->descr = dtype;
+
+ int success = PyArray_AssignFromCache(ret, cache);
+
+ ((PyArrayObject_fields *)ret)->nd = out_ndim;
+ ((PyArrayObject_fields *)ret)->descr = out_descr;
+ Py_DECREF(dtype);
+ if (success < 0) {
Py_DECREF(ret);
return NULL;
}
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index e4421b41b..084f7cee1 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -845,16 +845,17 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
return NULL;
}
/* NumPy 1.20, 2020-10-01 */
- if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE(
- "using a dtype with a subarray field is deprecated. "
- "This can lead to inconsistent behaviour due to the resulting "
- "dtype being different from the input dtype. "
- "You may try to use `dtype=dtype.base`, which should give the "
- "same result for most inputs, but does not guarantee the "
- "output dimensions to match the subarray ones. "
- "For `arr.astype()` the old, surprising, behaviour can be "
- "retained using `res = np.empty(arr.shape, dtype)` followed"
- "by `res[...] = arr`. (Deprecated NumPy 1.20)")) {
+ if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) &&
+ DEPRECATE_FUTUREWARNING(
+ "casting an array to a subarray dtype "
+ "will not using broadcasting in the future, but cast each "
+ "element to the new dtype and then append the dtype's shape "
+ "to the new array. You can opt-in to the new behaviour, by "
+ "additional field to the cast: "
+ "`arr.astype(np.dtype([('f', dtype)]))['f']`.\n"
+ "This may lead to a different result or to current failures "
+ "succeeding. "
+ "(FutureWarning since NumPy 1.20)") < 0) {
Py_DECREF(ret);
return NULL;
}
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 91fd95d4d..380b78f67 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -732,40 +732,42 @@ class FlatteningConcatenateUnsafeCast(_DeprecationTestCase):
class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
- message = "using a dtype with a subarray field is deprecated"
-
- @pytest.mark.parametrize(["obj", "dtype"],
- [([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'),
- (["1", "2"], "(2)i,")])
- def test_deprecated_sequence(self, obj, dtype):
- dtype = np.dtype(dtype)
- self.assert_deprecated(lambda: np.array(obj, dtype=dtype))
- with pytest.warns(DeprecationWarning):
- res = np.array(obj, dtype=dtype)
-
- # Using `arr.astype(subarray_dtype)` is also deprecated, because
- # it uses broadcasting instead of casting each element.
- self.assert_deprecated(lambda: res.astype(dtype))
- expected = np.empty(len(obj), dtype=dtype)
- for i in range(len(expected)):
- expected[i] = obj[i]
-
- assert_array_equal(res, expected)
+ warning_cls = FutureWarning
+ message = "(creating|casting) an array (with|to) a subarray dtype"
def test_deprecated_array(self):
# Arrays are more complex, since they "broadcast" on success:
arr = np.array([1, 2])
+
+ self.assert_deprecated(lambda: arr.astype("(2)i,"))
+ with pytest.warns(FutureWarning):
+ res = arr.astype("(2)i,")
+
+ assert_array_equal(res, [[1, 2], [1, 2]])
+
self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,"))
- with pytest.warns(DeprecationWarning):
+ with pytest.warns(FutureWarning):
res = np.array(arr, dtype="(2)i,")
assert_array_equal(res, [[1, 2], [1, 2]])
- def test_not_deprecated(self):
- # These error paths are not deprecated, the tests should be retained
- # when the deprecation is finalized.
+ with pytest.warns(FutureWarning):
+ res = np.array([[(1,), (2,)], arr], dtype="(2)i,")
+
+ assert_array_equal(res, [[[1, 1], [2, 2]], [[1, 2], [1, 2]]])
+
+ def test_deprecated_and_error(self):
+ # These error paths do not give a warning, but will succeed in the
+ # future.
arr = np.arange(5 * 2).reshape(5, 2)
- with pytest.raises(ValueError):
- arr.astype("(2,2)f")
- with pytest.raises(ValueError):
- np.array(arr, dtype="(2,2)f")
+ def check():
+ with pytest.raises(ValueError):
+ arr.astype("(2,2)f")
+
+ self.assert_deprecated(check)
+
+ def check():
+ with pytest.raises(ValueError):
+ np.array(arr, dtype="(2,2)f")
+
+ self.assert_deprecated(check)
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index ba5069024..1b2b85cc1 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -314,6 +314,24 @@ class TestRecord:
'formats':['i1', 'O'],
'offsets':[np.dtype('intp').itemsize, 0]})
+ @pytest.mark.parametrize(["obj", "dtype", "expected"],
+ [([], ("(2)f4,"), np.empty((0, 2), dtype="f4")),
+ (3, "(3)f4,", [3, 3, 3]),
+ (np.float64(2), "(2)f4,", [2, 2]),
+ ([((0, 1), (1, 2)), ((2,),)], '(2,2)f4', None),
+ (["1", "2"], "(2)i,", None)])
+ def test_subarray_list(self, obj, dtype, expected):
+ dtype = np.dtype(dtype)
+ res = np.array(obj, dtype=dtype)
+
+ if expected is None:
+ # iterate the 1-d list to fill the array
+ expected = np.empty(len(obj), dtype=dtype)
+ for i in range(len(expected)):
+ expected[i] = obj[i]
+
+ assert_array_equal(res, expected)
+
def test_comma_datetime(self):
dt = np.dtype('M8[D],datetime64[Y],i8')
assert_equal(dt, np.dtype([('f0', 'M8[D]'),