summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2020-10-07 21:53:15 +0300
committerGitHub <noreply@github.com>2020-10-07 21:53:15 +0300
commit382758355998951cea2b9f6ad1fb83e7dc4c3a02 (patch)
tree938956ddaf3f844f38841e1d633f7b2773fedc44
parent18af0e10878fe49a893fd576317dabd424c7ca16 (diff)
parente31ae7ff70c72dec2cf7ba3bd817982ad1b68713 (diff)
downloadnumpy-382758355998951cea2b9f6ad1fb83e7dc4c3a02.tar.gz
Merge pull request #17419 from seberg/deprecate-coercion-to-subarray-dtype
DEP: Deprecate coercion to subarray dtypes
-rw-r--r--doc/release/upcoming_changes/17419.deprecation.rst24
-rw-r--r--numpy/core/src/multiarray/ctors.c202
-rw-r--r--numpy/core/src/multiarray/methods.c14
-rw-r--r--numpy/core/tests/test_deprecations.py41
4 files changed, 281 insertions, 0 deletions
diff --git a/doc/release/upcoming_changes/17419.deprecation.rst b/doc/release/upcoming_changes/17419.deprecation.rst
new file mode 100644
index 000000000..fcab3a8ad
--- /dev/null
+++ b/doc/release/upcoming_changes/17419.deprecation.rst
@@ -0,0 +1,24 @@
+Arrays cannot be created using subarray dtypes
+----------------------------------------------
+Array creation and casting using ``np.array(obj, dtype)``
+and ``arr.astype(dtype)`` will no longer support a ``dtype``
+that is a subarray dtype such as ``np.dtype("(2)i,")``.
+
+For such a ``dtype`` the following behaviour occurs currently::
+
+ res = np.array(obj, dtype)
+
+ res.dtype is not dtype
+ res.dtype is dtype.base
+ res.shape[-dtype.ndim:] == dtype.shape
+
+The shape of the dtype is absorbed into the shape of the array.
+This leads to inconsistencies when ``obj`` is:
+
+* a scalar, such as ``np.array(1, dtype="(2)i")``
+* an array, such as ``np.array(np.array([1]), dtype="(2)i")``
+
+In most cases the work-around is to pass the output dtype directly
+and possibly check ``res.shape[-dtype.ndim:] == dtype.shape``.
+If this is insufficient, please open an issue on the NumPy issue
+tracker.
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index b09ec9f8e..05e45fbf5 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1368,6 +1368,160 @@ PyArray_GetArrayParamsFromObject(PyObject *NPY_UNUSED(op),
}
+/*
+ * This function is a legacy implementation to retain subarray dtype
+ * behaviour in array coercion. The behaviour here makes sense if tuples
+ * of matching dimensionality are being coerced. Due to the difficulty
+ * that the result is ill-defined for lists of array-likes, this is deprecated.
+ *
+ * Fills `a` from the (possibly nested) object `s`, recursing once per axis:
+ *
+ *   a    the complete destination array; its shape is what the sequence
+ *        lengths are checked against.
+ *   s    the source object for this recursion level. The caller's reference
+ *        is left untouched (it is INCREF'd on entry and released on exit).
+ *   dim  the axis of `a` that `s` is matched against.
+ *   dst  the part of `a` that is written at this level. Pass NULL on the
+ *        outermost call to write into `a` itself.
+ *
+ * Returns 0 on success and a negative value on failure.
+ *
+ * WARNING: Do not use this function, it exists purely to support a deprecated
+ * code path.
+ */
+static int
+setArrayFromSequence(PyArrayObject *a, PyObject *s,
+                        int dim, PyArrayObject * dst)
+{
+    Py_ssize_t i, slen;
+    npy_intp alen;
+    int res = -1;
+    PyObject *seq = NULL;
+    PyObject *arr_like;
+
+    /* first recursion, view equal destination */
+    if (dst == NULL) {
+        dst = a;
+    }
+
+    /* INCREF on entry DECREF on exit */
+    Py_INCREF(s);
+
+    if (PyArray_Check(s)) {
+        if (!(PyArray_CheckExact(s))) {
+            /*
+             * make sure a base-class array is used so that the dimensionality
+             * reduction assumption is correct.
+             */
+            /*
+             * This will DECREF(s) if replaced. On failure `s` becomes NULL,
+             * which is why the fail path must use Py_XDECREF.
+             */
+            s = PyArray_EnsureArray(s);
+            if (s == NULL) {
+                goto fail;
+            }
+        }
+
+        /* dst points to correct array subsection */
+        if (PyArray_CopyInto(dst, (PyArrayObject *)s) < 0) {
+            goto fail;
+        }
+
+        Py_DECREF(s);
+        return 0;
+    }
+
+    if (dim > PyArray_NDIM(a)) {
+        PyErr_Format(PyExc_ValueError,
+                "setArrayFromSequence: sequence/array dimensions mismatch.");
+        goto fail;
+    }
+
+    /* Try __array__ before using s as a sequence */
+    arr_like = _array_from_array_like(s, NULL, 0, NULL);
+    if (arr_like == NULL) {
+        goto fail;
+    }
+    else if (arr_like == Py_NotImplemented) {
+        /* Not an array-like: fall through and treat `s` as a sequence. */
+        Py_DECREF(arr_like);
+    }
+    else {
+        int r = PyArray_CopyInto(dst, (PyArrayObject *)arr_like);
+        Py_DECREF(arr_like);
+        if (r < 0) {
+            goto fail;
+        }
+        Py_DECREF(s);
+        return 0;
+    }
+
+    seq = PySequence_Fast(s, "Could not convert object to sequence");
+    if (seq == NULL) {
+        goto fail;
+    }
+    slen = PySequence_Fast_GET_SIZE(seq);
+
+    /*
+     * Either the dimensions match, or the sequence has length 1 and can
+     * be broadcast to the destination.
+     */
+    if (slen != PyArray_DIMS(a)[dim] && slen != 1) {
+        PyErr_Format(PyExc_ValueError,
+                "cannot copy sequence with size %zd to array axis "
+                "with dimension %" NPY_INTP_FMT, slen, PyArray_DIMS(a)[dim]);
+        goto fail;
+    }
+
+    /*
+     * After the check above the number of destination entries is always the
+     * length of axis `dim`: either the sequence has exactly that many items
+     * (copy element by element) or it has a single item that is broadcast to
+     * all of the outputs.
+     */
+    alen = PyArray_DIM(a, dim);
+    for (i = 0; i < alen; i++) {
+        PyObject *o = PySequence_Fast_GET_ITEM(seq, (slen == 1) ? 0 : i);
+
+        if ((PyArray_NDIM(a) - dim) > 1) {
+            /* More axes remain: recurse into the i-th sub-array of dst. */
+            PyArrayObject *sub =
+                    (PyArrayObject *)array_item_asarray(dst, i);
+            if (sub == NULL) {
+                /* `res` may still hold 0 from a previous iteration. */
+                res = -1;
+                goto fail;
+            }
+
+            res = setArrayFromSequence(a, o, dim+1, sub);
+            Py_DECREF(sub);
+        }
+        else {
+            /* Last axis: store the item directly into the i-th slot. */
+            char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]);
+            res = PyArray_SETITEM(dst, b, o);
+        }
+        if (res < 0) {
+            goto fail;
+        }
+    }
+
+    Py_DECREF(seq);
+    Py_DECREF(s);
+    return 0;
+
+ fail:
+    Py_XDECREF(seq);
+    /* `s` is NULL when PyArray_EnsureArray failed after consuming it. */
+    Py_XDECREF(s);
+    return res;
+}
+
+
+
/*NUMPY_API
* Does not check for NPY_ARRAY_ENSURECOPY and NPY_ARRAY_NOTSWAPPED in flags
* Steals a reference to newtype --- which can be NULL
@@ -1408,6 +1562,54 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
if (ndim < 0) {
return NULL;
}
+
+ if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) {
+ /*
+ * When a subarray dtype was passed in, its dimensions are absorbed
+ * into the array dimension (causing a dimension mismatch).
+ * We can't reasonably handle this because of inconsistencies in
+ * how it was handled (depending on nested list vs. embedded array-likes).
+ * So we give a deprecation warning and fall back to legacy code.
+ *
+ * The legacy code (setArrayFromSequence) reads `op` directly and is
+ * not passed the coercion cache, so the cache is freed before it runs.
+ * Every exit from this branch returns; nothing below it is reached.
+ */
+ ret = (PyArrayObject *)PyArray_NewFromDescr(
+ &PyArray_Type, dtype, ndim, dims, NULL, NULL,
+ flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
+ if (ret == NULL) {
+ npy_free_coercion_cache(cache);
+ return NULL;
+ }
+ assert(PyArray_NDIM(ret) != ndim);
+
+ if (cache == NULL) {
+ /* This is a single item. Sets only first subarray element.
+ * The assert below documents that a missing cache is only
+ * expected for 0-d input. */
+ assert(ndim == 0);
+ if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ }
+ else {
+ npy_free_coercion_cache(cache);
+ if (setArrayFromSequence(ret, op, 0, NULL) < 0) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ }
+ /* NumPy 1.20, 2020-10-01
+ * The warning is issued only after the array was filled, so
+ * conversion errors take precedence over the deprecation. A nonzero
+ * result from DEPRECATE discards the array and returns NULL
+ * (presumably the warning was turned into an error -- confirm
+ * against the DEPRECATE definition). */
+ if (DEPRECATE(
+ "using a dtype with a subarray field is deprecated. "
+ "This can lead to inconsistent behaviour due to the resulting "
+ "dtype being different from the input dtype. "
+ "You may try to use `dtype=dtype.base`, which should give the "
+ "same result for most inputs, but does not guarantee the "
+ "output dimensions to match the subarray ones. "
+ "(Deprecated NumPy 1.20)")) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ return (PyObject *)ret;
+ }
+
if (dtype == NULL) {
dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE);
}
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index f7cb2185b..e4421b41b 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -844,6 +844,20 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
if (ret == NULL) {
return NULL;
}
+    /*
+     * NumPy 1.20, 2020-10-01
+     * Warn when the result has a different number of dimensions than
+     * `self`; the message attributes this to a subarray dtype. A nonzero
+     * result from DEPRECATE discards the result and returns NULL
+     * (presumably the warning was turned into an error -- confirm against
+     * the DEPRECATE definition).
+     */
+    if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE(
+            "using a dtype with a subarray field is deprecated. "
+            "This can lead to inconsistent behaviour due to the resulting "
+            "dtype being different from the input dtype. "
+            "You may try to use `dtype=dtype.base`, which should give the "
+            "same result for most inputs, but does not guarantee the "
+            "output dimensions to match the subarray ones. "
+            "For `arr.astype()` the old, surprising, behaviour can be "
+            "retained using `res = np.empty(arr.shape, dtype)` followed "
+            "by `res[...] = arr`. (Deprecated NumPy 1.20)")) {
+        Py_DECREF(ret);
+        return NULL;
+    }
if (PyArray_CopyInto(ret, self) < 0) {
Py_DECREF(ret);
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 17391e80c..91fd95d4d 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -81,6 +81,8 @@ class _DeprecationTestCase:
kwargs : dict
Keyword arguments for `function`
"""
+ __tracebackhide__ = True # Hide traceback for py.test
+
# reset the log
self.log[:] = []
@@ -728,3 +730,42 @@ class FlatteningConcatenateUnsafeCast(_DeprecationTestCase):
np.concatenate(([0.], [1.]), out=np.empty(2, dtype=np.int64),
casting="same_kind")
+
+class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
+ # Tests for the deprecation of passing a subarray dtype to `np.array`
+ # and `arr.astype`. `message` is the warning text these tests look for
+ # (how it is matched is decided by _DeprecationTestCase, not shown here).
+ message = "using a dtype with a subarray field is deprecated"
+
+ @pytest.mark.parametrize(["obj", "dtype"],
+ [([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'),
+ (["1", "2"], "(2)i,")])
+ def test_deprecated_sequence(self, obj, dtype):
+ # Coercing a nested sequence with a subarray dtype must warn, and the
+ # values must still match item-by-item assignment.
+ dtype = np.dtype(dtype)
+ self.assert_deprecated(lambda: np.array(obj, dtype=dtype))
+ with pytest.warns(DeprecationWarning):
+ res = np.array(obj, dtype=dtype)
+
+ # Using `arr.astype(subarray_dtype)` is also deprecated, because
+ # it uses broadcasting instead of casting each element.
+ self.assert_deprecated(lambda: res.astype(dtype))
+ # Build the reference result by assigning each item of `obj` separately.
+ expected = np.empty(len(obj), dtype=dtype)
+ for i in range(len(expected)):
+ expected[i] = obj[i]
+
+ assert_array_equal(res, expected)
+
+ def test_deprecated_array(self):
+ # Arrays are more complex, since they "broadcast" on success:
+ arr = np.array([1, 2])
+ self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,"))
+ with pytest.warns(DeprecationWarning):
+ res = np.array(arr, dtype="(2)i,")
+
+ # The two-element input ends up repeated along a new leading axis.
+ assert_array_equal(res, [[1, 2], [1, 2]])
+
+ def test_not_deprecated(self):
+ # These error paths are not deprecated, the tests should be retained
+ # when the deprecation is finalized.
+ arr = np.arange(5 * 2).reshape(5, 2)
+ with pytest.raises(ValueError):
+ arr.astype("(2,2)f")
+ with pytest.raises(ValueError):
+ np.array(arr, dtype="(2,2)f")