summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Charles Harris <charlesr.harris@gmail.com> 2020-12-18 12:09:25 -0700
committer: GitHub <noreply@github.com> 2020-12-18 12:09:25 -0700
commit: bac54ecb0b815eea2e8116d21aa96e1387793468 (patch)
tree: 37d12e172e51319864bbb6513dd0f7ffc043d46a
parent: 5b63f260933672b7182daf4fb15ffcd15bae68bf (diff)
parent: 8caabdf36c63098bc5743306df55e2c45b5808e3 (diff)
download: numpy-bac54ecb0b815eea2e8116d21aa96e1387793468.tar.gz
Merge pull request #17973 from seberg/require-sequence-array-coercion
DEP: Futurewarn on requiring __len__ on array-likes
-rw-r--r-- doc/source/release/1.20.0-notes.rst 49
-rw-r--r-- numpy/core/src/multiarray/array_coercion.c 47
-rw-r--r-- numpy/core/tests/test_array_coercion.py 16
-rw-r--r-- numpy/core/tests/test_deprecations.py 86
4 files changed, 192 insertions, 6 deletions
diff --git a/doc/source/release/1.20.0-notes.rst b/doc/source/release/1.20.0-notes.rst
index 9f46a3e80..e26aa0d40 100644
--- a/doc/source/release/1.20.0-notes.rst
+++ b/doc/source/release/1.20.0-notes.rst
@@ -184,6 +184,43 @@ Use ``next(it)`` instead of ``it.ndincr()``.
(`gh-17233 <https://github.com/numpy/numpy/pull/17233>`__)
+ArrayLike objects which do not define ``__len__`` and ``__getitem__``
+---------------------------------------------------------------------
+Objects which define one of the protocols ``__array__``,
+``__array_interface__``, or ``__array_struct__`` but are not sequences
+(usually defined by having a ``__len__`` and ``__getitem__``) will behave
+differently during array-coercion in the future.
+
+When nested inside sequences, such as ``np.array([array_like])``, these
+were handled as a single Python object rather than an array.
+In the future they will behave identically to::
+
+ np.array([np.array(array_like)])
+
+This change should only have an effect if ``np.array(array_like)`` is not 0-D.
+The solution to this warning may depend on the object:
+
+* Some array-likes may expect the new behaviour, and users can ignore the
+ warning. The object can choose to expose the sequence protocol to opt-in
+ to the new behaviour.
+* For example, ``shapely`` will allow conversion to an array-like using
+ ``line.coords`` rather than ``np.asarray(line)``. Users may work around
+ the warning, or use the new convention when it becomes available.
+
+Unfortunately, using the new behaviour can only be achieved by
+calling ``np.array(array_like)``.
+
+If you wish to ensure that the old behaviour remains unchanged, please create
+an object array and then fill it explicitly, for example::
+
+ arr = np.empty(3, dtype=object)
+ arr[:] = [array_like1, array_like2, array_like3]
+
+This will ensure NumPy knows not to enter the array-like and to use it as
+an object instead.
+
+(`gh-17973 <https://github.com/numpy/numpy/pull/17973>`__)
+
Future Changes
==============
@@ -349,9 +386,15 @@ Things will now be more consistent with::
np.array([np.array(array_like1)])
-This could potentially subtly change output for badly defined array-likes.
-We are not aware of any such case where the results were not clearly
-incorrect previously.
+This can subtly change output for some badly defined array-likes.
+One example of this is an array-like object which is not also a sequence
+of matching shape.
+In NumPy 1.20, a warning will be given when an array-like is not also a
+sequence (but behaviour remains identical, see deprecations).
+If an array-like is also a sequence (defines ``__getitem__`` and ``__len__``),
+NumPy will now only use the result given by ``__array__``,
+``__array_interface__``, or ``__array_struct__``. This will result in
+differences when the (nested) sequence describes a different shape.
(`gh-16200 <https://github.com/numpy/numpy/pull/16200>`__)
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
index 603e9d93b..1eac401bc 100644
--- a/numpy/core/src/multiarray/array_coercion.c
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -922,6 +922,53 @@ PyArray_DiscoverDTypeAndShape_Recursive(
Py_DECREF(arr);
arr = NULL;
}
+ else if (curr_dims > 0 && curr_dims != max_dims) {
+ /*
+ * Deprecated 2020-12-09, NumPy 1.20
+ *
+ * See https://github.com/numpy/numpy/issues/17965
+ * Shapely had objects which are not sequences but did export
+ * the array-interface (and so are arguably array-like).
+ * Previously numpy would not use array-like information during
+ * shape discovery, so that it ended up acting as if this was
+ * an (unknown) scalar but with the specified dtype.
+ * Thus we ignore "scalars" here, as the value stored in the
+ * array should be acceptable.
+ */
+ if (PyArray_NDIM(arr) > 0 && NPY_UNLIKELY(!PySequence_Check(obj))) {
+ if (PyErr_WarnFormat(PyExc_FutureWarning, 1,
+ "The input object of type '%s' is an array-like "
+ "implementing one of the corresponding protocols "
+ "(`__array__`, `__array_interface__` or "
+ "`__array_struct__`); but not a sequence (or 0-D). "
+ "In the future, this object will be coerced as if it "
+ "was first converted using `np.array(obj)`. "
+ "To retain the old behaviour, you have to either "
+ "modify the type '%s', or assign to an empty array "
+ "created with `np.empty(correct_shape, dtype=object)`.",
+ Py_TYPE(obj)->tp_name, Py_TYPE(obj)->tp_name) < 0) {
+ Py_DECREF(arr);
+ return -1;
+ }
+ /*
+ * Strangely enough, even though we threw away the result here,
+ * we did use it during descriptor discovery, so promote it:
+ */
+ if (update_shape(curr_dims, &max_dims, out_shape,
+ 0, NULL, NPY_FALSE, flags) < 0) {
+ *flags |= FOUND_RAGGED_ARRAY;
+ Py_DECREF(arr);
+ return max_dims;
+ }
+ if (!(*flags & DESCRIPTOR_WAS_SET) && handle_promotion(
+ out_descr, PyArray_DESCR(arr), fixed_DType, flags) < 0) {
+ Py_DECREF(arr);
+ return -1;
+ }
+ Py_DECREF(arr);
+ return max_dims;
+ }
+ }
}
if (arr != NULL) {
/*
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index b966ee7b0..08b32dfcc 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -38,8 +38,18 @@ def arraylikes():
yield subclass
+ class _SequenceLike():
+ # We are giving a warning that array-likes were also expected to be
+ # sequence-like in `np.array([array_like])`. This can be removed
+ # when the deprecation has expired (started NumPy 1.20)
+ def __len__(self):
+ raise TypeError
+
+ def __getitem__(self):
+ raise TypeError
+
# Array-interface
- class ArrayDunder:
+ class ArrayDunder(_SequenceLike):
def __init__(self, a):
self.a = a
@@ -52,7 +62,7 @@ def arraylikes():
yield param(memoryview, id="memoryview")
# Array-interface
- class ArrayInterface:
+ class ArrayInterface(_SequenceLike):
def __init__(self, a):
self.a = a # need to hold on to keep interface valid
self.__array_interface__ = a.__array_interface__
@@ -60,7 +70,7 @@ def arraylikes():
yield param(ArrayInterface, id="__array_interface__")
# Array-Struct
- class ArrayStruct:
+ class ArrayStruct(_SequenceLike):
def __init__(self, a):
self.a = a # need to hold on to keep struct valid
self.__array_struct__ = a.__array_struct__
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index a67fe62c3..ed238da9f 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -773,6 +773,92 @@ class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
self.assert_deprecated(check)
+class TestFutureWarningArrayLikeNotIterable(_DeprecationTestCase):
+ # Deprecated 2020-12-09, NumPy 1.20
+ warning_cls = FutureWarning
+ message = "The input object of type.*but not a sequence"
+
+ @pytest.mark.parametrize("protocol",
+ ["__array__", "__array_interface__", "__array_struct__"])
+ def test_deprecated(self, protocol):
+ """Test that these objects give a warning since they are not 0-D,
+ not coerced at the top level `np.array(obj)`, but nested, and do
+ *not* define the sequence protocol.
+
+ NOTE: Tests for the versions including __len__ and __getitem__ exist
+ in `test_array_coercion.py` and they can be modified or amended
+ when this deprecation expires.
+ """
+ blueprint = np.arange(10)
+ MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)})
+ self.assert_deprecated(lambda: np.array([MyArr()], dtype=object))
+
+ @pytest.mark.parametrize("protocol",
+ ["__array__", "__array_interface__", "__array_struct__"])
+ def test_0d_not_deprecated(self, protocol):
+ # 0-D always worked (albeit it would use __float__ or similar for the
+ # conversion, which may not happen anymore)
+ blueprint = np.array(1.)
+ MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)})
+ myarr = MyArr()
+
+ self.assert_not_deprecated(lambda: np.array([myarr], dtype=object))
+ res = np.array([myarr], dtype=object)
+ expected = np.empty(1, dtype=object)
+ expected[0] = myarr
+ assert_array_equal(res, expected)
+
+ @pytest.mark.parametrize("protocol",
+ ["__array__", "__array_interface__", "__array_struct__"])
+ def test_unnested_not_deprecated(self, protocol):
+ blueprint = np.arange(10)
+ MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)})
+ myarr = MyArr()
+
+ self.assert_not_deprecated(lambda: np.array(myarr))
+ res = np.array(myarr)
+ assert_array_equal(res, blueprint)
+
+ @pytest.mark.parametrize("protocol",
+ ["__array__", "__array_interface__", "__array_struct__"])
+ def test_strange_dtype_handling(self, protocol):
+ """The old code would actually use the dtype from the array, but
+ then end up not using the array (for dimension discovery)
+ """
+ blueprint = np.arange(10).astype("f4")
+ MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol),
+ "__float__": lambda _: 0.5})
+ myarr = MyArr()
+
+ # Make sure we warn (and capture the FutureWarning)
+ with pytest.warns(FutureWarning, match=self.message):
+ res = np.array([[myarr]])
+
+ assert res.shape == (1, 1)
+ assert res.dtype == "f4"
+ assert res[0, 0] == 0.5
+
+ @pytest.mark.parametrize("protocol",
+ ["__array__", "__array_interface__", "__array_struct__"])
+ def test_assignment_not_deprecated(self, protocol):
+ # If the result is dtype=object we do not unpack a nested array or
+ # array-like, if it is nested at exactly the right depth.
+ # NOTE: We actually do still call __array__, etc. but ignore the result
+ # in the end. For `dtype=object` we could optimize that away.
+ blueprint = np.arange(10).astype("f4")
+ MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol),
+ "__float__": lambda _: 0.5})
+ myarr = MyArr()
+
+ res = np.empty(3, dtype=object)
+ def set():
+ res[:] = [myarr, myarr, myarr]
+ self.assert_not_deprecated(set)
+ assert res[0] is myarr
+ assert res[1] is myarr
+ assert res[2] is myarr
+
+
class TestDeprecatedUnpickleObjectScalar(_DeprecationTestCase):
# Deprecated 2020-11-24, NumPy 1.20
"""