diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2020-12-18 12:09:25 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-18 12:09:25 -0700 |
commit | bac54ecb0b815eea2e8116d21aa96e1387793468 (patch) | |
tree | 37d12e172e51319864bbb6513dd0f7ffc043d46a | |
parent | 5b63f260933672b7182daf4fb15ffcd15bae68bf (diff) | |
parent | 8caabdf36c63098bc5743306df55e2c45b5808e3 (diff) | |
download | numpy-bac54ecb0b815eea2e8116d21aa96e1387793468.tar.gz |
Merge pull request #17973 from seberg/require-sequence-array-coercion
DEP: Futurewarn on requiring __len__ on array-likes
-rw-r--r-- | doc/source/release/1.20.0-notes.rst | 49 | ||||
-rw-r--r-- | numpy/core/src/multiarray/array_coercion.c | 47 | ||||
-rw-r--r-- | numpy/core/tests/test_array_coercion.py | 16 | ||||
-rw-r--r-- | numpy/core/tests/test_deprecations.py | 86 |
4 files changed, 192 insertions, 6 deletions
diff --git a/doc/source/release/1.20.0-notes.rst b/doc/source/release/1.20.0-notes.rst index 9f46a3e80..e26aa0d40 100644 --- a/doc/source/release/1.20.0-notes.rst +++ b/doc/source/release/1.20.0-notes.rst @@ -184,6 +184,43 @@ Use ``next(it)`` instead of ``it.ndincr()``. (`gh-17233 <https://github.com/numpy/numpy/pull/17233>`__) +ArrayLike objects which do not define ``__len__`` and ``__getitem__`` +--------------------------------------------------------------------- +Objects which define one of the protocols ``__array__``, +``__array_interface__``, or ``__array_struct__`` but are not sequences +(usually defined by having a ``__len__`` and ``__getitem__``) will behave +differently during array-coercion in the future. + +When nested inside sequences, such as ``np.array([array_like])``, these +were handled as a single Python object rather than an array. +In the future they will behave identically to:: + + np.array([np.array(array_like)]) + +This change should only have an effect if ``np.array(array_like)`` is not 0-D. +The solution to this warning may depend on the object: + +* Some array-likes may expect the new behaviour, and users can ignore the + warning. The object can choose to expose the sequence protocol to opt-in + to the new behaviour. +* For example, ``shapely`` will allow conversion to an array-like using + ``line.coords`` rather than ``np.asarray(line)``. Users may work around + the warning, or use the new convention when it becomes available. + +Unfortunately, using the new behaviour can only be achieved by +calling ``np.array(array_like)``. + +If you wish to ensure that the old behaviour remains unchanged, please create +an object array and then fill it explicitly, for example:: + + arr = np.empty(3, dtype=object) + arr[:] = [array_like1, array_like2, array_like3] + +This will ensure NumPy knows to not enter the array-like and use it as +an object instead. 
+ +(`gh-17973 <https://github.com/numpy/numpy/pull/17973>`__) + Future Changes ============== @@ -349,9 +386,15 @@ Things will now be more consistent with:: np.array([np.array(array_like1)]) -This could potentially subtly change output for badly defined array-likes. -We are not aware of any such case where the results were not clearly -incorrect previously. +This can subtly change output for some badly defined array-likes. +One example of this is array-like objects which are not also sequences +of matching shape. +In NumPy 1.20, a warning will be given when an array-like is not also a +sequence (but behaviour remains identical, see deprecations). +If an array-like is also a sequence (defines ``__getitem__`` and ``__len__``) +NumPy will now only use the result given by ``__array__``, +``__array_interface__``, or ``__array_struct__``. This will result in +differences when the (nested) sequence describes a different shape. (`gh-16200 <https://github.com/numpy/numpy/pull/16200>`__) diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c index 603e9d93b..1eac401bc 100644 --- a/numpy/core/src/multiarray/array_coercion.c +++ b/numpy/core/src/multiarray/array_coercion.c @@ -922,6 +922,53 @@ PyArray_DiscoverDTypeAndShape_Recursive( Py_DECREF(arr); arr = NULL; } + else if (curr_dims > 0 && curr_dims != max_dims) { + /* + * Deprecated 2020-12-09, NumPy 1.20 + * + * See https://github.com/numpy/numpy/issues/17965 + * Shapely had objects which are not sequences but did export + * the array-interface (and so are arguably array-like). + * Previously numpy would not use array-like information during + * shape discovery, so that it ended up acting as if this was + * an (unknown) scalar but with the specified dtype. + * Thus we ignore "scalars" here, as the value stored in the + * array should be acceptable. 
+ */ + if (PyArray_NDIM(arr) > 0 && NPY_UNLIKELY(!PySequence_Check(obj))) { + if (PyErr_WarnFormat(PyExc_FutureWarning, 1, + "The input object of type '%s' is an array-like " + "implementing one of the corresponding protocols " + "(`__array__`, `__array_interface__` or " + "`__array_struct__`); but not a sequence (or 0-D). " + "In the future, this object will be coerced as if it " + "was first converted using `np.array(obj)`. " + "To retain the old behaviour, you have to either " + "modify the type '%s', or assign to an empty array " + "created with `np.empty(correct_shape, dtype=object)`.", + Py_TYPE(obj)->tp_name, Py_TYPE(obj)->tp_name) < 0) { + Py_DECREF(arr); + return -1; + } + /* + * Strangely enough, even though we threw away the result here, + * we did use it during descriptor discovery, so promote it: + */ + if (update_shape(curr_dims, &max_dims, out_shape, + 0, NULL, NPY_FALSE, flags) < 0) { + *flags |= FOUND_RAGGED_ARRAY; + Py_DECREF(arr); + return max_dims; + } + if (!(*flags & DESCRIPTOR_WAS_SET) && handle_promotion( + out_descr, PyArray_DESCR(arr), fixed_DType, flags) < 0) { + Py_DECREF(arr); + return -1; + } + Py_DECREF(arr); + return max_dims; + } + } } if (arr != NULL) { /* diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py index b966ee7b0..08b32dfcc 100644 --- a/numpy/core/tests/test_array_coercion.py +++ b/numpy/core/tests/test_array_coercion.py @@ -38,8 +38,18 @@ def arraylikes(): yield subclass + class _SequenceLike(): + # We are giving a warning that array-likes were also expected to be + # sequence-like in `np.array([array_like])`, this can be removed + # when the deprecation expired (started NumPy 1.20) + def __len__(self): + raise TypeError + + def __getitem__(self): + raise TypeError + # Array-interface - class ArrayDunder: + class ArrayDunder(_SequenceLike): def __init__(self, a): self.a = a @@ -52,7 +62,7 @@ def arraylikes(): yield param(memoryview, id="memoryview") # Array-interface - class 
ArrayInterface: + class ArrayInterface(_SequenceLike): def __init__(self, a): self.a = a # need to hold on to keep interface valid self.__array_interface__ = a.__array_interface__ @@ -60,7 +70,7 @@ def arraylikes(): yield param(ArrayInterface, id="__array_interface__") # Array-Struct - class ArrayStruct: + class ArrayStruct(_SequenceLike): def __init__(self, a): self.a = a # need to hold on to keep struct valid self.__array_struct__ = a.__array_struct__ diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index a67fe62c3..ed238da9f 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -773,6 +773,92 @@ class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase): self.assert_deprecated(check) +class TestFutureWarningArrayLikeNotIterable(_DeprecationTestCase): + # Deprecated 2020-12-09, NumPy 1.20 + warning_cls = FutureWarning + message = "The input object of type.*but not a sequence" + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_deprecated(self, protocol): + """Test that these objects give a warning since they are not 0-D, + not coerced at the top level `np.array(obj)`, but nested, and do + *not* define the sequence protocol. + + NOTE: Tests for the versions including __len__ and __getitem__ exist + in `test_array_coercion.py` and they can be modified or amended + when this deprecation expires. + """ + blueprint = np.arange(10) + MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) + self.assert_deprecated(lambda: np.array([MyArr()], dtype=object)) + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_0d_not_deprecated(self, protocol): + # 0-D always worked (albeit it would use __float__ or similar for the + # conversion, which may not happen anymore) + blueprint = np.array(1.) 
+ MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) + myarr = MyArr() + + self.assert_not_deprecated(lambda: np.array([myarr], dtype=object)) + res = np.array([myarr], dtype=object) + expected = np.empty(1, dtype=object) + expected[0] = myarr + assert_array_equal(res, expected) + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_unnested_not_deprecated(self, protocol): + blueprint = np.arange(10) + MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) + myarr = MyArr() + + self.assert_not_deprecated(lambda: np.array(myarr)) + res = np.array(myarr) + assert_array_equal(res, blueprint) + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_strange_dtype_handling(self, protocol): + """The old code would actually use the dtype from the array, but + then end up not using the array (for dimension discovery) + """ + blueprint = np.arange(10).astype("f4") + MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol), + "__float__": lambda _: 0.5}) + myarr = MyArr() + + # Make sure we warn (and capture the FutureWarning) + with pytest.warns(FutureWarning, match=self.message): + res = np.array([[myarr]]) + + assert res.shape == (1, 1) + assert res.dtype == "f4" + assert res[0, 0] == 0.5 + + @pytest.mark.parametrize("protocol", + ["__array__", "__array_interface__", "__array_struct__"]) + def test_assignment_not_deprecated(self, protocol): + # If the result is dtype=object we do not unpack a nested array or + # array-like, if it is nested at exactly the right depth. + # NOTE: We actually do still call __array__, etc. but ignore the result + # in the end. For `dtype=object` we could optimize that away. 
+ blueprint = np.arange(10).astype("f4") + MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol), + "__float__": lambda _: 0.5}) + myarr = MyArr() + + res = np.empty(3, dtype=object) + def set(): + res[:] = [myarr, myarr, myarr] + self.assert_not_deprecated(set) + assert res[0] is myarr + assert res[1] is myarr + assert res[2] is myarr + + class TestDeprecatedUnpickleObjectScalar(_DeprecationTestCase): # Deprecated 2020-11-24, NumPy 1.20 """ |