-rw-r--r--  doc/neps/nep-0021-advanced-indexing.rst    537
-rw-r--r--  doc/release/1.16.0-notes.rst                10
-rw-r--r--  numpy/core/numerictypes.py                  50
-rw-r--r--  numpy/core/src/multiarray/arrayobject.c    183
-rw-r--r--  numpy/core/src/umath/ufunc_object.c        180
-rw-r--r--  numpy/core/tests/test_deprecations.py        8
-rw-r--r--  numpy/core/tests/test_numerictypes.py        6
-rw-r--r--  numpy/core/tests/test_ufunc.py              21
8 files changed, 746 insertions, 249 deletions
diff --git a/doc/neps/nep-0021-advanced-indexing.rst b/doc/neps/nep-0021-advanced-indexing.rst
new file mode 100644
index 000000000..92d40d2eb
--- /dev/null
+++ b/doc/neps/nep-0021-advanced-indexing.rst
@@ -0,0 +1,537 @@
+==========================================================
+Implementing intuitive and full featured advanced indexing
+==========================================================
+
+:Author: Sebastian Berg
+:Status: Draft
+:Type: Standards Track
+:Created: 2015-08-27
+
+
+Abstract
+--------
+
+Advanced indexing with multiple array indices is typically confusing to
+both new, and in many cases even old, users of NumPy. To avoid this problem
+and allow for more and clearer features, we propose to:
+
+1. Introduce ``arr.oindex[indices]`` which allows advanced indices, but
+ uses outer indexing logic.
+2. Introduce ``arr.vindex[indices]`` which uses the current
+   "vectorized"/broadcasted logic but with two differences from
+   fancy indexing:
+
+ 1. Boolean indices always use the outer indexing logic.
+      (Multi-dimensional booleans should be allowed).
+ 2. The integer index result dimensions are always the first axes
+ of the result array. No transpose is done, even for a single
+ integer array index.
+
+3. Plain indexing on the array will only give warnings and eventually
+ errors either:
+
+ * when there is ambiguity between legacy fancy and outer indexing
+ (note that ``arr[[1, 2], :, 0]`` is such a case, an integer
+ can be the "second" integer index array),
+   * when any integer index array is present (and possibly also when
+     more than one boolean index array is present).
+
+These constraints are sufficient for making indexing generally consistent
+with expectations and providing a less surprising learning curve with
+``oindex``.
+
+Note that all things mentioned here apply both to assignment and to
+subscription.
+
+Understanding these details is *not* easy. The `Examples` section in the
+discussion below gives code examples, and the hopefully more accessible
+`Motivational Example` provides some use-cases for the general ideas and
+is likely a good start for anyone not intimately familiar with advanced
+indexing.
+
+
+Detailed Description
+--------------------
+
+Old style advanced indexing with multiple array (boolean or integer) indices,
+also called "fancy indexing", tends to be very confusing for new users.
+While fancy (or legacy) indexing is useful in many cases, one would
+naively assume that the result of multiple 1-d ranges is analogous to
+multiple slices along each dimension (also called "outer indexing").
+
+However, legacy fancy indexing with multiple arrays broadcasts these arrays
+into a single index over multiple dimensions. There are three main points
+of confusion when multiple array indices are involved:
+
+1. Most new users will usually expect outer indexing (consistent with
+ slicing). This is also the most common way of handling this in other
+ packages or languages.
+2. The axes introduced by the array indices are at the front, unless
+ all array indices are consecutive, in which case one can deduce where
+ the user "expects" them to be:
+
+   * ``arr[:, [0, 1], :, [0, 1]]`` will have the first dimension shaped 2,
+   * ``arr[:, [0, 1], [0, 1]]`` will have the second dimension shaped 2
+     (see the shape illustration after this list).
+
+3. When a boolean array index is mixed with another boolean or integer
+ array, the result is very hard to understand (the boolean array is
+ converted to integer array indices and then broadcast), and hardly
+ useful.
+   There is no well-defined broadcast for booleans, so boolean
+   indices are logically always "outer" type indices.
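+
+To make point 2 concrete, here is a shape-only illustration of the
+current behaviour (``arr`` as in the `Examples` section below)::
+
+    >>> arr = np.ones((5, 6, 7, 8))
+    >>> arr[:, [0, 1], :, [0, 1]].shape   # non-consecutive: axes at front
+    (2, 5, 7)
+    >>> arr[:, [0, 1], [0, 1]].shape      # consecutive: axes stay in place
+    (5, 2, 8)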
+
+
+Furthermore, since other packages use outer type logic, enabling its
+use in NumPy will allow simpler cross-package code.
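+
+Note that NumPy already offers a functional spelling of outer indexing
+for one-dimensional indices through ``np.ix_``::
+
+    >>> arr2d = np.arange(12).reshape(3, 4)
+    >>> arr2d[np.ix_([0, 2], [1, 3])]
+    array([[ 1,  3],
+           [ 9, 11]])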
+
+
+Proposed rules
+~~~~~~~~~~~~~~
+
+From the three problems noted above, some expectations for NumPy can
+be deduced:
+
+1. There should be a prominent outer/orthogonal indexing method such as
+ ``arr.oindex[indices]``.
+
+2. Considering how confusing vectorized/fancy indexing can be, it should
+   be possible to make it more explicit (e.g. ``arr.vindex[indices]``).
+
+3. A new ``arr.vindex[indices]`` method should not be tied to the
+   confusing transpose rules of fancy indexing, which get invoked even
+   for the simple case of a single advanced index. Thus,
+   no transposing should be done. The axes created by the integer array
+   indices are always inserted at the front, even for a single index.
+
+4. Boolean indexing is conceptually outer indexing. Broadcasting it
+   together with other advanced indices in the manner of legacy
+   "fancy indexing" is generally neither helpful nor well defined.
+   A user who wishes the "``nonzero``" plus broadcast behaviour can
+   be expected to do this manually. Thus, ``vindex`` would still use
+   outer type indexing for boolean index arrays.
+ Note that using this rule a single boolean index can still index into
+ multiple dimensions at once.
+
+5. An ``arr.lindex`` or ``arr.findex`` should likely be implemented to allow
+   legacy fancy indexing indefinitely. This also gives a simple way to
+   update fancy-indexing code, making deprecations of plain indexing
+   easier.
+
+6. Plain indexing ``arr[...]`` should return an error for ambiguous cases.
+   To begin with, this probably means that cases where ``arr[ind]`` and
+   ``arr.oindex[ind]`` return different results give deprecation warnings.
+   Due to the transposing behaviour, this means that ``arr[0, :, index_arr]``
+   will be deprecated, but ``arr[:, 0, index_arr]`` will not for the time
+   being (illustrated after this list).
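+
+A shape-only sketch of rule 6 (``arr.oindex`` does not exist yet; the
+second result is what this proposal would give)::
+
+    >>> arr = np.ones((5, 6, 7, 8))
+    >>> arr[0, :, [0, 1]].shape         # legacy fancy: axes to the front
+    (2, 6, 8)
+    >>> arr.oindex[0, :, [0, 1]].shape  # proposed outer indexing
+    (6, 2, 8)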
+
+Since, unlike plain indexing, the new indexing attributes are explicitly
+aimed at higher dimensional indexing, a few more changes should be
+implemented:
+
+* The indexing attributes will enforce an exact match between the index
+  and the dimensions of the array.
+  This means that no implicit ellipsis (``...``) will be added. Unless
+  an ellipsis is present, the indexing expression will thus only work for
+  an array with a specific number of dimensions.
+  This makes the expression more explicit and safeguards against arrays
+  of the wrong dimensionality (see the sketch after this list).
+
+* The current plain indexing allows for the use of non-tuples for
+  multi-dimensional indexing such as ``arr[[slice(None), 2]]``.
+  This creates some inconsistencies, and thus the indexing attributes
+  should only allow plain Python tuples for this purpose.
+  (Whether or not this should be the case for plain indexing is a
+  different issue.)
+
+* The new attributes should not use the getitem-to-implement-setitem
+  branch, since it is a kludge and not useful for vectorized
+  indexing. (Not implemented yet.)
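+
+A sketch of the proposed no-implicit-ellipsis rule (again, ``oindex``
+does not exist yet; this is the intended behaviour)::
+
+    >>> arr = np.ones((5, 6, 7, 8))
+    >>> arr.oindex[0, ...].shape   # explicit ellipsis: works
+    (6, 7, 8)
+    >>> arr.oindex[0]              # proposed: IndexError (too few indices)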
+
+
+Open Questions
+~~~~~~~~~~~~~~
+
+* The names ``oindex`` and ``vindex`` are just suggestions at the time of
+  writing. Another name NumPy has used for something like ``oindex``
+  is ``np.ix_``. See also below.
+
+* It would be possible to limit the use of boolean indices in ``vindex``,
+  assuming that they are rare and to some degree special. Alternatively,
+  boolean indices could in principle be broadcast as in legacy fancy
+  indexing.
+
+* ``oindex`` and ``vindex`` could always return copies, even when no array
+  operation occurs. One argument for allowing a view return is that this way
+  ``oindex`` can be used as a general index replacement.
+  However, there is also an argument for returning copies: in
+  ``arr.vindex[array_scalar, ...]``, ``array_scalar`` should behave as a
+  0-D array but may not, since 0-D arrays tend to be converted to scalars
+  (see the short demonstration after this list).
+  Copying always "fixes" this possible inconsistency.
+
+* The final state into which plain indexing should morph is not fixed in
+  this NEP.
+  It is for example possible that ``arr[index]`` will be equivalent to
+  ``arr.oindex`` at some point in the future.
+  Since such a change will take years, it seems unnecessary to make
+  specific decisions at this time.
+
+* The proposed changes to plain indexing could be postponed indefinitely,
+  or dropped entirely, so as not to break or force major fixes to existing
+  code bases.
+
+* Should there be (and is it possible to have) a mechanism to disallow
+  the new indexing attributes for subclasses unless specifically
+  implemented?
+
+* Should we try to warn users about the special indexing attributes
+  not being implemented? If a subclass has its own indexing, inheriting
+  the attributes from ndarray would be wrong.
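+
+A short demonstration of the 0-D conversion mentioned above::
+
+    >>> idx = np.array(0)        # a 0-D integer array
+    >>> type(np.ones(3)[idx])    # the result is a scalar, not a 0-D array
+    <class 'numpy.float64'>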
+
+
+
+Alternative Names
+~~~~~~~~~~~~~~~~~
+
+Possible names suggested (more suggestions will be added).
+
+============== ======== ============
+**Orthogonal** oindex   oix
+**Vectorized** vindex   vix
+**Legacy**     l/findex legacy_index
+============== ======== ============
+
+
+Subclasses
+~~~~~~~~~~
+
+Subclasses are a bit problematic in the light of these changes. There are
+some possible solutions for this. For most subclasses (those which do not
+provide ``__getitem__`` or ``__setitem__``) the special attributes should
+just work. Subclasses that *do* provide it must be updated accordingly
+and should preferably not subclass working versions of these attributes.
+
+All subclasses will inherit the attributes; however, it seems possible
+to test ``subclass.__getitem__.__classobj__`` when getting, e.g.,
+``subclass.vindex``. If this is not ``ndarray``, the subclass has special
+handling and an ``AttributeError`` can be raised.
+
+A further question is how to facilitate implementing the special attributes.
+Also there is the weird functionality where ``__setitem__`` calls
+``__getitem__`` for non-advanced indices. It might be good to avoid it for
+the new attributes, but on the other hand, that may make it even more
+confusing.
+
+To facilitate implementations, we could provide functions similar to
+``operator.itemgetter`` and ``operator.setitem`` for the attributes.
+Possibly a mixin could be provided to help implementation, but further
+ideas on this are necessary.
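+
+For instance, a hypothetical helper in the spirit of
+``operator.itemgetter`` could look like this (a sketch only; the name is
+not part of this proposal)::
+
+    def oitemgetter(*indices):
+        """Return a callable that fetches ``arr.oindex[indices]``."""
+        def getter(arr):
+            return arr.oindex[indices]
+        return getter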
+
+Related Operation
+~~~~~~~~~~~~~~~~~
+
+There exists a further indexing, or indexing-like, method. It is the
+inverse of a command such as ``np.argmin(arr, axis=axis)``: picking
+the specific elements *along* an axis, given an array of (at least
+typically) the same size.
+
+These functions were added to NumPy 1.15 as ``take_along_axis`` and
+``put_along_axis``.
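+
+For example (requires NumPy >= 1.15)::
+
+    >>> a = np.array([[10, 30, 20], [60, 40, 50]])
+    >>> idx = np.argmin(a, axis=1)
+    >>> np.take_along_axis(a, idx[:, np.newaxis], axis=1)
+    array([[10],
+           [40]])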
+
+
+Implementation
+--------------
+
+Implementation of a special indexing object available through
+``arr.oindex``, ``arr.vindex``, and ``arr.lindex`` to allow these indexing
+operations. Also starting to deprecate those plain index operations
+which are now considered ambiguous.
+Furthermore, the NumPy code base will need to use the new attributes and
+tests will have to be adapted.
+
+
+Backward compatibility
+----------------------
+
+Since this is a new feature, no backward compatibility issues would arise.
+Some forward compatibility issues with subclasses that do not specifically
+implement the new methods may arise.
+
+
+Alternatives
+------------
+
+NumPy may choose not to offer these different types of indexing methods, or
+choose to only offer them through specific functions instead of the proposed
+notation above.
+For variations see also the open questions section above.
+
+
+Discussion
+----------
+
+Some discussion can be found on the pull request:
+
+ * https://github.com/numpy/numpy/pull/6256
+
+A python implementation of the indexing operations can be found at:
+
+ * https://gist.github.com/shoyer/c700193625347eb68fee4d1f0dc8c0c8
+
+
+Examples
+~~~~~~~~
+
+Since the various kinds of indexing are hard to grasp in many cases, these
+examples hopefully give some more insight. Note that they are all in terms
+of shape.
+In the examples, all original dimensions have 5 or more elements;
+advanced indexing inserts smaller dimensions.
+These examples may be hard to grasp without working knowledge of advanced
+indexing as of NumPy 1.9.
+
+Example array::
+
+ >>> arr = np.ones((5, 6, 7, 8))
+
+
+Legacy fancy indexing
+---------------------
+
+Note that the same results can be achieved with ``arr.lindex``; the cases
+marked "future error" below still work under plain indexing for now.
+
+A single index array is not transposed (this is the same for all indexing
+types)::
+
+ >>> arr[[0], ...].shape
+ (1, 6, 7, 8)
+ >>> arr[:, [0], ...].shape
+ (5, 1, 7, 8)
+
+
+Multiple index arrays are transposed to the front *unless* they are
+consecutive::
+
+ >>> arr[:, [0], [0], :].shape # future error
+ (5, 1, 8)
+ >>> arr[:, [0], :, [0]].shape # future error
+ (1, 5, 7)
+
+
+It is important to note that a scalar *is* an integer array index in this
+sense (and gets broadcast with the other advanced index)::
+
+ >>> arr[:, [0], 0, :].shape
+ (5, 1, 8)
+ >>> arr[:, [0], :, 0].shape # future error (scalar is "fancy")
+ (1, 5, 7)
+
+
+A single boolean index can act on multiple dimensions (in particular the
+whole array). Its shape has to match those dimensions (as of 1.10, a
+mismatch gives a deprecation warning).
+The boolean index is otherwise identical to (multiple consecutive) integer
+array indices::
+
+ >>> # Create boolean index with one True value for the last two dimensions:
+ >>> bindx = np.zeros((7, 8), dtype=np.bool_)
+ >>> bindx[0, 0] = True
+ >>> arr[:, 0, bindx].shape
+ (5, 1)
+ >>> arr[0, :, bindx].shape
+ (1, 6)
+
+
+The combination with anything that is not a scalar is confusing, e.g.::
+
+ >>> arr[[0], :, bindx].shape # bindx result broadcasts with [0]
+ (1, 6)
+ >>> arr[:, [0, 1], bindx].shape # IndexError
+
+
+Outer indexing
+--------------
+
+Multiple indices are "orthogonal" and their result axes are inserted
+at the same place (they are not broadcast)::
+
+ >>> arr.oindex[:, [0], [0, 1], :].shape
+ (5, 1, 2, 8)
+ >>> arr.oindex[:, [0], :, [0, 1]].shape
+ (5, 1, 7, 2)
+ >>> arr.oindex[:, [0], 0, :].shape
+ (5, 1, 8)
+ >>> arr.oindex[:, [0], :, 0].shape
+ (5, 1, 7)
+
+
+Boolean index results are always inserted where the index is::
+
+ >>> # Create boolean index with one True value for the last two dimensions:
+ >>> bindx = np.zeros((7, 8), dtype=np.bool_)
+ >>> bindx[0, 0] = True
+ >>> arr.oindex[:, 0, bindx].shape
+ (5, 1)
+ >>> arr.oindex[0, :, bindx].shape
+ (6, 1)
+
+
+Nothing changes in the presence of other advanced indices::
+
+ >>> arr.oindex[[0], :, bindx].shape
+ (1, 6, 1)
+ >>> arr.oindex[:, [0, 1], bindx].shape
+ (5, 2, 1)
+
+
+Vectorized/inner indexing
+-------------------------
+
+Multiple indices are broadcast and iterated as one, like fancy indexing,
+but the new axes are always inserted at the front::
+
+ >>> arr.vindex[:, [0], [0, 1], :].shape
+ (2, 5, 8)
+ >>> arr.vindex[:, [0], :, [0, 1]].shape
+ (2, 5, 7)
+ >>> arr.vindex[:, [0], 0, :].shape
+ (1, 5, 8)
+ >>> arr.vindex[:, [0], :, 0].shape
+ (1, 5, 7)
+
+
+Boolean index results are always inserted where the index is, exactly
+as in ``oindex``, given how specific they are to the axes they operate on::
+
+ >>> # Create boolean index with one True value for the last two dimensions:
+ >>> bindx = np.zeros((7, 8), dtype=np.bool_)
+ >>> bindx[0, 0] = True
+ >>> arr.vindex[:, 0, bindx].shape
+ (5, 1)
+ >>> arr.vindex[0, :, bindx].shape
+ (6, 1)
+
+
+But other advanced indices are again transposed to the front::
+
+ >>> arr.vindex[[0], :, bindx].shape
+ (1, 6, 1)
+ >>> arr.vindex[:, [0, 1], bindx].shape
+ (2, 5, 1)
+
+
+Motivational Example
+~~~~~~~~~~~~~~~~~~~~
+
+Imagine a researcher with data acquisition software storing ``D`` channels
+and ``N`` datapoints over time. She stores this into an ``(N, D)`` shaped
+array. During data analysis, she needs to fetch a pool of channels, for
+example to calculate a mean over them.
+
+This data can be faked using::
+
+ >>> arr = np.random.random((100, 10))
+
+Now one may remember indexing with an integer array and find the correct code::
+
+    >>> group = arr[:, [2, 5]]
+    >>> mean_value = group.mean()
+
+However, assume that there are some specific time points (in the first
+dimension of the data) that need to be specially considered. These time
+points are already known and given by::
+
+ >>> interesting_times = np.array([1, 5, 8, 10], dtype=np.intp)
+
+Now to fetch them, we may try to modify the previous code::
+
+ >>> group_at_it = arr[interesting_times, [2, 5]]
+ IndexError: Ambiguous index, use `.oindex` or `.vindex`
+
+An error such as this will point the user to the indexing documentation.
+It should make clear that ``oindex`` behaves more like slicing, so that,
+out of the different methods, it is the obvious choice
+(for now the error is a shape mismatch, but it could possibly also mention
+``oindex``)::
+
+ >>> group_at_it = arr.oindex[interesting_times, [2, 5]]
+
+Now of course one could also have used ``vindex``, but it is much less
+obvious how to achieve the right result::
+
+ >>> reshaped_times = interesting_times[:, np.newaxis]
+ >>> group_at_it = arr.vindex[reshaped_times, [2, 5]]
+
+
+One may find that, for example, the data is corrupt in some places.
+So, we need to replace these values by zero (or anything else) for these
+times. The first column may, for example, give the necessary information,
+so that changing the values becomes easy, remembering boolean indexing::
+
+ >>> bad_data = arr[:, 0] > 0.5
+ >>> arr[bad_data, :] = 0 # (corrupts further examples)
+
+Again, however, the columns may need to be handled more individually (but in
+groups), and the ``oindex`` attribute works well::
+
+ >>> arr.oindex[bad_data, [2, 5]] = 0
+
+Note that it would be very hard to do this using legacy fancy indexing.
+The only way would be to create an integer array first::
+
+    >>> bad_data_indx = np.nonzero(bad_data)[0]
+    >>> bad_data_indx_reshaped = bad_data_indx[:, np.newaxis]
+    >>> arr[bad_data_indx_reshaped, [2, 5]] = 0
+
+In any case, we can use ``oindex`` alone to do all of this without getting
+into any trouble or being confused by the whole complexity of advanced
+indexing.
+
+But now some new features are added to the data acquisition: different
+sensors have to be used depending on the times. Let us assume we have
+already created an array of indices::
+
+ >>> correct_sensors = np.random.randint(10, size=(100, 2))
+
+This lists, for each time, the two correct sensors in an ``(N, 2)`` array.
+
+A first try to achieve this may be ``arr[:, correct_sensors]``, but this
+does not work. It should quickly be clear that slicing cannot achieve the
+desired result. But hopefully users will remember that there is ``vindex``
+as a more powerful and flexible approach to advanced indexing.
+One may, if trying ``vindex`` randomly, be confused about::
+
+ >>> new_arr = arr.vindex[:, correct_sensors]
+
+which is neither the same nor the correct result (see the transposing
+rules)! This is because slicing still works the same in ``vindex``. Reading
+the documentation and examples, one can hopefully quickly find the desired
+solution::
+
+ >>> rows = np.arange(len(arr))
+ >>> rows = rows[:, np.newaxis] # make shape fit with correct_sensors
+ >>> new_arr = arr.vindex[rows, correct_sensors]
+
+At this point we have left the straightforward world of ``oindex``, but we
+can do random picking of any element from the array. Note that in the last
+example a method such as mentioned in the `Related Operation` section could
+be more straightforward. But this approach is even more flexible, since
+``rows`` does not have to be a simple ``arange``, but could be
+``interesting_times``::
+
+    >>> interesting_times = np.array([0, 4, 8, 9, 10])
+    >>> correct_sensors_at_it = correct_sensors[interesting_times, :]
+    >>> interesting_times_reshaped = interesting_times[:, np.newaxis]
+    >>> new_arr_it = arr.vindex[interesting_times_reshaped, correct_sensors_at_it]
+
+A truly complex situation would arise if one were, for example, to pool
+``L`` experiments into an array shaped ``(L, N, D)``. But for ``oindex``
+this should not result in surprises. ``vindex``, being more powerful, will
+quite certainly create some confusion in this case, but it will also cover
+pretty much all eventualities.
+
+
+Copyright
+---------
+
+This document is placed under the CC0 1.0 Universal (CC0 1.0) Public Domain Dedication [1]_.
+
+
+References and Footnotes
+------------------------
+
+.. [1] To the extent possible under law, the person who associated CC0
+ with this work has waived all copyright and related or neighboring
+ rights to this work. The CC0 license may be found at
+ https://creativecommons.org/publicdomain/zero/1.0/
+
diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst
index 701ef1f6e..d9208e80b 100644
--- a/doc/release/1.16.0-notes.rst
+++ b/doc/release/1.16.0-notes.rst
@@ -37,3 +37,13 @@ Improvements
Changes
=======
+
+Comparison ufuncs will now error rather than return NotImplemented
+------------------------------------------------------------------
+
+Previously, comparison ufuncs such as ``np.equal`` would return
+`NotImplemented` if their arguments had structured dtypes, to help comparison
+operators such as ``__eq__`` deal with those. This is no longer needed, as the
+relevant logic has moved to the comparison operators proper (which thus do
+continue to return `NotImplemented` as needed). Hence, like all other ufuncs,
+the comparison ufuncs will now error on structured dtypes.
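+
+For example (the exact error message may differ)::
+
+    >>> a = np.zeros(3, dtype=[('x', 'i4')])
+    >>> np.equal(a, a)
+    Traceback (most recent call last):
+        ...
+    TypeError: ufunc 'equal' did not contain a loop with signature matching types ...
+
+Note that ``a == a`` continues to work, since that logic now lives in the
+comparison operators themselves.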
diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py
index 7cd80f432..727fb66d1 100644
--- a/numpy/core/numerictypes.py
+++ b/numpy/core/numerictypes.py
@@ -323,31 +323,37 @@ def _add_aliases():
# insert bit-width version for this class (if relevant)
base, bit, char = bitname(info.type)
- if base != '':
- myname = "%s%d" % (base, bit)
- if (name not in ('longdouble', 'clongdouble') or
- myname not in allTypes):
- base_capitalize = english_capitalize(base)
- if base == 'complex':
- na_name = '%s%d' % (base_capitalize, bit//2)
- elif base == 'bool':
- na_name = base_capitalize
- else:
- na_name = "%s%d" % (base_capitalize, bit)
- allTypes[myname] = info.type
+ assert base != ''
+ myname = "%s%d" % (base, bit)
+
+ # ensure that (c)longdouble does not overwrite the aliases assigned to
+ # (c)double
+ if name in ('longdouble', 'clongdouble') and myname in allTypes:
+ continue
+
+ base_capitalize = english_capitalize(base)
+ if base == 'complex':
+ na_name = '%s%d' % (base_capitalize, bit//2)
+ elif base == 'bool':
+ na_name = base_capitalize
+ else:
+ na_name = "%s%d" % (base_capitalize, bit)
+
+ allTypes[myname] = info.type
+
+ # add mapping for both the bit name and the numarray name
+ sctypeDict[myname] = info.type
+ sctypeDict[na_name] = info.type
- # add mapping for both the bit name and the numarray name
- sctypeDict[myname] = info.type
- sctypeDict[na_name] = info.type
+ # add forward, reverse, and string mapping to numarray
+ sctypeNA[na_name] = info.type
+ sctypeNA[info.type] = na_name
+ sctypeNA[info.char] = na_name
- # add forward, reverse, and string mapping to numarray
- sctypeNA[na_name] = info.type
- sctypeNA[info.type] = na_name
- sctypeNA[info.char] = na_name
- if char != '':
- sctypeDict[char] = info.type
- sctypeNA[char] = na_name
+ assert char != ''
+ sctypeDict[char] = info.type
+ sctypeNA[char] = na_name
_add_aliases()
def _add_integer_aliases():
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 41748c714..368f5ded7 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -1249,7 +1249,8 @@ PyArray_ChainExceptionsCause(PyObject *exc, PyObject *val, PyObject *tb)
}
}
-/* Silence the current error and emit a deprecation warning instead.
+/*
+ * Silence the current error and emit a deprecation warning instead.
*
* If warnings are raised as errors, this sets the warning __cause__ to the
* silenced error.
@@ -1268,6 +1269,118 @@ DEPRECATE_silence_error(const char *msg) {
return 0;
}
+/*
+ * Comparisons can fail, but we do not always want to pass on the exception
+ * (see comment in array_richcompare below), but rather return NotImplemented.
+ * Here, an exception should be set on entrance.
+ * Returns either NotImplemented with the exception cleared, or NULL
+ * with the exception set.
+ * Raises deprecation warnings for cases where behaviour is meant to change
+ * (2015-05-14, 1.10)
+ */
+
+NPY_NO_EXPORT PyObject *
+_failed_comparison_workaround(PyArrayObject *self, PyObject *other, int cmp_op)
+{
+ PyObject *exc, *val, *tb;
+ PyArrayObject *array_other;
+ int other_is_flexible, ndim_other;
+ int self_is_flexible = PyTypeNum_ISFLEXIBLE(PyArray_DESCR(self)->type_num);
+
+ PyErr_Fetch(&exc, &val, &tb);
+ /*
+ * Determine whether other has a flexible dtype; here, inconvertible
+ * is counted as inflexible. (This repeats work done in the ufunc,
+ * but OK to waste some time in an unlikely path.)
+ */
+ array_other = (PyArrayObject *)PyArray_FROM_O(other);
+ if (array_other) {
+ other_is_flexible = PyTypeNum_ISFLEXIBLE(
+ PyArray_DESCR(array_other)->type_num);
+ ndim_other = PyArray_NDIM(array_other);
+ Py_DECREF(array_other);
+ }
+ else {
+ PyErr_Clear(); /* we restore the original error if needed */
+ other_is_flexible = 0;
+ ndim_other = 0;
+ }
+ if (cmp_op == Py_EQ || cmp_op == Py_NE) {
+ /*
+ * note: for == and !=, a structured dtype self cannot get here,
+ * but a string can. Other can be string or structured.
+ */
+ if (other_is_flexible || self_is_flexible) {
+ /*
+ * For scalars, returning NotImplemented is correct.
+ * For arrays, we emit a future deprecation warning.
+ * When this warning is removed, a correctly shaped
+ * array of bool should be returned.
+ */
+ if (ndim_other != 0 || PyArray_NDIM(self) != 0) {
+ /* 2015-05-14, 1.10 */
+ if (DEPRECATE_FUTUREWARNING(
+ "elementwise comparison failed; returning scalar "
+ "instead, but in the future will perform "
+ "elementwise comparison") < 0) {
+ goto fail;
+ }
+ }
+ }
+ else {
+ /*
+ * If neither self nor other had a flexible dtype, the error cannot
+ * have been caused by a lack of implementation in the ufunc.
+ *
+ * 2015-05-14, 1.10
+ */
+ if (DEPRECATE(
+ "elementwise comparison failed; "
+ "this will raise an error in the future.") < 0) {
+ goto fail;
+ }
+ }
+ Py_XDECREF(exc);
+ Py_XDECREF(val);
+ Py_XDECREF(tb);
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ else if (other_is_flexible || self_is_flexible) {
+ /*
+ * For LE, LT, GT, GE and a flexible self or other, we return
+ * NotImplemented, which is the correct answer since the ufuncs do
+ * not in fact implement loops for those. On python 3 this will
+ * get us the desired TypeError, but on python 2, one gets strange
+ * ordering, so we emit a warning.
+ */
+#if !defined(NPY_PY3K)
+ /* 2015-05-14, 1.10 */
+ if (DEPRECATE(
+ "unorderable dtypes; returning scalar but in "
+ "the future this will be an error") < 0) {
+ goto fail;
+ }
+#endif
+ Py_XDECREF(exc);
+ Py_XDECREF(val);
+ Py_XDECREF(tb);
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ else {
+ /* LE, LT, GT, or GE with non-flexible other; just pass on error */
+ goto fail;
+ }
+
+fail:
+ /*
+ * Reraise the original exception, possibly chaining with a new one.
+ */
+ PyArray_ChainExceptionsCause(exc, val, tb);
+ return NULL;
+}
+
NPY_NO_EXPORT PyObject *
array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
{
@@ -1365,26 +1478,6 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
result = PyArray_GenericBinaryFunction(self,
(PyObject *)other,
n_ops.equal);
- /*
- * If the comparison results in NULL, then the
- * two array objects can not be compared together;
- * indicate that
- */
- if (result == NULL) {
- /*
- * Comparisons should raise errors when element-wise comparison
- * is not possible.
- */
- /* 2015-05-14, 1.10 */
- if (DEPRECATE_silence_error(
- "elementwise == comparison failed; "
- "this will raise an error in the future.") < 0) {
- return NULL;
- }
-
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
break;
case Py_NE:
RICHCMP_GIVE_UP_IF_NEEDED(obj_self, other);
@@ -1436,21 +1529,6 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
result = PyArray_GenericBinaryFunction(self, (PyObject *)other,
n_ops.not_equal);
- if (result == NULL) {
- /*
- * Comparisons should raise errors when element-wise comparison
- * is not possible.
- */
- /* 2015-05-14, 1.10 */
- if (DEPRECATE_silence_error(
- "elementwise != comparison failed; "
- "this will raise an error in the future.") < 0) {
- return NULL;
- }
-
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
break;
case Py_GT:
RICHCMP_GIVE_UP_IF_NEEDED(obj_self, other);
@@ -1463,8 +1541,37 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
n_ops.greater_equal);
break;
default:
- result = Py_NotImplemented;
- Py_INCREF(result);
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ if (result == NULL) {
+ /*
+ * 2015-05-14, 1.10; updated 2018-06-18, 1.16.
+ *
+ * Comparisons can raise errors when element-wise comparison is not
+ * possible. Some of these, though, should not be passed on.
+ * In particular, the ufuncs do not have loops for flexible dtype,
+ * so those should be treated separately. Furthermore, for EQ and NE,
+ * we should never fail.
+ *
+ * Our ideal behaviour would be:
+ *
+ * 1. For EQ and NE:
+ * - If self and other are scalars, return NotImplemented,
+ * so that python can assign True of False as appropriate.
+     *       so that Python can assign True or False as appropriate.
+ *
+ * 2. For LT, LE, GE, GT:
+ * - If self or other was flexible, return NotImplemented
+ * (as is in fact the case), so python can raise a TypeError.
+ * - If other is not convertible to an array, pass on the error
+ * (MHvK, 2018-06-18: not sure about this, but it's what we have).
+ *
+     * However, for backwards compatibility, we cannot yet return arrays,
+ * so we raise warnings instead. Furthermore, we warn on python2
+ * for LT, LE, GE, GT, since fall-back behaviour is poorly defined.
+ */
+ result = _failed_comparison_workaround(self, other, cmp_op);
}
return result;
}
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 59fc5aa20..b964c568e 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -577,9 +577,6 @@ get_ufunc_arguments(PyUFuncObject *ufunc,
PyObject *obj, *context;
PyObject *str_key_obj = NULL;
const char *ufunc_name = ufunc_get_name_cstr(ufunc);
- int type_num;
-
- int any_flexible = 0, any_object = 0, any_flexible_userloops = 0;
int has_sig = 0;
/*
@@ -638,166 +635,6 @@ get_ufunc_arguments(PyUFuncObject *ufunc,
if (out_op[i] == NULL) {
goto fail;
}
-
- type_num = PyArray_DESCR(out_op[i])->type_num;
- if (!any_flexible &&
- PyTypeNum_ISFLEXIBLE(type_num)) {
- any_flexible = 1;
- }
- if (!any_object &&
- PyTypeNum_ISOBJECT(type_num)) {
- any_object = 1;
- }
-
- /*
- * If any operand is a flexible dtype, check to see if any
- * struct dtype ufuncs are registered. A ufunc has been registered
- * for a struct dtype if ufunc's arg_dtypes array is not NULL.
- */
- if (PyTypeNum_ISFLEXIBLE(type_num) &&
- !any_flexible_userloops &&
- ufunc->userloops != NULL) {
- PyUFunc_Loop1d *funcdata;
- PyObject *key, *obj;
- key = PyInt_FromLong(type_num);
- if (key == NULL) {
- continue;
- }
- obj = PyDict_GetItem(ufunc->userloops, key);
- Py_DECREF(key);
- if (obj == NULL) {
- continue;
- }
- funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
- while (funcdata != NULL) {
- if (funcdata->arg_dtypes != NULL) {
- any_flexible_userloops = 1;
- break;
- }
- funcdata = funcdata->next;
- }
- }
- }
-
- if (any_flexible && !any_flexible_userloops && !any_object && nin == 2) {
- /* Traditionally, we return -2 here (meaning "NotImplemented") anytime
- * we hit the above condition.
- *
- * This condition basically means "we are doomed", b/c the "flexible"
- * dtypes -- strings and void -- cannot have their own ufunc loops
- * registered (except via the special "flexible userloops" mechanism),
- * and they can't be cast to anything except object (and we only cast
- * to object if any_object is true). So really we should do nothing
- * here and continue and let the proper error be raised. But, we can't
- * quite yet, b/c of backcompat.
- *
- * Most of the time, this NotImplemented either got returned directly
- * to the user (who can't do anything useful with it), or got passed
- * back out of a special function like __mul__. And fortunately, for
- * almost all special functions, the end result of this was a
- * TypeError. Which is also what we get if we just continue without
- * this special case, so this special case is unnecessary.
- *
- * The only thing that actually depended on the NotImplemented is
- * array_richcompare, which did two things with it. First, it needed
- * to see this NotImplemented in order to implement the special-case
- * comparisons for
- *
- * string < <= == != >= > string
- * void == != void
- *
- * Now it checks for those cases first, before trying to call the
- * ufunc, so that's no problem. What it doesn't handle, though, is
- * cases like
- *
- * float < string
- *
- * or
- *
- * float == void
- *
- * For those, it just let the NotImplemented bubble out, and accepted
- * Python's default handling. And unfortunately, for comparisons,
- * Python's default is *not* to raise an error. Instead, it returns
- * something that depends on the operator:
- *
- * == return False
- * != return True
- * < <= >= > Python 2: use "fallback" (= weird and broken) ordering
- * Python 3: raise TypeError (hallelujah)
- *
- * In most cases this is straightforwardly broken, because comparison
- * of two arrays should always return an array, and here we end up
- * returning a scalar. However, there is an exception: if we are
- * comparing two scalars for equality, then it actually is correct to
- * return a scalar bool instead of raising an error. If we just
- * removed this special check entirely, then "np.float64(1) == 'foo'"
- * would raise an error instead of returning False, which is genuinely
- * wrong.
- *
- * The proper end goal here is:
- * 1) == and != should be implemented in a proper vectorized way for
- * all types. The short-term hack for this is just to add a
- * special case to PyUFunc_DefaultLegacyInnerLoopSelector where
- * if it can't find a comparison loop for the given types, and
- * the ufunc is np.equal or np.not_equal, then it returns a loop
- * that just fills the output array with False (resp. True). Then
- * array_richcompare could trust that whenever its special cases
- * don't apply, simply calling the ufunc will do the right thing,
- * even without this special check.
- * 2) < <= >= > should raise an error if no comparison function can
- * be found. array_richcompare already handles all string <>
- * string cases, and void dtypes don't have ordering, so again
- * this would mean that array_richcompare could simply call the
- * ufunc and it would do the right thing (i.e., raise an error),
- * again without needing this special check.
- *
- * So this means that for the transition period, our goal is:
- * == and != on scalars should simply return NotImplemented like
- * they always did, since everything ends up working out correctly
- * in this case only
- * == and != on arrays should issue a FutureWarning and then return
- * NotImplemented
- * < <= >= > on all flexible dtypes on py2 should raise a
- * DeprecationWarning, and then return NotImplemented. On py3 we
- * skip the warning, though, b/c it would just be immediately be
- * followed by an exception anyway.
- *
- * And for all other operations, we let things continue as normal.
- */
- /* strcmp() is a hack but I think we can get away with it for this
- * temporary measure.
- */
- if (!strcmp(ufunc_name, "equal") ||
- !strcmp(ufunc_name, "not_equal")) {
- /* Warn on non-scalar, return NotImplemented regardless */
- if (PyArray_NDIM(out_op[0]) != 0 ||
- PyArray_NDIM(out_op[1]) != 0) {
- if (DEPRECATE_FUTUREWARNING(
- "elementwise comparison failed; returning scalar "
- "instead, but in the future will perform elementwise "
- "comparison") < 0) {
- goto fail;
- }
- }
- Py_DECREF(out_op[0]);
- Py_DECREF(out_op[1]);
- return -2;
- }
- else if (!strcmp(ufunc_name, "less") ||
- !strcmp(ufunc_name, "less_equal") ||
- !strcmp(ufunc_name, "greater") ||
- !strcmp(ufunc_name, "greater_equal")) {
-#if !defined(NPY_PY3K)
- if (DEPRECATE("unorderable dtypes; returning scalar but in "
- "the future this will be an error") < 0) {
- goto fail;
- }
-#endif
- Py_DECREF(out_op[0]);
- Py_DECREF(out_op[1]);
- return -2;
- }
}
/* Get positional output arguments */
@@ -4480,22 +4317,7 @@ ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
errval = PyUFunc_GenericFunction(ufunc, args, kwds, mps);
if (errval < 0) {
- if (errval == -1) {
- return NULL;
- }
- else if (ufunc->nin == 2 && ufunc->nout == 1) {
- /*
- * For array_richcompare's benefit -- see the long comment in
- * get_ufunc_arguments.
- */
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
- else {
- PyErr_SetString(PyExc_TypeError,
- "XX can't happen, please report a bug XX");
- return NULL;
- }
+ return NULL;
}
/* Free the input references */
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 285b2de3c..8eb258666 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -190,10 +190,10 @@ class TestComparisonDeprecations(_DeprecationTestCase):
b = np.array(['a', 'b', 'c'])
assert_raises(ValueError, lambda x, y: x == y, a, b)
- # The empty list is not cast to string, as this is only to document
- # that fact (it likely should be changed). This means that the
- # following works (and returns False) due to dtype mismatch:
- a == []
+ # The empty list is not cast to string, and this used to pass due
+ # to dtype mismatch; now (2018-06-21) it correctly leads to a
+ # FutureWarning.
+ assert_warns(FutureWarning, lambda: a == [])
def test_void_dtype_equality_failures(self):
class NotArray(object):
diff --git a/numpy/core/tests/test_numerictypes.py b/numpy/core/tests/test_numerictypes.py
index cdf1b0490..4c3cc6c9e 100644
--- a/numpy/core/tests/test_numerictypes.py
+++ b/numpy/core/tests/test_numerictypes.py
@@ -406,3 +406,9 @@ class TestIsSubDType(object):
for w1, w2 in itertools.product(self.wrappers, repeat=2):
assert_(not np.issubdtype(w1(np.float32), w2(np.float64)))
assert_(not np.issubdtype(w1(np.float64), w2(np.float32)))
+
+
+class TestSctypeDict(object):
+ def test_longdouble(self):
+ assert_(np.sctypeDict['f8'] is not np.longdouble)
+ assert_(np.sctypeDict['c16'] is not np.clongdouble)
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index ef9ced354..0e564e305 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -1643,6 +1643,16 @@ class TestUfunc(object):
target = np.array([ True, False, False, False], dtype=bool)
assert_equal(np.all(target == (mra == ra[0])), True)
+ def test_scalar_equal(self):
+        # Scalar comparisons should always work, without deprecation
+        # warnings, even when the ufunc itself fails.
+ a = np.array(0.)
+ b = np.array('a')
+ assert_(a != b)
+ assert_(b != a)
+ assert_(not (a == b))
+ assert_(not (b == a))
+
def test_NotImplemented_not_returned(self):
# See gh-5964 and gh-2091. Some of these functions are not operator
# related and were fixed for other reasons in the past.
@@ -1652,17 +1662,16 @@ class TestUfunc(object):
np.bitwise_xor, np.left_shift, np.right_shift, np.fmax,
np.fmin, np.fmod, np.hypot, np.logaddexp, np.logaddexp2,
np.logical_and, np.logical_or, np.logical_xor, np.maximum,
- np.minimum, np.mod
- ]
-
- # These functions still return NotImplemented. Will be fixed in
- # future.
- # bad = [np.greater, np.greater_equal, np.less, np.less_equal, np.not_equal]
+ np.minimum, np.mod,
+ np.greater, np.greater_equal, np.less, np.less_equal,
+ np.equal, np.not_equal]
a = np.array('1')
b = 1
+ c = np.array([1., 2.])
for f in binary_funcs:
assert_raises(TypeError, f, a, b)
+ assert_raises(TypeError, f, c, a)
def test_reduce_noncontig_output(self):
# Check that reduction deals with non-contiguous output arrays