-rw-r--r-- | doc/neps/nep-0021-advanced-indexing.rst | 537
-rw-r--r-- | doc/release/1.16.0-notes.rst            |  10
-rw-r--r-- | numpy/core/numerictypes.py              |  50
-rw-r--r-- | numpy/core/src/multiarray/arrayobject.c | 183
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c     | 180
-rw-r--r-- | numpy/core/tests/test_deprecations.py   |   8
-rw-r--r-- | numpy/core/tests/test_numerictypes.py   |   6
-rw-r--r-- | numpy/core/tests/test_ufunc.py          |  21
8 files changed, 746 insertions, 249 deletions
diff --git a/doc/neps/nep-0021-advanced-indexing.rst b/doc/neps/nep-0021-advanced-indexing.rst
new file mode 100644
index 000000000..92d40d2eb
--- /dev/null
+++ b/doc/neps/nep-0021-advanced-indexing.rst
@@ -0,0 +1,537 @@
+==========================================================
+Implementing intuitive and full featured advanced indexing
+==========================================================
+
+:Author: Sebastian Berg
+:Status: Draft
+:Type: Standards Track
+:Created: 2015-08-27
+
+
+Abstract
+--------
+
+Advanced indexing with multiple array indices is typically confusing to
+both new, and in many cases even old, users of NumPy. To avoid this problem
+and allow for more and clearer features, we propose to:
+
+1. Introduce ``arr.oindex[indices]`` which allows advanced indices, but
+   uses outer indexing logic.
+2. Introduce ``arr.vindex[indices]`` which uses the current
+   "vectorized"/broadcasted logic, but with two differences from
+   fancy indexing:
+
+   1. Boolean indices always use the outer indexing logic.
+      (Multi-dimensional booleans should be allowed.)
+   2. The integer index result dimensions are always the first axes
+      of the result array. No transpose is done, even for a single
+      integer array index.
+
+3. Plain indexing on the array will only give warnings and eventually
+   errors either:
+
+   * when there is ambiguity between legacy fancy and outer indexing
+     (note that ``arr[[1, 2], :, 0]`` is such a case; an integer
+     can be the "second" integer index array),
+   * when any integer index array is present (possibly additionally
+     when more than one boolean index array is present).
+
+These constraints are sufficient for making indexing generally consistent
+with expectations and providing a less surprising learning curve with
+``oindex``.
+
+Note that all things mentioned here apply both to assignment and to
+subscription.
+
+Understanding these details is *not* easy. The `Examples` section in the
+discussion gives code examples, and the hopefully easier `Motivational
+Example` section provides some motivating use-cases for the general ideas
+and is likely a good start for anyone not intimately familiar with advanced
+indexing.
+
+
+Detailed Description
+--------------------
+
+Old style advanced indexing with multiple array (boolean or integer) indices,
+also called "fancy indexing", tends to be very confusing for new users.
+While fancy (or legacy) indexing is useful in many cases, one would naively
+assume that the result of multiple 1-d ranges is analogous to multiple
+slices along each dimension (also called "outer indexing").
+
+However, legacy fancy indexing with multiple arrays broadcasts these arrays
+into a single index over multiple dimensions. There are three main points
+of confusion when multiple array indices are involved:
+
+1. Most new users will usually expect outer indexing (consistent with
+   slicing). This is also the most common way of handling this in other
+   packages or languages.
+2. The axes introduced by the array indices are at the front, unless
+   all array indices are consecutive, in which case one can deduce where
+   the user "expects" them to be (see the demonstration below):
+
+   * ``arr[:, [0, 1], :, [0, 1]]`` will have its *first* dimension of size 2.
+   * ``arr[:, [0, 1], [0, 1]]`` will have its *second* dimension of size 2.
+
+3. When a boolean array index is mixed with another boolean or integer
+   array, the result is very hard to understand (the boolean array is
+   converted to integer array indices and then broadcast), and hardly
+   useful.
+   There is no well defined broadcast for booleans, so boolean
+   indices are logically always "outer" type indices.
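[Editor's illustration, not part of the NEP text or the patch: point 2 above can
be demonstrated with current NumPy, using the same ``(5, 6, 7, 8)`` array shape
the NEP's Examples section uses later.]::

    >>> import numpy as np
    >>> arr = np.ones((5, 6, 7, 8))
    >>> arr[:, [0, 1], :, [0, 1]].shape   # non-consecutive: index axes go to the front
    (2, 5, 7)
    >>> arr[:, [0, 1], [0, 1]].shape      # consecutive: index axes stay in place
    (5, 2, 8)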
+
+
+Furthermore, since other packages use outer type logic, enabling its
+use in NumPy will allow simpler cross-package code.
+
+
+Proposed rules
+~~~~~~~~~~~~~~
+
+From the three problems noted above, some expectations for NumPy can
+be deduced:
+
+1. There should be a prominent outer/orthogonal indexing method such as
+   ``arr.oindex[indices]``.
+
+2. Considering how confusing vectorized/fancy indexing can be, it should
+   be possible to make it more explicit (e.g. ``arr.vindex[indices]``).
+
+3. A new ``arr.vindex[indices]`` method would not be tied to the
+   confusing transpose rules of fancy indexing, which are for example
+   invoked even for the simple case of a single advanced index. Thus,
+   no transposing should be done. The axes created by the integer array
+   indices are always inserted at the front, even for a single index.
+
+4. Boolean indexing is conceptually outer indexing. Broadcasting it
+   together with other advanced indices in the manner of legacy
+   "fancy indexing" is generally not helpful or well defined.
+   A user who wishes the "``nonzero``" plus broadcast behaviour can thus
+   be expected to do this manually. Thus, ``vindex`` would still use
+   outer type indexing for boolean index arrays.
+   Note that under this rule a single boolean index can still index into
+   multiple dimensions at once.
+
+5. An ``arr.lindex`` or ``arr.findex`` should likely be implemented to allow
+   legacy fancy indexing indefinitely. This also gives a simple way to
+   update fancy indexing code, making deprecations of plain indexing
+   easier.
+
+6. Plain indexing ``arr[...]`` should return an error for ambiguous cases.
+   To begin with, this probably means that cases where ``arr[ind]`` and
+   ``arr.oindex[ind]`` return different results give deprecation warnings.
+   Due to the transposing behaviour, this means that ``arr[0, :, index_arr]``
+   will be deprecated, but ``arr[:, 0, index_arr]`` will not for the time
+   being.
+
+Since, unlike plain indexing, the new indexing attributes are explicitly
+aimed at higher dimensional indexing, a few more changes should be
+implemented:
+
+* The indexing attributes will enforce an exact dimension and indexing
+  match. This means that no implicit ellipsis (``...``) will be added. Unless
+  an ellipsis is present, the indexing expression will thus only work for
+  an array with a specific number of dimensions.
+  This makes the expression more explicit and safeguards against wrong
+  dimensionality of arrays.
+
+* Current plain indexing allows the use of non-tuples for
+  multi-dimensional indexing, such as ``arr[[slice(None), 2]]``.
+  This creates some inconsistencies, and thus the indexing attributes
+  should only allow plain Python tuples for this purpose.
+  (Whether or not this should be the case for plain indexing is a
+  different issue.)
+
+* The new attributes should not use the getitem-to-implement-setitem
+  branch, since it is a kludge and not useful for vectorized
+  indexing. (Not implemented yet.)
+
+
+Open Questions
+~~~~~~~~~~~~~~
+
+* The names ``oindex`` and ``vindex`` are just suggestions at the time of
+  writing this; another name NumPy has used for something like ``oindex``
+  is ``np.ix_``. See also below.
+
+* It would be possible to limit the use of boolean indices in ``vindex``,
+  assuming that they are rare and to some degree special. Alternatively,
+  boolean indices could in principle be broadcast as in legacy fancy
+  indexing.
+
+* ``oindex`` and ``vindex`` could always return copies, even when no array
+  operation occurs. One argument for allowing a view return is that this way
+  ``oindex`` can be used as a general index replacement.
+  However, there is one argument for returning copies: it is possible for
+  ``arr.vindex[array_scalar, ...]`` that ``array_scalar`` should be
+  a 0-D array but is not, since 0-D arrays tend to be converted.
+  Copying always "fixes" this possible inconsistency.
+
+* The final state into which plain indexing should morph is not fixed by
+  this NEP. It is for example possible that ``arr[index]`` will be
+  equivalent to ``arr.oindex`` at some point in the future.
+  Since such a change will take years, it seems unnecessary to make
+  specific decisions at this time.
+
+* The proposed changes to plain indexing could be postponed indefinitely,
+  or dropped, in order not to break or force major fixes to existing code
+  bases.
+
+* Should there be (and is it possible to have) a mechanism to disallow the
+  new indexing attributes for subclasses unless specifically implemented?
+
+* Should we try to warn users about the special indexing attributes
+  not being implemented? If a subclass has its own indexing, inheriting
+  the attributes from ndarray would be wrong.
+
+
+
+Alternative Names
+~~~~~~~~~~~~~~~~~
+
+Possible names suggested (more suggestions will be added).
+
+============== ============ ================
+**Orthogonal** oindex       oix
+**Vectorized** vindex       vix
+**Legacy**     l/findex     legacy_index
+============== ============ ================
+
+
+Subclasses
+~~~~~~~~~~
+
+Subclasses are a bit problematic in the light of these changes. There are
+some possible solutions for this. For most subclasses (those which do not
+provide ``__getitem__`` or ``__setitem__``) the special attributes should
+just work. Subclasses that *do* provide them must be updated accordingly
+and should preferably not subclass working versions of these attributes.
+
+All subclasses will inherit the attributes; however, it seems possible
+to test ``subclass.__getitem__.__classobj__`` when getting e.g.
+``subclass.vindex``. If this is not ``ndarray``, the subclass has special
+handling and an ``AttributeError`` can be given.
+
+A further question is how to facilitate implementing the special attributes.
+Also, there is the weird functionality where ``__setitem__`` calls
+``__getitem__`` for non-advanced indices. It might be good to avoid it for
+the new attributes, but on the other hand, that may make it even more
+confusing.
+
+To facilitate implementations, we could provide functions similar to
+``operator.itemgetter`` and ``operator.setitem`` for the attributes.
+Possibly a mixin could be provided to help implementation, but further
+ideas on this are necessary.
+
+Related Operation
+~~~~~~~~~~~~~~~~~
+
+There exists a further indexing, or indexing-like, method: the inverse
+of a command such as ``np.argmin(arr, axis=axis)``, which picks
+specific elements *along* an axis given an array of (at least
+typically) the same size.
+
+These functions were added to NumPy in version 1.15 as
+``np.take_along_axis`` and ``np.put_along_axis``.
+
+
+Implementation
+--------------
+
+Implementation of a special indexing object available through
+``arr.oindex``, ``arr.vindex``, and ``arr.lindex`` to allow these indexing
+operations, while starting to deprecate those plain index operations
+which are ambiguous.
+Furthermore, the NumPy code base will need to use the new attributes and
+tests will have to be adapted.
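[Editor's illustration, not part of the NEP text or the patch: for integer
array indices, the proposed ``arr.oindex`` behaviour can already be
approximated with ``np.ix_``, which the Open Questions section above mentions
as prior art. ``np.ix_`` only accepts 1-d integer or boolean sequences and
cannot mix in slices, which is part of the motivation for a dedicated
attribute.]::

    >>> import numpy as np
    >>> arr = np.ones((5, 6, 7, 8))
    >>> # Outer indexing of the first two axes: shapes combine orthogonally,
    >>> # where legacy fancy indexing would broadcast the two index arrays.
    >>> arr[np.ix_([0, 1], [0, 2, 4])].shape
    (2, 3, 7, 8)
    >>> # Under this proposal the same selection would be spelled
    >>> # arr.oindex[[0, 1], [0, 2, 4], :, :]  (hypothetical attribute)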
+
+
+Backward compatibility
+----------------------
+
+As a new feature, no backward compatibility issues would arise.
+Some forward compatibility issues with subclasses that do not specifically
+implement the new methods may arise.
+
+
+Alternatives
+------------
+
+NumPy may choose not to offer these different types of indexing methods, or
+may choose to only offer them through specific functions instead of the
+proposed notation above.
+For variations, see also the open questions section above.
+
+
+Discussion
+----------
+
+Some discussion can be found on the pull request:
+
+ * https://github.com/numpy/numpy/pull/6256
+
+A Python implementation of the indexing operations can be found at:
+
+ * https://gist.github.com/shoyer/c700193625347eb68fee4d1f0dc8c0c8
+
+
+Examples
+~~~~~~~~
+
+Since the various kinds of indexing are hard to grasp in many cases, these
+examples hopefully give some more insight. Note that they are all given in
+terms of shapes.
+In the examples, all original dimensions have 5 or more elements;
+advanced indexing inserts smaller dimensions.
+These examples may be hard to grasp without working knowledge of advanced
+indexing as of NumPy 1.9.
+
+Example array::
+
+    >>> arr = np.ones((5, 6, 7, 8))
+
+
+Legacy fancy indexing
+---------------------
+
+Note that the same results can be achieved with ``arr.lindex``; the cases
+marked "future error" below still work with plain indexing for now.
+
+A single index is transposed (this is the same for all indexing types)::
+
+    >>> arr[[0], ...].shape
+    (1, 6, 7, 8)
+    >>> arr[:, [0], ...].shape
+    (5, 1, 7, 8)
+
+
+Multiple indices are transposed *if* consecutive::
+
+    >>> arr[:, [0], [0], :].shape  # future error
+    (5, 1, 8)
+    >>> arr[:, [0], :, [0]].shape  # future error
+    (1, 5, 7)
+
+
+It is important to note that a scalar *is* an integer array index in this
+sense (and gets broadcast with the other advanced index)::
+
+    >>> arr[:, [0], 0, :].shape
+    (5, 1, 8)
+    >>> arr[:, [0], :, 0].shape  # future error (scalar is "fancy")
+    (1, 5, 7)
+
+
+A single boolean index can act on multiple dimensions (especially the whole
+array). It has to match the dimensions (as of 1.10, a mismatch gives a
+deprecation warning).
+The boolean index is otherwise identical to (multiple consecutive) integer
+array indices::
+
+    >>> # Create a boolean index with one True value for the last two dimensions:
+    >>> bindx = np.zeros((7, 8), dtype=np.bool_)
+    >>> bindx[0, 0] = True
+    >>> arr[:, 0, bindx].shape
+    (5, 1)
+    >>> arr[0, :, bindx].shape
+    (1, 6)
+
+
+The combination with anything that is not a scalar is confusing, e.g.::
+
+    >>> arr[[0], :, bindx].shape  # bindx result broadcasts with [0]
+    (1, 6)
+    >>> arr[:, [0, 1], bindx].shape  # IndexError
+
+
+Outer indexing
+--------------
+
+Multiple indices are "orthogonal" and their result axes are inserted
+at the same place (they are not broadcast)::
+
+    >>> arr.oindex[:, [0], [0, 1], :].shape
+    (5, 1, 2, 8)
+    >>> arr.oindex[:, [0], :, [0, 1]].shape
+    (5, 1, 7, 2)
+    >>> arr.oindex[:, [0], 0, :].shape
+    (5, 1, 8)
+    >>> arr.oindex[:, [0], :, 0].shape
+    (5, 1, 7)
+
+
+Boolean index results are always inserted where the index is::
+
+    >>> # Create a boolean index with one True value for the last two dimensions:
+    >>> bindx = np.zeros((7, 8), dtype=np.bool_)
+    >>> bindx[0, 0] = True
+    >>> arr.oindex[:, 0, bindx].shape
+    (5, 1)
+    >>> arr.oindex[0, :, bindx].shape
+    (6, 1)
+
+
+Nothing changes in the presence of other advanced indices::
+
+    >>> arr.oindex[[0], :, bindx].shape
+    (1, 6, 1)
+    >>> arr.oindex[:, [0, 1], bindx].shape
+    (5, 2, 1)
+
+
+Vectorized/inner indexing
+-------------------------
+
+Multiple indices are broadcast and iterated as one, like fancy indexing,
+but the new axes are always inserted at the front::
+
+    >>> arr.vindex[:, [0], [0, 1], :].shape
+    (2, 5, 8)
+    >>> arr.vindex[:, [0], :, [0, 1]].shape
+    (2, 5, 7)
+    >>> arr.vindex[:, [0], 0, :].shape
+    (1, 5, 8)
+    >>> arr.vindex[:, [0], :, 0].shape
+    (1, 5, 7)
+
+
+Boolean index results are always inserted where the index is, exactly
+as in ``oindex``, given how specific they are to the axes they operate on::
+
+    >>> # Create a boolean index with one True value for the last two dimensions:
+    >>> bindx = np.zeros((7, 8), dtype=np.bool_)
+    >>> bindx[0, 0] = True
+    >>> arr.vindex[:, 0, bindx].shape
+    (5, 1)
+    >>> arr.vindex[0, :, bindx].shape
+    (6, 1)
+
+
+But other advanced indices are again transposed to the front::
+
+    >>> arr.vindex[[0], :, bindx].shape
+    (1, 6, 1)
+    >>> arr.vindex[:, [0, 1], bindx].shape
+    (2, 5, 1)
+
+
+Motivational Example
+~~~~~~~~~~~~~~~~~~~~
+
+Imagine a data acquisition program storing ``D`` channels and ``N`` data
+points along time into an ``(N, D)`` shaped array. During data analysis,
+we need to fetch a pool of channels, for example to calculate a mean over
+them.
+
+This data can be faked using::
+
+    >>> arr = np.random.random((100, 10))
+
+Now one may remember indexing with an integer array and find the correct
+code::
+
+    >>> group = arr[:, [2, 5]]
+    >>> mean_value = group.mean()
+
+However, assume that there were some specific time points (first dimension
+of the data) that need to be specially considered. These time points are
+already known and given by::
+
+    >>> interesting_times = np.array([1, 5, 8, 10], dtype=np.intp)
+
+Now to fetch them, we may try to modify the previous code::
+
+    >>> group_at_it = arr[interesting_times, [2, 5]]
+    IndexError: Ambiguous index, use `.oindex` or `.vindex`
+
+An error such as this will point the user to the indexing documentation.
+This should make it clear that ``oindex`` behaves more like slicing.
+So, out of the different methods, it is the obvious choice
+(for now, this is a shape mismatch, but that error could possibly also
+mention ``oindex``)::
+
+    >>> group_at_it = arr.oindex[interesting_times, [2, 5]]
+
+Now of course one could also have used ``vindex``, but it is much less
+obvious how to achieve the right thing::
+
+    >>> reshaped_times = interesting_times[:, np.newaxis]
+    >>> group_at_it = arr.vindex[reshaped_times, [2, 5]]
+
+
+One may find that, for example, the data is corrupt in some places.
+So, we need to replace these values by zero (or anything else) for these
+times. The first column may, for example, give the necessary information,
+so that changing the values is easy for anyone remembering boolean
+indexing::
+
+    >>> bad_data = arr[:, 0] > 0.5
+    >>> arr[bad_data, :] = 0  # (corrupts further examples)
+
+Again, however, the columns may need to be handled more individually (but in
+groups), and the ``oindex`` attribute works well::
+
+    >>> arr.oindex[bad_data, [2, 5]] = 0
+
+Note that it would be very hard to do this using legacy fancy indexing.
+The only way would be to create an integer array first::
+
+    >>> bad_data_indx = np.nonzero(bad_data)[0]
+    >>> bad_data_indx_reshaped = bad_data_indx[:, np.newaxis]
+    >>> arr[bad_data_indx_reshaped, [2, 5]] = 0
+
+In any case, we can use ``oindex`` alone to do all of this without getting
+into any trouble or being confused by the whole complexity of advanced
+indexing.
+
+But now some new features are added to the data acquisition: different
+sensors have to be used depending on the time. Let us assume we have
+already created an ``(N, 2)`` array of indices listing, for each time,
+the two correct sensors::
+
+    >>> correct_sensors = np.random.randint(10, size=(100, 2))
+
+A first try to achieve this may be ``arr[:, correct_sensors]``, and this
+does not work. It should quickly be clear that slicing cannot achieve the
+desired thing. But hopefully users will remember that there is ``vindex``
+as a more powerful and flexible approach to advanced indexing.
+One may, if trying ``vindex`` naively, be confused by::
+
+    >>> new_arr = arr.vindex[:, correct_sensors]
+
+which is neither the same, nor the correct result (see the transposing
+rules)! This is because slicing still works the same in ``vindex``.
+However, reading the documentation and examples, one can hopefully quickly
+find the desired solution::
+
+    >>> rows = np.arange(len(arr))
+    >>> rows = rows[:, np.newaxis]  # make shape fit with correct_sensors
+    >>> new_arr = arr.vindex[rows, correct_sensors]
+
+At this point we have left the straightforward world of ``oindex``, but can
+do random picking of any element from the array. Note that in the last
+example a method such as the one mentioned in the `Related Operation`
+section could be more straightforward. But this approach is even more
+flexible, since ``rows`` does not have to be a simple ``arange``, but
+could be ``interesting_times``::
+
+    >>> interesting_times = np.array([0, 4, 8, 9, 10])
+    >>> correct_sensors_at_it = correct_sensors[interesting_times, :]
+    >>> interesting_times_reshaped = interesting_times[:, np.newaxis]
+    >>> new_arr_it = arr[interesting_times_reshaped, correct_sensors_at_it]
+
+A truly complex situation would arise if one were, for example, to pool
+``L`` experiments into an array shaped ``(L, N, D)``. But for ``oindex``
+this should not result in surprises. ``vindex``, being more powerful, will
+quite certainly create some confusion in this case, but it also covers
+pretty much all eventualities.
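[Editor's aside, not part of the NEP text or the patch: the ``correct_sensors``
pattern above is exactly what ``np.take_along_axis``, mentioned in the
`Related Operation` section, covers directly, avoiding the hand-built
broadcast-compatible ``rows`` helper.]::

    >>> picked = np.take_along_axis(arr, correct_sensors, axis=1)
    >>> picked.shape
    (100, 2)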
+
+
+Copyright
+---------
+
+This document is placed under the CC0 1.0 Universal (CC0 1.0) Public Domain
+Dedication [1]_.
+
+
+References and Footnotes
+------------------------
+
+.. [1] To the extent possible under law, the person who associated CC0
+   with this work has waived all copyright and related or neighboring
+   rights to this work. The CC0 license may be found at
+   https://creativecommons.org/publicdomain/zero/1.0/
diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst
index 701ef1f6e..d9208e80b 100644
--- a/doc/release/1.16.0-notes.rst
+++ b/doc/release/1.16.0-notes.rst
@@ -37,3 +37,13 @@ Improvements
 
 Changes
 =======
+
+Comparison ufuncs will now error rather than return NotImplemented
+------------------------------------------------------------------
+
+Previously, comparison ufuncs such as ``np.equal`` would return
+`NotImplemented` if their arguments had structured dtypes, to help comparison
+operators such as ``__eq__`` deal with those. This is no longer needed, as the
+relevant logic has moved to the comparison operators proper (which thus do
+continue to return `NotImplemented` as needed). Hence, like all other ufuncs,
+the comparison ufuncs will now error on structured dtypes.
diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py
index 7cd80f432..727fb66d1 100644
--- a/numpy/core/numerictypes.py
+++ b/numpy/core/numerictypes.py
@@ -323,31 +323,37 @@ def _add_aliases():
         # insert bit-width version for this class (if relevant)
         base, bit, char = bitname(info.type)
-        if base != '':
-            myname = "%s%d" % (base, bit)
-            if (name not in ('longdouble', 'clongdouble') or
-                    myname not in allTypes):
-                base_capitalize = english_capitalize(base)
-                if base == 'complex':
-                    na_name = '%s%d' % (base_capitalize, bit//2)
-                elif base == 'bool':
-                    na_name = base_capitalize
-                else:
-                    na_name = "%s%d" % (base_capitalize, bit)
-                allTypes[myname] = info.type
+        assert base != ''
+        myname = "%s%d" % (base, bit)
+
+        # ensure that (c)longdouble does not overwrite the aliases assigned to
+        # (c)double
+        if name in ('longdouble', 'clongdouble') and myname in allTypes:
+            continue
+
+        base_capitalize = english_capitalize(base)
+        if base == 'complex':
+            na_name = '%s%d' % (base_capitalize, bit//2)
+        elif base == 'bool':
+            na_name = base_capitalize
+        else:
+            na_name = "%s%d" % (base_capitalize, bit)
+
+        allTypes[myname] = info.type
+
+        # add mapping for both the bit name and the numarray name
+        sctypeDict[myname] = info.type
+        sctypeDict[na_name] = info.type
 
-            # add mapping for both the bit name and the numarray name
-            sctypeDict[myname] = info.type
-            sctypeDict[na_name] = info.type
+        # add forward, reverse, and string mapping to numarray
+        sctypeNA[na_name] = info.type
+        sctypeNA[info.type] = na_name
+        sctypeNA[info.char] = na_name
 
-            # add forward, reverse, and string mapping to numarray
-            sctypeNA[na_name] = info.type
-            sctypeNA[info.type] = na_name
-            sctypeNA[info.char] = na_name
-            if char != '':
-                sctypeDict[char] = info.type
-                sctypeNA[char] = na_name
+        assert char != ''
+        sctypeDict[char] = info.type
+        sctypeNA[char] = na_name
 
 _add_aliases()
 
 def _add_integer_aliases():
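[Editor's illustration, not part of the patch: a quick check of the aliasing
fix above, mirroring the new ``TestSctypeDict`` test added further down. On
platforms where ``longdouble`` has the same size as ``double``, the old code
could overwrite these entries.]::

    >>> import numpy as np
    >>> np.sctypeDict['f8'] is not np.longdouble
    True
    >>> np.sctypeDict['c16'] is not np.clongdouble
    True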
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 41748c714..368f5ded7 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -1249,7 +1249,8 @@ PyArray_ChainExceptionsCause(PyObject *exc, PyObject *val, PyObject *tb)
     }
 }
 
-/* Silence the current error and emit a deprecation warning instead.
+/*
+ * Silence the current error and emit a deprecation warning instead.
  *
  * If warnings are raised as errors, this sets the warning __cause__ to the
  * silenced error.
@@ -1268,6 +1269,118 @@ DEPRECATE_silence_error(const char *msg) {
     return 0;
 }
 
+/*
+ * Comparisons can fail, but we do not always want to pass on the exception
+ * (see comment in array_richcompare below), but rather return NotImplemented.
+ * Here, an exception should be set on entrance.
+ * Returns either NotImplemented with the exception cleared, or NULL
+ * with the exception set.
+ * Raises deprecation warnings for cases where behaviour is meant to change
+ * (2015-05-14, 1.10)
+ */
+
+NPY_NO_EXPORT PyObject *
+_failed_comparison_workaround(PyArrayObject *self, PyObject *other, int cmp_op)
+{
+    PyObject *exc, *val, *tb;
+    PyArrayObject *array_other;
+    int other_is_flexible, ndim_other;
+    int self_is_flexible = PyTypeNum_ISFLEXIBLE(PyArray_DESCR(self)->type_num);
+
+    PyErr_Fetch(&exc, &val, &tb);
+    /*
+     * Determine whether other has a flexible dtype; here, inconvertible
+     * is counted as inflexible. (This repeats work done in the ufunc,
+     * but OK to waste some time in an unlikely path.)
+     */
+    array_other = (PyArrayObject *)PyArray_FROM_O(other);
+    if (array_other) {
+        other_is_flexible = PyTypeNum_ISFLEXIBLE(
+            PyArray_DESCR(array_other)->type_num);
+        ndim_other = PyArray_NDIM(array_other);
+        Py_DECREF(array_other);
+    }
+    else {
+        PyErr_Clear(); /* we restore the original error if needed */
+        other_is_flexible = 0;
+        ndim_other = 0;
+    }
+    if (cmp_op == Py_EQ || cmp_op == Py_NE) {
+        /*
+         * note: for == and !=, a structured dtype self cannot get here,
+         * but a string can. Other can be string or structured.
+         */
+        if (other_is_flexible || self_is_flexible) {
+            /*
+             * For scalars, returning NotImplemented is correct.
+             * For arrays, we emit a future deprecation warning.
+             * When this warning is removed, a correctly shaped
+             * array of bool should be returned.
+             */
+            if (ndim_other != 0 || PyArray_NDIM(self) != 0) {
+                /* 2015-05-14, 1.10 */
+                if (DEPRECATE_FUTUREWARNING(
+                        "elementwise comparison failed; returning scalar "
+                        "instead, but in the future will perform "
+                        "elementwise comparison") < 0) {
+                    goto fail;
+                }
+            }
+        }
+        else {
+            /*
+             * If neither self nor other had a flexible dtype, the error cannot
+             * have been caused by a lack of implementation in the ufunc.
+             *
+             * 2015-05-14, 1.10
+             */
+            if (DEPRECATE(
+                    "elementwise comparison failed; "
+                    "this will raise an error in the future.") < 0) {
+                goto fail;
+            }
+        }
+        Py_XDECREF(exc);
+        Py_XDECREF(val);
+        Py_XDECREF(tb);
+        Py_INCREF(Py_NotImplemented);
+        return Py_NotImplemented;
+    }
+    else if (other_is_flexible || self_is_flexible) {
+        /*
+         * For LE, LT, GT, GE and a flexible self or other, we return
+         * NotImplemented, which is the correct answer since the ufuncs do
+         * not in fact implement loops for those. On python 3 this will
+         * get us the desired TypeError, but on python 2, one gets strange
+         * ordering, so we emit a warning.
+         */
+#if !defined(NPY_PY3K)
+        /* 2015-05-14, 1.10 */
+        if (DEPRECATE(
+                "unorderable dtypes; returning scalar but in "
+                "the future this will be an error") < 0) {
+            goto fail;
+        }
+#endif
+        Py_XDECREF(exc);
+        Py_XDECREF(val);
+        Py_XDECREF(tb);
+        Py_INCREF(Py_NotImplemented);
+        return Py_NotImplemented;
+    }
+    else {
+        /* LE, LT, GT, or GE with non-flexible other; just pass on error */
+        goto fail;
+    }
+
+fail:
+    /*
+     * Reraise the original exception, possibly chaining with a new one.
+     */
+    PyArray_ChainExceptionsCause(exc, val, tb);
+    return NULL;
+}
+
 NPY_NO_EXPORT PyObject *
 array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
 {
@@ -1365,26 +1478,6 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
         result = PyArray_GenericBinaryFunction(self,
                 (PyObject *)other,
                 n_ops.equal);
-        /*
-         * If the comparison results in NULL, then the
-         * two array objects can not be compared together;
-         * indicate that
-         */
-        if (result == NULL) {
-            /*
-             * Comparisons should raise errors when element-wise comparison
-             * is not possible.
-             */
-            /* 2015-05-14, 1.10 */
-            if (DEPRECATE_silence_error(
-                    "elementwise == comparison failed; "
-                    "this will raise an error in the future.") < 0) {
-                return NULL;
-            }
-
-            Py_INCREF(Py_NotImplemented);
-            return Py_NotImplemented;
-        }
         break;
     case Py_NE:
         RICHCMP_GIVE_UP_IF_NEEDED(obj_self, other);
@@ -1436,21 +1529,6 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
         result = PyArray_GenericBinaryFunction(self,
                 (PyObject *)other,
                 n_ops.not_equal);
-        if (result == NULL) {
-            /*
-             * Comparisons should raise errors when element-wise comparison
-             * is not possible.
-             */
-            /* 2015-05-14, 1.10 */
-            if (DEPRECATE_silence_error(
-                    "elementwise != comparison failed; "
-                    "this will raise an error in the future.") < 0) {
-                return NULL;
-            }
-
-            Py_INCREF(Py_NotImplemented);
-            return Py_NotImplemented;
-        }
         break;
     case Py_GT:
         RICHCMP_GIVE_UP_IF_NEEDED(obj_self, other);
@@ -1463,8 +1541,37 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
                 n_ops.greater_equal);
         break;
     default:
-        result = Py_NotImplemented;
-        Py_INCREF(result);
+        Py_INCREF(Py_NotImplemented);
+        return Py_NotImplemented;
+    }
+    if (result == NULL) {
+        /*
+         * 2015-05-14, 1.10; updated 2018-06-18, 1.16.
+         *
+         * Comparisons can raise errors when element-wise comparison is not
+         * possible. Some of these, though, should not be passed on.
+         * In particular, the ufuncs do not have loops for flexible dtype,
+         * so those should be treated separately. Furthermore, for EQ and NE,
+         * we should never fail.
+         *
+         * Our ideal behaviour would be:
+         *
+         * 1. For EQ and NE:
+         *    - If self and other are scalars, return NotImplemented,
+         *      so that python can assign True or False as appropriate.
+         *    - If either is an array, return an array of False or True.
+         *
+         * 2. For LT, LE, GE, GT:
+         *    - If self or other was flexible, return NotImplemented
+         *      (as is in fact the case), so python can raise a TypeError.
+         *    - If other is not convertible to an array, pass on the error
+         *      (MHvK, 2018-06-18: not sure about this, but it's what we have).
+         *
+         * However, for backwards compatibility, we cannot yet return arrays,
+         * so we raise warnings instead. Furthermore, we warn on python2
+         * for LT, LE, GE, GT, since fall-back behaviour is poorly defined.
+         */
+        result = _failed_comparison_workaround(self, other, cmp_op);
     }
     return result;
 }
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 59fc5aa20..b964c568e 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -577,9 +577,6 @@ get_ufunc_arguments(PyUFuncObject *ufunc,
     PyObject *obj, *context;
     PyObject *str_key_obj = NULL;
     const char *ufunc_name = ufunc_get_name_cstr(ufunc);
-    int type_num;
-
-    int any_flexible = 0, any_object = 0, any_flexible_userloops = 0;
     int has_sig = 0;
 
     /*
@@ -638,166 +635,6 @@ get_ufunc_arguments(PyUFuncObject *ufunc,
         if (out_op[i] == NULL) {
             goto fail;
         }
-
-        type_num = PyArray_DESCR(out_op[i])->type_num;
-        if (!any_flexible &&
-                PyTypeNum_ISFLEXIBLE(type_num)) {
-            any_flexible = 1;
-        }
-        if (!any_object &&
-                PyTypeNum_ISOBJECT(type_num)) {
-            any_object = 1;
-        }
-
-        /*
-         * If any operand is a flexible dtype, check to see if any
-         * struct dtype ufuncs are registered. A ufunc has been registered
-         * for a struct dtype if ufunc's arg_dtypes array is not NULL.
-         */
-        if (PyTypeNum_ISFLEXIBLE(type_num) &&
-                !any_flexible_userloops &&
-                ufunc->userloops != NULL) {
-            PyUFunc_Loop1d *funcdata;
-            PyObject *key, *obj;
-            key = PyInt_FromLong(type_num);
-            if (key == NULL) {
-                continue;
-            }
-            obj = PyDict_GetItem(ufunc->userloops, key);
-            Py_DECREF(key);
-            if (obj == NULL) {
-                continue;
-            }
-            funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
-            while (funcdata != NULL) {
-                if (funcdata->arg_dtypes != NULL) {
-                    any_flexible_userloops = 1;
-                    break;
-                }
-                funcdata = funcdata->next;
-            }
-        }
-    }
-
-    if (any_flexible && !any_flexible_userloops && !any_object && nin == 2) {
-        /* Traditionally, we return -2 here (meaning "NotImplemented") anytime
-         * we hit the above condition.
-         *
-         * This condition basically means "we are doomed", b/c the "flexible"
-         * dtypes -- strings and void -- cannot have their own ufunc loops
-         * registered (except via the special "flexible userloops" mechanism),
-         * and they can't be cast to anything except object (and we only cast
-         * to object if any_object is true). So really we should do nothing
-         * here and continue and let the proper error be raised. But, we can't
-         * quite yet, b/c of backcompat.
-         *
-         * Most of the time, this NotImplemented either got returned directly
-         * to the user (who can't do anything useful with it), or got passed
-         * back out of a special function like __mul__. And fortunately, for
-         * almost all special functions, the end result of this was a
-         * TypeError. Which is also what we get if we just continue without
-         * this special case, so this special case is unnecessary.
-         *
-         * The only thing that actually depended on the NotImplemented is
-         * array_richcompare, which did two things with it. First, it needed
-         * to see this NotImplemented in order to implement the special-case
-         * comparisons for
-         *
-         *    string < <= == != >= > string
-         *    void == != void
-         *
-         * Now it checks for those cases first, before trying to call the
-         * ufunc, so that's no problem. What it doesn't handle, though, is
-         * cases like
-         *
-         *    float < string
-         *
-         * or
-         *
-         *    float == void
-         *
-         * For those, it just let the NotImplemented bubble out, and accepted
-         * Python's default handling. And unfortunately, for comparisons,
-         * Python's default is *not* to raise an error.
-         * Instead, it returns
-         * something that depends on the operator:
-         *
-         *     ==         return False
-         *     !=         return True
-         *     < <= >= >  Python 2: use "fallback" (= weird and broken) ordering
-         *                Python 3: raise TypeError (hallelujah)
-         *
-         * In most cases this is straightforwardly broken, because comparison
-         * of two arrays should always return an array, and here we end up
-         * returning a scalar. However, there is an exception: if we are
-         * comparing two scalars for equality, then it actually is correct to
-         * return a scalar bool instead of raising an error. If we just
-         * removed this special check entirely, then "np.float64(1) == 'foo'"
-         * would raise an error instead of returning False, which is genuinely
-         * wrong.
-         *
-         * The proper end goal here is:
-         *   1) == and != should be implemented in a proper vectorized way for
-         *      all types. The short-term hack for this is just to add a
-         *      special case to PyUFunc_DefaultLegacyInnerLoopSelector where
-         *      if it can't find a comparison loop for the given types, and
-         *      the ufunc is np.equal or np.not_equal, then it returns a loop
-         *      that just fills the output array with False (resp. True). Then
-         *      array_richcompare could trust that whenever its special cases
-         *      don't apply, simply calling the ufunc will do the right thing,
-         *      even without this special check.
-         *   2) < <= >= > should raise an error if no comparison function can
-         *      be found. array_richcompare already handles all string <>
-         *      string cases, and void dtypes don't have ordering, so again
-         *      this would mean that array_richcompare could simply call the
-         *      ufunc and it would do the right thing (i.e., raise an error),
-         *      again without needing this special check.
-         *
-         * So this means that for the transition period, our goal is:
-         *   == and != on scalars should simply return NotImplemented like
-         *     they always did, since everything ends up working out correctly
-         *     in this case only
-         *   == and != on arrays should issue a FutureWarning and then return
-         *     NotImplemented
-         *   < <= >= > on all flexible dtypes on py2 should raise a
-         *     DeprecationWarning, and then return NotImplemented. On py3 we
-         *     skip the warning, though, b/c it would just be immediately be
-         *     followed by an exception anyway.
-         *
-         * And for all other operations, we let things continue as normal.
-         */
-        /* strcmp() is a hack but I think we can get away with it for this
-         * temporary measure.
-         */
-        if (!strcmp(ufunc_name, "equal") ||
-                !strcmp(ufunc_name, "not_equal")) {
-            /* Warn on non-scalar, return NotImplemented regardless */
-            if (PyArray_NDIM(out_op[0]) != 0 ||
-                    PyArray_NDIM(out_op[1]) != 0) {
-                if (DEPRECATE_FUTUREWARNING(
-                        "elementwise comparison failed; returning scalar "
-                        "instead, but in the future will perform elementwise "
-                        "comparison") < 0) {
-                    goto fail;
-                }
-            }
-            Py_DECREF(out_op[0]);
-            Py_DECREF(out_op[1]);
-            return -2;
-        }
-        else if (!strcmp(ufunc_name, "less") ||
-                 !strcmp(ufunc_name, "less_equal") ||
-                 !strcmp(ufunc_name, "greater") ||
-                 !strcmp(ufunc_name, "greater_equal")) {
-#if !defined(NPY_PY3K)
-            if (DEPRECATE("unorderable dtypes; returning scalar but in "
-                          "the future this will be an error") < 0) {
-                goto fail;
-            }
-#endif
-            Py_DECREF(out_op[0]);
-            Py_DECREF(out_op[1]);
-            return -2;
-        }
     }
 
     /* Get positional output arguments */
@@ -4480,22 +4317,7 @@ ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
 
     errval = PyUFunc_GenericFunction(ufunc, args, kwds, mps);
     if (errval < 0) {
-        if (errval == -1) {
-            return NULL;
-        }
-        else if (ufunc->nin == 2 && ufunc->nout == 1) {
-            /*
-             * For array_richcompare's benefit -- see the long comment in
-             * get_ufunc_arguments.
-             */
-            Py_INCREF(Py_NotImplemented);
-            return Py_NotImplemented;
-        }
-        else {
-            PyErr_SetString(PyExc_TypeError,
-                            "XX can't happen, please report a bug XX");
-            return NULL;
-        }
+        return NULL;
     }
 
     /* Free the input references */
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 285b2de3c..8eb258666 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -190,10 +190,10 @@ class TestComparisonDeprecations(_DeprecationTestCase):
         b = np.array(['a', 'b', 'c'])
         assert_raises(ValueError, lambda x, y: x == y, a, b)
 
-        # The empty list is not cast to string, as this is only to document
-        # that fact (it likely should be changed). This means that the
-        # following works (and returns False) due to dtype mismatch:
-        a == []
+        # The empty list is not cast to string, and this used to pass due
+        # to dtype mismatch; now (2018-06-21) it correctly leads to a
+        # FutureWarning.
+        assert_warns(FutureWarning, lambda: a == [])
 
     def test_void_dtype_equality_failures(self):
         class NotArray(object):
diff --git a/numpy/core/tests/test_numerictypes.py b/numpy/core/tests/test_numerictypes.py
index cdf1b0490..4c3cc6c9e 100644
--- a/numpy/core/tests/test_numerictypes.py
+++ b/numpy/core/tests/test_numerictypes.py
@@ -406,3 +406,9 @@ class TestIsSubDType(object):
         for w1, w2 in itertools.product(self.wrappers, repeat=2):
             assert_(not np.issubdtype(w1(np.float32), w2(np.float64)))
             assert_(not np.issubdtype(w1(np.float64), w2(np.float32)))
+
+
+class TestSctypeDict(object):
+    def test_longdouble(self):
+        assert_(np.sctypeDict['f8'] is not np.longdouble)
+        assert_(np.sctypeDict['c16'] is not np.clongdouble)
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index ef9ced354..0e564e305 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -1643,6 +1643,16 @@ class TestUfunc(object):
         target = np.array([ True,  False, False, False], dtype=bool)
         assert_equal(np.all(target == (mra == ra[0])), True)
 
+    def test_scalar_equal(self):
+        # Scalar comparisons should always work, without deprecation
+        # warnings, even when the ufunc fails.
+        a = np.array(0.)
+        b = np.array('a')
+        assert_(a != b)
+        assert_(b != a)
+        assert_(not (a == b))
+        assert_(not (b == a))
+
     def test_NotImplemented_not_returned(self):
         # See gh-5964 and gh-2091. Some of these functions are not operator
         # related and were fixed for other reasons in the past.
@@ -1652,17 +1662,16 @@ class TestUfunc(object):
             np.bitwise_xor, np.left_shift, np.right_shift, np.fmax,
             np.fmin, np.fmod, np.hypot, np.logaddexp, np.logaddexp2,
             np.logical_and, np.logical_or, np.logical_xor, np.maximum,
-            np.minimum, np.mod
-            ]
-
-        # These functions still return NotImplemented. Will be fixed in
-        # future.
-        # bad = [np.greater, np.greater_equal, np.less, np.less_equal, np.not_equal]
+            np.minimum, np.mod,
+            np.greater, np.greater_equal, np.less, np.less_equal,
+            np.equal, np.not_equal]
 
         a = np.array('1')
         b = 1
+        c = np.array([1., 2.])
         for f in binary_funcs:
             assert_raises(TypeError, f, a, b)
+            assert_raises(TypeError, f, c, a)
 
     def test_reduce_noncontig_output(self):
         # Check that reduction deals with non-contiguous output arrays
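[Editor's illustration, not part of the patch: the behaviour the new tests
above lock in. Scalar equality comparisons still fall back gracefully via the
comparison operators, while calling a comparison ufunc directly on
incompatible dtypes now raises; the exact error message may vary.]::

    >>> import numpy as np
    >>> np.array(0.) == np.array('a')  # operator falls back, returns False
    False
    >>> np.less(np.array([1., 2.]), np.array('1'))  # ufunc now raises
    Traceback (most recent call last):
        ...
    TypeError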