diff options
24 files changed, 120 insertions, 45 deletions
diff --git a/doc/release/upcoming_changes/14800.improvement.rst b/doc/release/upcoming_changes/14800.improvement.rst new file mode 100644 index 000000000..158c31536 --- /dev/null +++ b/doc/release/upcoming_changes/14800.improvement.rst @@ -0,0 +1,14 @@ +Comparison on ``object`` dtypes will prefer ``object`` output +------------------------------------------------------------- +Comparison ufuncs (``np.equal`` and friends) would return boolean arrays when +the input array dtype was ``object``. This led to inconsistent behaviour for +ragged arrays ``a = np.array([1, np.array([1, 2, 3])], dtype=object)``. This +will now return an object array:: + + >>> a = np.array([1, np.array([1, 2, 3])], dtype=object) + >>> np.equal(a, a) + array([True, array([ True, True, True])], dtype=object) + +The old behaviour, which will raise a ``ValueError`` in this case, is still +available by specifying a dtype as ``np.equal(a, a, dtype=bool)``. + diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index e0b6a654c..760b9c919 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -226,7 +226,7 @@ chartoname = { 'P': 'OBJECT', } -all = '?bBhHiIlLqQefdgFDGOMm' +noobj = '?bBhHiIlLqQefdgFDGmM' O = 'O' P = 'P' ints = 'bBhHiIlLqQ' @@ -246,10 +246,8 @@ inexactvec = 'fd' noint = inexact+O nointP = inexact+P allP = bints+times+flts+cmplxP -nobool = all[1:] -noobj = all[:-3]+all[-2:] -nobool_or_obj = all[1:-3]+all[-2:] -nobool_or_datetime = all[1:-2]+all[-1:] +nobool_or_obj = noobj[1:] +nobool_or_datetime = noobj[1:-1] + O # includes m - timedelta64 intflt = ints+flts intfltcmplx = ints+flts+cmplx nocmplx = bints+times+flts @@ -431,43 +429,49 @@ defdict = { Ufunc(2, 1, None, docstrings.get('numpy.core.umath.greater'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 
'O')], + TD('O', out='?'), ), 'greater_equal': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.greater_equal'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'less': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.less'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'less_equal': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.less_equal'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'equal': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.equal'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'not_equal': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.not_equal'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(all, out='?', simd=[('avx2', ints)]), + TD(noobj, out='?', simd=[('avx2', ints)]), [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + TD('O', out='?'), ), 'logical_and': Ufunc(2, 1, True_, @@ -475,6 +479,7 @@ defdict = { 'PyUFunc_SimpleBinaryComparisonTypeResolver', TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), TD(O, f='npy_ObjectLogicalAnd'), + TD(O, f='npy_ObjectLogicalAnd', out='?'), ), 'logical_not': Ufunc(1, 1, None, @@ -482,6 +487,7 @@ defdict = { None, TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), TD(O, f='npy_ObjectLogicalNot'), + TD(O, f='npy_ObjectLogicalNot', out='?'), ), 'logical_or': Ufunc(2, 1, False_, @@ -489,6 +495,7 @@ defdict = { 
'PyUFunc_SimpleBinaryComparisonTypeResolver', TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), TD(O, f='npy_ObjectLogicalOr'), + TD(O, f='npy_ObjectLogicalOr', out='?'), ), 'logical_xor': Ufunc(2, 1, False_, diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 5f7716455..6e5f3dabf 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -796,7 +796,9 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None): -------- partition : Describes partition algorithms used. ndarray.partition : Inplace partition. - argsort : Full indirect sort + argsort : Full indirect sort. + take_along_axis : Apply ``index_array`` from argpartition + to an array as if by calling partition. Notes ----- @@ -816,6 +818,14 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None): >>> np.array(x)[np.argpartition(x, 3)] array([2, 1, 3, 4]) + Multi-dimensional array: + + >>> x = np.array([[3, 4, 2], [1, 3, 1]]) + >>> index_array = np.argpartition(x, kth=1, axis=-1) + >>> np.take_along_axis(x, index_array, axis=-1) # same as np.partition(x, kth=1) + array([[2, 3, 4], + [1, 1, 3]]) + """ return _wrapfunc(a, 'argpartition', kth, axis=axis, kind=kind, order=order) @@ -1025,6 +1035,8 @@ def argsort(a, axis=-1, kind=None, order=None): lexsort : Indirect stable sort with multiple keys. ndarray.sort : Inplace sort. argpartition : Indirect partial sort. + take_along_axis : Apply ``index_array`` from argsort + to an array as if by calling sort. Notes ----- @@ -1120,6 +1132,8 @@ def argmax(a, axis=None, out=None): ndarray.argmax, argmin amax : The maximum value along a given axis. unravel_index : Convert a flat index into an index tuple. + take_along_axis : Apply ``np.expand_dims(index_array, axis)`` + from argmax to an array as if by calling max. Notes ----- @@ -1154,6 +1168,16 @@ def argmax(a, axis=None, out=None): >>> np.argmax(b) # Only the first occurrence is returned. 
1 + >>> x = np.array([[4,2,3], [1,0,3]]) + >>> index_array = np.argmax(x, axis=-1) + >>> # Same as np.max(x, axis=-1, keepdims=True) + >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1) + array([[4], + [3]]) + >>> # Same as np.max(x, axis=-1) + >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1).squeeze(axis=-1) + array([4, 3]) + """ return _wrapfunc(a, 'argmax', axis=axis, out=out) @@ -1189,6 +1213,8 @@ def argmin(a, axis=None, out=None): ndarray.argmin, argmax amin : The minimum value along a given axis. unravel_index : Convert a flat index into an index tuple. + take_along_axis : Apply ``np.expand_dims(index_array, axis)`` + from argmin to an array as if by calling min. Notes ----- @@ -1223,6 +1249,16 @@ def argmin(a, axis=None, out=None): >>> np.argmin(b) # Only the first occurrence is returned. 0 + >>> x = np.array([[4,2,3], [1,0,3]]) + >>> index_array = np.argmin(x, axis=-1) + >>> # Same as np.min(x, axis=-1, keepdims=True) + >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1) + array([[2], + [0]]) + >>> # Same as np.min(x, axis=-1) + >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1).squeeze(axis=-1) + array([2, 0]) + """ return _wrapfunc(a, 'argmin', axis=axis, out=out) diff --git a/numpy/core/setup.py b/numpy/core/setup.py index a33318472..a4b5cfe5f 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -774,7 +774,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'arrayobject.h'), join('src', 'multiarray', 'arraytypes.h'), join('src', 'multiarray', 'arrayfunction_override.h'), - join('src', 'multiarray', 'buffer.h'), + join('src', 'multiarray', 'npy_buffer.h'), join('src', 'multiarray', 'calculation.h'), join('src', 'multiarray', 'common.h'), join('src', 'multiarray', 'convert_datatype.h'), diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 84b78b585..6356f08ba 100644 --- a/numpy/core/setup_common.py +++ 
b/numpy/core/setup_common.py @@ -266,8 +266,9 @@ def check_long_double_representation(cmd): except ValueError: # try linking to support CC="gcc -flto" or icc -ipo # struct needs to be volatile so it isn't optimized away + # additionally "clang -flto" requires the foo struct to be used body = body.replace('struct', 'volatile struct') - body += "int main(void) { return 0; }\n" + body += "int main(void) { return foo.before[0]; }\n" src, obj = cmd._compile(body, None, None, 'c') cmd.temp_files.append("_configtest") cmd.compiler.link_executable([obj], "_configtest") diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 5ed5b7635..a5cebfbd8 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -48,7 +48,7 @@ maintainer email: oliphant.travis@ieee.org #include "mapping.h" #include "getset.h" #include "sequence.h" -#include "buffer.h" +#include "npy_buffer.h" #include "array_assign.h" #include "alloc.h" #include "mem_overlap.h" diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 5c6699a3b..e36b95c00 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -36,7 +36,7 @@ #include "cblasfuncs.h" #include "npy_cblas.h" -#include "buffer.h" +#include "npy_buffer.h" /* check for sequences, but ignore the types numpy considers scalars */ static NPY_INLINE npy_bool diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c index b729027ad..0edadee98 100644 --- a/numpy/core/src/multiarray/buffer.c +++ b/numpy/core/src/multiarray/buffer.c @@ -11,7 +11,7 @@ #include "npy_pycompat.h" -#include "buffer.h" +#include "npy_buffer.h" #include "common.h" #include "numpyos.h" #include "arrayobject.h" diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index a71b0818c..c991f7428 100644 --- a/numpy/core/src/multiarray/common.c +++ 
b/numpy/core/src/multiarray/common.c @@ -12,7 +12,7 @@ #include "usertypes.h" #include "common.h" -#include "buffer.h" +#include "npy_buffer.h" #include "get_attr_string.h" #include "mem_overlap.h" diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c index 5f0ad5817..ca126b4b1 100644 --- a/numpy/core/src/multiarray/conversion_utils.c +++ b/numpy/core/src/multiarray/conversion_utils.c @@ -16,7 +16,7 @@ #include "conversion_utils.h" #include "alloc.h" -#include "buffer.h" +#include "npy_buffer.h" static int PyArray_PyIntAsInt_ErrMsg(PyObject *o, const char * msg) NPY_GCC_NONNULL(2); diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 62804b979..64933ae1b 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -19,7 +19,7 @@ #include "ctors.h" #include "convert_datatype.h" #include "shape.h" -#include "buffer.h" +#include "npy_buffer.h" #include "lowlevel_strided_loops.h" #include "methods.h" #include "_datetime.h" diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 522b69307..d4e18e457 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -19,7 +19,7 @@ #include "descriptor.h" #include "alloc.h" #include "assert.h" -#include "buffer.h" +#include "npy_buffer.h" /* * offset: A starting offset. 
diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c index 116e37ce5..6e5d480d0 100644 --- a/numpy/core/src/multiarray/getset.c +++ b/numpy/core/src/multiarray/getset.c @@ -20,7 +20,7 @@ #include "arrayobject.h" #include "mem_overlap.h" #include "alloc.h" -#include "buffer.h" +#include "npy_buffer.h" /******************* array attribute get and set routines ******************/ diff --git a/numpy/core/src/multiarray/buffer.h b/numpy/core/src/multiarray/npy_buffer.h index fae413c85..fae413c85 100644 --- a/numpy/core/src/multiarray/buffer.h +++ b/numpy/core/src/multiarray/npy_buffer.h diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 9adca6773..32d712e0c 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -28,7 +28,7 @@ #include "npy_import.h" #include "dragon4.h" #include "npy_longdouble.h" -#include "buffer.h" +#include "npy_buffer.h" #include <stdlib.h> diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 8bffaa9af..9bdcd8241 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -172,10 +172,11 @@ class TestComparisonDeprecations(_DeprecationTestCase): # (warning is issued a couple of times here) self.assert_deprecated(op, args=(a, a[:-1]), num=None) - # Element comparison error (numpy array can't be compared). 
+ # ragged array comparison returns True/False a = np.array([1, np.array([1,2,3])], dtype=object) b = np.array([1, np.array([1,2,3])], dtype=object) - self.assert_deprecated(op, args=(a, b), num=None) + res = op(a, b) + assert res.dtype == 'object' def test_string(self): # For two string arrays, strings always raised the broadcasting error: diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 707c690dd..d9f961581 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1090,14 +1090,18 @@ class TestUfunc(object): return '==' arr0d = np.array(HasComparisons()) - assert_equal(arr0d == arr0d, True) - assert_equal(np.equal(arr0d, arr0d), True) # normal behavior is a cast + assert_equal(arr0d == arr0d, '==') + assert_equal(np.equal(arr0d, arr0d), '==') + assert_equal(np.equal(arr0d, arr0d, dtype=bool), True) assert_equal(np.equal(arr0d, arr0d, dtype=object), '==') arr1d = np.array([HasComparisons()]) - assert_equal(arr1d == arr1d, np.array([True])) - assert_equal(np.equal(arr1d, arr1d), np.array([True])) # normal behavior is a cast - assert_equal(np.equal(arr1d, arr1d, dtype=object), np.array(['=='])) + ret_obj = np.array(['=='], dtype=object) + ret_bool = np.array([True]) + assert_equal(arr1d == arr1d, ret_obj) + assert_equal(np.equal(arr1d, arr1d), ret_obj) + assert_equal(np.equal(arr1d, arr1d, dtype=object), ret_obj) + assert_equal(np.equal(arr1d, arr1d, dtype=bool), ret_bool) def test_object_array_reduction(self): # Reductions on object arrays diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 9b4ce9e47..96a9f1f8b 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -170,10 +170,11 @@ class TestOut(object): class TestComparisons(object): def test_ignore_object_identity_in_equal(self): - # Check error raised when comparing identical objects whose comparison + # Check comparing identical objects whose comparison # is not a simple boolean, e.g., 
arrays that are compared elementwise. a = np.array([np.array([1, 2, 3]), None], dtype=object) - assert_raises(ValueError, np.equal, a, a) + b = np.equal(a, a.copy()) + assert b.shape == a.shape # Check error raised when comparing identical non-comparable objects. class FunkyType(object): @@ -188,10 +189,11 @@ class TestComparisons(object): assert_equal(np.equal(a, a), [False]) def test_ignore_object_identity_in_not_equal(self): - # Check error raised when comparing identical objects whose comparison + # Check comparing identical objects whose comparison # is not a simple boolean, e.g., arrays that are compared elementwise. a = np.array([np.array([1, 2, 3]), None], dtype=object) - assert_raises(ValueError, np.not_equal, a, a) + b = np.not_equal(a, a.copy()) + assert b.shape == a.shape # Check error raised when comparing identical non-comparable objects. class FunkyType(object): diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 2309f7e42..cf45e181b 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -562,11 +562,15 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): if invert: mask = np.ones(len(ar1), dtype=bool) for a in ar2: - mask &= (ar1 != a) + # convert object arrays to bool + # cannot use np.not_equal until 'S' and 'U' have loops + mask &= (ar1 != a).astype(bool) else: mask = np.zeros(len(ar1), dtype=bool) for a in ar2: - mask |= (ar1 == a) + # convert object arrays to bool + # cannot use np.equal until 'S' and 'U' have loops + mask |= (ar1 == a).astype(bool) return mask # Otherwise use sorting diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 18ccab3b8..457cca146 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -99,7 +99,7 @@ def _replace_nan(a, val): if a.dtype == np.object_: # object arrays do not support `isnan` (gh-9009), so make a guess - mask = a != a + mask = np.not_equal(a, a, dtype=bool) elif issubclass(a.dtype.type, np.inexact): mask = np.isnan(a) else: 
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 1eae8ccfb..9075ff538 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -2523,7 +2523,7 @@ class TestPercentile(object): assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan) def test_fraction(self): - x = [Fraction(i, 2) for i in np.arange(8)] + x = [Fraction(i, 2) for i in range(8)] p = np.percentile(x, Fraction(0)) assert_equal(p, Fraction(0)) @@ -2943,7 +2943,7 @@ class TestQuantile(object): def test_fraction(self): # fractional input, integral quantile - x = [Fraction(i, 2) for i in np.arange(8)] + x = [Fraction(i, 2) for i in range(8)] q = np.quantile(x, 0) assert_equal(q, 0) diff --git a/numpy/linalg/tests/test_regression.py b/numpy/linalg/tests/test_regression.py index bd3a45872..289566109 100644 --- a/numpy/linalg/tests/test_regression.py +++ b/numpy/linalg/tests/test_regression.py @@ -109,10 +109,9 @@ class TestRegression(object): assert_raises(ValueError, linalg.norm, testvector, ord='nuc') assert_raises(ValueError, linalg.norm, testvector, ord=np.inf) assert_raises(ValueError, linalg.norm, testvector, ord=-np.inf) - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - assert_raises((AttributeError, DeprecationWarning), - linalg.norm, testvector, ord=0) + # Succeeds, equivalent to "sum(x != 0)" + r = linalg.norm(testvector, ord=0) + assert_(r.dtype == 'bool') assert_raises(ValueError, linalg.norm, testvector, ord=-1) assert_raises(ValueError, linalg.norm, testvector, ord=-2) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index bb0d8d412..f98a29d82 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -4790,7 +4790,12 @@ class MaskedArray(ndarray): mask = _check_mask_axis(self._mask, axis, **kwargs) if out is None: - d = self.filled(True).all(axis=axis, **kwargs).view(type(self)) + r = self.filled(True).all(axis=axis, **kwargs) + # object dtypes with axis=None 
return a scalar + if isinstance(r, bool): + d = type(self)(r) + else: + d = r.view(type(self)) if d.ndim: d.__setmask__(mask) elif mask: diff --git a/tools/travis-test.sh b/tools/travis-test.sh index e04a33143..549a2d570 100755 --- a/tools/travis-test.sh +++ b/tools/travis-test.sh @@ -53,7 +53,9 @@ setup_base() $PYTHON setup.py build build_src --verbose-cfg build_ext --inplace 2>&1 | tee log fi grep -v "_configtest" log \ - | grep -vE "ld returned 1|no previously-included files matching|manifest_maker: standard file '-c'" \ + | grep -vE "ld returned 1|no files found matching" \ + | grep -vE "no previously-included files matching" \ + | grep -vE "manifest_maker: standard file '-c'" \ | grep -E "warning\>" \ | tee warnings if [ "$LAPACK" != "None" ]; then |