diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/temp_elide.c | 20 | ||||
-rw-r--r-- | numpy/ma/core.py | 171 | ||||
-rw-r--r-- | numpy/ma/tests/test_core.py | 103 | ||||
-rw-r--r-- | numpy/matrixlib/defmatrix.py | 44 | ||||
-rw-r--r-- | numpy/matrixlib/tests/test_defmatrix.py | 9 | ||||
-rw-r--r-- | numpy/random/tests/test_random.py | 10 |
6 files changed, 211 insertions, 146 deletions
diff --git a/numpy/core/src/multiarray/temp_elide.c b/numpy/core/src/multiarray/temp_elide.c index 84612da3f..fae6763e4 100644 --- a/numpy/core/src/multiarray/temp_elide.c +++ b/numpy/core/src/multiarray/temp_elide.c @@ -10,7 +10,7 @@ /* * Functions used to try to avoid/elide temporaries in python expressions - * of type a + b + b by translating some operations into inplace operations. + * of type a + b + b by translating some operations into in-place operations. * This example translates to this bytecode: * * 0 LOAD_FAST 0 (a) @@ -23,7 +23,7 @@ * instructions so they always have a reference count larger than 1. * The temporary of the first BINARY_ADD on the other hand only has a count of * 1. Only temporaries can have a count of 1 in python so we can use this to - * transform the second operation into an inplace operation and not affect the + * transform the second operation into an in-place operation and not affect the * output of the program. * CPython does the same thing to resize memory instead of copying when doing * string concatenation. @@ -41,19 +41,19 @@ * This is an expensive operation so temporaries are only avoided for rather * large arrays. * - * A possible future improvement would be to change cpython to give as access + * A possible future improvement would be to change cpython to give us access * to the top of the stack. Then we could just check that the objects involved * are on the cpython stack instead of checking the function callstack. * - * Elision can be applied to all operations that do have inplace variants and + * Elision can be applied to all operations that do have in-place variants and * do not change types (addition, subtraction, multiplication, float division, * logical and bitwise operations ...) * For commutative operations (addition, multiplication, ...) if eliding into - * the lefthand side fails it can succedd on the righthand side by swapping the + * the lefthand side fails it can succeed on the righthand side by swapping the * arguments. E.g. b * (a * 2) can be elided by changing it to (2 * a) * b. * - * TODO only supports systems with backtrace(), windows can probably be - * supported too by using the appropriate windows apis. + * TODO only supports systems with backtrace(), Windows can probably be + * supported too by using the appropriate Windows APIs. */ #if defined HAVE_BACKTRACE && defined HAVE_DLFCN_H && ! defined PYPY_VERSION @@ -69,7 +69,7 @@ #endif /* * Heuristic size of the array in bytes at which backtrace overhead generation - * becomes less than speed gained by inplace operations. Depends on stack depth + * becomes less than speed gained by in-place operations. Depends on stack depth * being checked. Measurements with 10 stacks show it getting worthwhile * around 100KiB but to be conservative put it higher around where the L2 cache * spills. @@ -79,7 +79,7 @@ #else /* * in debug mode always elide but skip scalars as these can convert to 0d array - * during in place operations + * during in-place operations */ #define NPY_MIN_ELIDE_BYTES (32) #endif @@ -272,7 +272,7 @@ check_callers(int * cannot) /* * check if in "alhs @op@ orhs" that alhs is a temporary (refcnt == 1) so we - * can do inplace operations instead of creating a new temporary + * can do in-place operations instead of creating a new temporary * "cannot" is set to true if it cannot be done even with swapped arguments */ static int diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 3b2b39b18..ea4a1d85f 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -23,6 +23,7 @@ Released for unlimited redistribution. from __future__ import division, absolute_import, print_function import sys +import operator import warnings from functools import reduce @@ -1602,21 +1603,11 @@ def make_mask(m, copy=False, shrink=True, dtype=MaskType): """ if m is nomask: return nomask - elif isinstance(m, ndarray): - # We won't return after this point to make sure we can shrink the mask - # Fill the mask in case there are missing data - m = filled(m, True) - # Make sure the input dtype is valid - dtype = make_mask_descr(dtype) - if m.dtype == dtype: - if copy: - result = m.copy() - else: - result = m - else: - result = np.array(m, dtype=dtype, copy=copy) - else: - result = np.array(filled(m, True), dtype=MaskType) + + # Make sure the input dtype is valid. + dtype = make_mask_descr(dtype) + # Fill the mask in case there are missing data; turn it into an ndarray. + result = np.array(filled(m, True), copy=copy, dtype=dtype, subok=True) # Bas les masques ! if shrink and (not result.dtype.names) and (not result.any()): return nomask @@ -1733,7 +1724,8 @@ def mask_or(m1, m2, copy=False, shrink=True): if (dtype1 != dtype2): raise ValueError("Incompatible dtypes '%s'<>'%s'" % (dtype1, dtype2)) if dtype1.names: - newmask = np.empty_like(m1) + # Allocate an output mask array with the properly broadcast shape. + newmask = np.empty(np.broadcast(m1, m2).shape, dtype1) _recursive_mask_or(m1, m2, newmask) return newmask return make_mask(umath.logical_or(m1, m2), copy=copy, shrink=shrink) @@ -3873,81 +3865,84 @@ class MaskedArray(ndarray): return True return False - def __eq__(self, other): - """ - Check whether other equals self elementwise. + def _comparison(self, other, compare): + """Compare self with other using operator.eq or operator.ne. + + When either of the elements is masked, the result is masked as well, + but the underlying boolean data are still set, with self and other + considered equal if both are masked, and unequal otherwise. + For structured arrays, all fields are combined, with masked values + ignored. The result is masked if all fields were masked, with self + and other considered equal only if both were fully masked. """ - if self is masked: - return masked omask = getmask(other) - if omask is nomask: - check = self.filled(0).__eq__(other) - try: - check = check.view(type(self)) - check._mask = self._mask - except AttributeError: - # Dang, we have a bool instead of an array: return the bool - return check + smask = self.mask + mask = mask_or(smask, omask, copy=True) + + odata = getdata(other) + if mask.dtype.names: + # For possibly masked structured arrays we need to be careful, + # since the standard structured array comparison will use all + # fields, masked or not. To avoid masked fields influencing the + # outcome, we set all masked fields in self to other, so they'll + # count as equal. To prepare, we ensure we have the right shape. + broadcast_shape = np.broadcast(self, odata).shape + sbroadcast = np.broadcast_to(self, broadcast_shape, subok=True) + sbroadcast._mask = mask + sdata = sbroadcast.filled(odata) + # Now take care of the mask; the merged mask should have an item + # masked if all fields were masked (in one and/or other). + mask = (mask == np.ones((), mask.dtype)) + else: - odata = filled(other, 0) - check = self.filled(0).__eq__(odata).view(type(self)) - if self._mask is nomask: - check._mask = omask - else: - mask = mask_or(self._mask, omask) - if mask.dtype.names: - if mask.size > 1: - axis = 1 - else: - axis = None - try: - mask = mask.view((bool_, len(self.dtype))).all(axis) - except (ValueError, np.AxisError): - # TODO: what error are we trying to catch here? - # invalid axis, or invalid view? - mask = np.all([[f[n].all() for n in mask.dtype.names] - for f in mask], axis=axis) - check._mask = mask + # For regular arrays, just use the data as they come. + sdata = self.data + + check = compare(sdata, odata) + + if isinstance(check, (np.bool_, bool)): + return masked if mask else check + + if mask is not nomask: + # Adjust elements that were masked, which should be treated + # as equal if masked in both, unequal if masked in one. + # Note that this works automatically for structured arrays too. + check = np.where(mask, compare(smask, omask), check) + if mask.shape != check.shape: + # Guarantee consistency of the shape, making a copy since the + # the mask may need to get written to later. + mask = np.broadcast_to(mask, check.shape).copy() + + check = check.view(type(self)) + check._mask = mask return check - def __ne__(self, other): + def __eq__(self, other): + """Check whether other equals self elementwise. + + When either of the elements is masked, the result is masked as well, + but the underlying boolean data are still set, with self and other + considered equal if both are masked, and unequal otherwise. + + For structured arrays, all fields are combined, with masked values + ignored. The result is masked if all fields were masked, with self + and other considered equal only if both were fully masked. """ - Check whether other doesn't equal self elementwise + return self._comparison(other, operator.eq) + def __ne__(self, other): + """Check whether other does not equal self elementwise. + + When either of the elements is masked, the result is masked as well, + but the underlying boolean data are still set, with self and other + considered equal if both are masked, and unequal otherwise. + + For structured arrays, all fields are combined, with masked values + ignored. The result is masked if all fields were masked, with self + and other considered equal only if both were fully masked. """ - if self is masked: - return masked - omask = getmask(other) - if omask is nomask: - check = self.filled(0).__ne__(other) - try: - check = check.view(type(self)) - check._mask = self._mask - except AttributeError: - # In case check is a boolean (or a numpy.bool) - return check - else: - odata = filled(other, 0) - check = self.filled(0).__ne__(odata).view(type(self)) - if self._mask is nomask: - check._mask = omask - else: - mask = mask_or(self._mask, omask) - if mask.dtype.names: - if mask.size > 1: - axis = 1 - else: - axis = None - try: - mask = mask.view((bool_, len(self.dtype))).all(axis) - except (ValueError, np.AxisError): - # TODO: what error are we trying to catch here? - # invalid axis, or invalid view? - mask = np.all([[f[n].all() for n in mask.dtype.names] - for f in mask], axis=axis) - check._mask = mask - return check + return self._comparison(other, operator.ne) def __add__(self, other): """ @@ -5640,6 +5635,18 @@ class MaskedArray(ndarray): np.subtract(out, min_value, out=out, casting='unsafe') return out + def partition(self, *args, **kwargs): + warnings.warn("Warning: 'partition' will ignore the 'mask' " + "of the {}.".format(self.__class__.__name__), + stacklevel=2) + return super(MaskedArray, self).partition(*args, **kwargs) + + def argpartition(self, *args, **kwargs): + warnings.warn("Warning: 'argpartition' will ignore the 'mask' " + "of the {}.".format(self.__class__.__name__), + stacklevel=2) + return super(MaskedArray, self).argpartition(*args, **kwargs) + def take(self, indices, axis=None, out=None, mode='raise'): """ """ diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index f9d032f09..ca1ef16c4 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -1335,32 +1335,96 @@ class TestMaskedArrayArithmetic(TestCase): ndtype = [('A', int), ('B', int)] a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) test = (a == a) - assert_equal(test, [True, True]) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, False]) + test = (a == a[0]) + assert_equal(test.data, [True, False]) assert_equal(test.mask, [False, False]) b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) test = (a == b) - assert_equal(test, [False, True]) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [True, False]) + test = (a[0] == b) + assert_equal(test.data, [False, False]) assert_equal(test.mask, [True, False]) b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) test = (a == b) - assert_equal(test, [True, False]) + assert_equal(test.data, [True, True]) assert_equal(test.mask, [False, False]) + # complicated dtype, 2-dimensional array. + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([[(1, (1, 1)), (2, (2, 2))], + [(3, (3, 3)), (4, (4, 4))]], + mask=[[(0, (1, 0)), (0, (0, 1))], + [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) + test = (a[0, 0] == a) + assert_equal(test.data, [[True, False], [False, False]]) + assert_equal(test.mask, [[False, False], [False, True]]) def test_ne_on_structured(self): # Test the equality of structured arrays ndtype = [('A', int), ('B', int)] a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) test = (a != a) - assert_equal(test, [False, False]) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, False]) + test = (a != a[0]) + assert_equal(test.data, [False, True]) assert_equal(test.mask, [False, False]) b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) test = (a != b) - assert_equal(test, [True, False]) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [True, False]) + test = (a[0] != b) + assert_equal(test.data, [True, True]) assert_equal(test.mask, [True, False]) b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) test = (a != b) - assert_equal(test, [False, True]) + assert_equal(test.data, [False, False]) assert_equal(test.mask, [False, False]) + # complicated dtype, 2-dimensional array. + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([[(1, (1, 1)), (2, (2, 2))], + [(3, (3, 3)), (4, (4, 4))]], + mask=[[(0, (1, 0)), (0, (0, 1))], + [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) + test = (a[0, 0] != a) + assert_equal(test.data, [[False, True], [True, True]]) + assert_equal(test.mask, [[False, False], [False, True]]) + + def test_eq_ne_structured_extra(self): + # ensure simple examples are symmetric and make sense. + # from https://github.com/numpy/numpy/pull/8590#discussion_r101126465 + dt = np.dtype('i4,i4') + for m1 in (mvoid((1, 2), mask=(0, 0), dtype=dt), + mvoid((1, 2), mask=(0, 1), dtype=dt), + mvoid((1, 2), mask=(1, 0), dtype=dt), + mvoid((1, 2), mask=(1, 1), dtype=dt)): + ma1 = m1.view(MaskedArray) + r1 = ma1.view('2i4') + for m2 in (np.array((1, 1), dtype=dt), + mvoid((1, 1), dtype=dt), + mvoid((1, 0), mask=(0, 1), dtype=dt), + mvoid((3, 2), mask=(0, 1), dtype=dt)): + ma2 = m2.view(MaskedArray) + r2 = ma2.view('2i4') + eq_expected = (r1 == r2).all() + assert_equal(m1 == m2, eq_expected) + assert_equal(m2 == m1, eq_expected) + assert_equal(ma1 == m2, eq_expected) + assert_equal(m1 == ma2, eq_expected) + assert_equal(ma1 == ma2, eq_expected) + # Also check it is the same if we do it element by element. + el_by_el = [m1[name] == m2[name] for name in dt.names] + assert_equal(array(el_by_el, dtype=bool).all(), eq_expected) + ne_expected = (r1 != r2).any() + assert_equal(m1 != m2, ne_expected) + assert_equal(m2 != m1, ne_expected) + assert_equal(ma1 != m2, ne_expected) + assert_equal(m1 != ma2, ne_expected) + assert_equal(ma1 != ma2, ne_expected) + el_by_el = [m1[name] != m2[name] for name in dt.names] + assert_equal(array(el_by_el, dtype=bool).any(), ne_expected) def test_eq_with_None(self): # Really, comparisons with None should not be done, but check them @@ -1393,6 +1457,22 @@ class TestMaskedArrayArithmetic(TestCase): assert_equal(a == 0, False) assert_equal(a != 1, False) assert_equal(a != 0, True) + b = array(1, mask=True) + assert_equal(b == 0, masked) + assert_equal(b == 1, masked) + assert_equal(b != 0, masked) + assert_equal(b != 1, masked) + + def test_eq_different_dimensions(self): + m1 = array([1, 1], mask=[0, 1]) + # test comparison with both masked and regular arrays. + for m2 in (array([[0, 1], [1, 2]]), + np.array([[0, 1], [1, 2]])): + test = (m1 == m2) + assert_equal(test.data, [[False, False], + [True, False]]) + assert_equal(test.mask, [[False, True], + [False, True]]) def test_numpyarithmetics(self): # Check that the mask is not back-propagated when using numpy functions @@ -3978,7 +4058,15 @@ class TestMaskedArrayFunctions(TestCase): test = make_mask(mask, dtype=mask.dtype) assert_equal(test.dtype, bdtype) assert_equal(test, np.array([(0, 0), (0, 1)], dtype=bdtype)) - + # Ensure this also works for void + mask = np.array((False, True), dtype='?,?')[()] + assert_(isinstance(mask, np.void)) + test = make_mask(mask, dtype=mask.dtype) + assert_equal(test, mask) + assert_(test is not mask) + mask = np.array((0, 1), dtype='i4,i4')[()] + test2 = make_mask(mask, dtype=mask.dtype) + assert_equal(test2, test) # test that nomask is returned when m is nomask. bools = [True, False] dtypes = [MaskType, np.float] @@ -3987,7 +4075,6 @@ class TestMaskedArrayFunctions(TestCase): res = make_mask(nomask, copy=cpy, shrink=shr, dtype=dt) assert_(res is nomask, msgformat % (cpy, shr, dt)) - def test_mask_or(self): # Initialize mtype = [('a', np.bool), ('b', np.bool)] diff --git a/numpy/matrixlib/defmatrix.py b/numpy/matrixlib/defmatrix.py index bd14846c6..7026fad1a 100644 --- a/numpy/matrixlib/defmatrix.py +++ b/numpy/matrixlib/defmatrix.py @@ -3,49 +3,15 @@ from __future__ import division, absolute_import, print_function __all__ = ['matrix', 'bmat', 'mat', 'asmatrix'] import sys +import ast import numpy.core.numeric as N from numpy.core.numeric import concatenate, isscalar, binary_repr, identity, asanyarray from numpy.core.numerictypes import issubdtype -# make translation table -_numchars = '0123456789.-+jeEL' - -if sys.version_info[0] >= 3: - class _NumCharTable: - def __getitem__(self, i): - if chr(i) in _numchars: - return chr(i) - else: - return None - _table = _NumCharTable() - def _eval(astr): - str_ = astr.translate(_table) - if not str_: - raise TypeError("Invalid data string supplied: " + astr) - else: - return eval(str_) - -else: - _table = [None]*256 - for k in range(256): - _table[k] = chr(k) - _table = ''.join(_table) - - _todelete = [] - for k in _table: - if k not in _numchars: - _todelete.append(k) - _todelete = ''.join(_todelete) - del k - - def _eval(astr): - str_ = astr.translate(_table, _todelete) - if not str_: - raise TypeError("Invalid data string supplied: " + astr) - else: - return eval(str_) - def _convert_from_string(data): + for char in '[]': + data = data.replace(char, '') + rows = data.split(';') newdata = [] count = 0 @@ -54,7 +20,7 @@ def _convert_from_string(data): newrow = [] for col in trow: temp = col.split() - newrow.extend(map(_eval, temp)) + newrow.extend(map(ast.literal_eval, temp)) if count == 0: Ncols = len(newrow) elif len(newrow) != Ncols: diff --git a/numpy/matrixlib/tests/test_defmatrix.py b/numpy/matrixlib/tests/test_defmatrix.py index 6aa24e4ff..fd36d7770 100644 --- a/numpy/matrixlib/tests/test_defmatrix.py +++ b/numpy/matrixlib/tests/test_defmatrix.py @@ -35,8 +35,8 @@ class TestCtor(TestCase): assert_(mvec.shape == (1, 5)) def test_exceptions(self): - # Check for TypeError when called with invalid string data. - assert_raises(TypeError, matrix, "invalid") + # Check for ValueError when called with invalid string data. + assert_raises(ValueError, matrix, "invalid") def test_bmat_nondefault_str(self): A = np.array([[1, 2], [3, 4]]) @@ -186,6 +186,11 @@ class TestProperties(TestCase): A = matrix([[1, 0], [0, 1]]) assert_(repr(A) == "matrix([[1, 0],\n [0, 1]])") + def test_make_bool_matrix_from_str(self): + A = matrix('True; True; False') + B = matrix([[True], [True], [False]]) + assert_array_equal(A, B) + class TestCasting(TestCase): def test_basic(self): A = np.arange(100).reshape(10, 10) diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py index dc7d18933..e4c58e2bd 100644 --- a/numpy/random/tests/test_random.py +++ b/numpy/random/tests/test_random.py @@ -384,11 +384,11 @@ class TestRandomDist(TestCase): # Check multi dimensional array s = (2, 3) p = [0.1, 0.1, 0.1, 0.1, 0.4, 0.2] - assert_(np.random.choice(6, s, replace=True).shape, s) - assert_(np.random.choice(6, s, replace=False).shape, s) - assert_(np.random.choice(6, s, replace=True, p=p).shape, s) - assert_(np.random.choice(6, s, replace=False, p=p).shape, s) - assert_(np.random.choice(np.arange(6), s, replace=True).shape, s) + assert_equal(np.random.choice(6, s, replace=True).shape, s) + assert_equal(np.random.choice(6, s, replace=False).shape, s) + assert_equal(np.random.choice(6, s, replace=True, p=p).shape, s) + assert_equal(np.random.choice(6, s, replace=False, p=p).shape, s) + assert_equal(np.random.choice(np.arange(6), s, replace=True).shape, s) def test_bytes(self): np.random.seed(self.seed) |