summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- doc/release/1.13.0-notes.rst 14
-rw-r--r-- numpy/core/src/multiarray/temp_elide.c 20
-rw-r--r-- numpy/ma/core.py 171
-rw-r--r-- numpy/ma/tests/test_core.py 103
-rw-r--r-- numpy/matrixlib/defmatrix.py 44
-rw-r--r-- numpy/matrixlib/tests/test_defmatrix.py 9
-rw-r--r-- numpy/random/tests/test_random.py 10
7 files changed, 225 insertions, 146 deletions
diff --git a/doc/release/1.13.0-notes.rst b/doc/release/1.13.0-notes.rst
index 049653ea4..1e937cdd4 100644
--- a/doc/release/1.13.0-notes.rst
+++ b/doc/release/1.13.0-notes.rst
@@ -179,6 +179,20 @@ Better default repr for ``ndarray`` subclasses
Subclasses of ndarray with no ``repr`` specialization now correctly indent
their data and type lines.
+More reliable comparisons of masked arrays
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Comparisons of masked arrays were buggy for masked scalars and failed for
+structured arrays with dimension higher than one. Both problems are now
+solved. In the process, it was ensured that in getting the result for a
+structured array, masked fields are properly ignored, i.e., the result is equal
+if all fields that are non-masked in both are equal, thus making the behaviour
+identical to what one gets by comparing an unstructured masked array and then
+doing ``.all()`` over some axis.
+
+np.matrix with boolean elements can now be created using the string syntax
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+``np.matrix`` failed whenever one attempted to use it with booleans, e.g.,
+``np.matrix('True')``. Now, this works as expected.
Changes
=======
diff --git a/numpy/core/src/multiarray/temp_elide.c b/numpy/core/src/multiarray/temp_elide.c
index 84612da3f..fae6763e4 100644
--- a/numpy/core/src/multiarray/temp_elide.c
+++ b/numpy/core/src/multiarray/temp_elide.c
@@ -10,7 +10,7 @@
/*
* Functions used to try to avoid/elide temporaries in python expressions
- * of type a + b + b by translating some operations into inplace operations.
+ * of type a + b + b by translating some operations into in-place operations.
* This example translates to this bytecode:
*
* 0 LOAD_FAST 0 (a)
@@ -23,7 +23,7 @@
* instructions so they always have a reference count larger than 1.
* The temporary of the first BINARY_ADD on the other hand only has a count of
* 1. Only temporaries can have a count of 1 in python so we can use this to
- * transform the second operation into an inplace operation and not affect the
+ * transform the second operation into an in-place operation and not affect the
* output of the program.
* CPython does the same thing to resize memory instead of copying when doing
* string concatenation.
@@ -41,19 +41,19 @@
* This is an expensive operation so temporaries are only avoided for rather
* large arrays.
*
- * A possible future improvement would be to change cpython to give as access
+ * A possible future improvement would be to change cpython to give us access
* to the top of the stack. Then we could just check that the objects involved
* are on the cpython stack instead of checking the function callstack.
*
- * Elision can be applied to all operations that do have inplace variants and
+ * Elision can be applied to all operations that do have in-place variants and
* do not change types (addition, subtraction, multiplication, float division,
* logical and bitwise operations ...)
* For commutative operations (addition, multiplication, ...) if eliding into
- * the lefthand side fails it can succedd on the righthand side by swapping the
+ * the lefthand side fails it can succeed on the righthand side by swapping the
* arguments. E.g. b * (a * 2) can be elided by changing it to (2 * a) * b.
*
- * TODO only supports systems with backtrace(), windows can probably be
- * supported too by using the appropriate windows apis.
+ * TODO only supports systems with backtrace(), Windows can probably be
+ * supported too by using the appropriate Windows APIs.
*/
#if defined HAVE_BACKTRACE && defined HAVE_DLFCN_H && ! defined PYPY_VERSION
@@ -69,7 +69,7 @@
#endif
/*
* Heuristic size of the array in bytes at which backtrace overhead generation
- * becomes less than speed gained by inplace operations. Depends on stack depth
+ * becomes less than speed gained by in-place operations. Depends on stack depth
* being checked. Measurements with 10 stacks show it getting worthwhile
* around 100KiB but to be conservative put it higher around where the L2 cache
* spills.
@@ -79,7 +79,7 @@
#else
/*
* in debug mode always elide but skip scalars as these can convert to 0d array
- * during in place operations
+ * during in-place operations
*/
#define NPY_MIN_ELIDE_BYTES (32)
#endif
@@ -272,7 +272,7 @@ check_callers(int * cannot)
/*
* check if in "alhs @op@ orhs" that alhs is a temporary (refcnt == 1) so we
- * can do inplace operations instead of creating a new temporary
+ * can do in-place operations instead of creating a new temporary
* "cannot" is set to true if it cannot be done even with swapped arguments
*/
static int
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 3b2b39b18..ea4a1d85f 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -23,6 +23,7 @@ Released for unlimited redistribution.
from __future__ import division, absolute_import, print_function
import sys
+import operator
import warnings
from functools import reduce
@@ -1602,21 +1603,11 @@ def make_mask(m, copy=False, shrink=True, dtype=MaskType):
"""
if m is nomask:
return nomask
- elif isinstance(m, ndarray):
- # We won't return after this point to make sure we can shrink the mask
- # Fill the mask in case there are missing data
- m = filled(m, True)
- # Make sure the input dtype is valid
- dtype = make_mask_descr(dtype)
- if m.dtype == dtype:
- if copy:
- result = m.copy()
- else:
- result = m
- else:
- result = np.array(m, dtype=dtype, copy=copy)
- else:
- result = np.array(filled(m, True), dtype=MaskType)
+
+ # Make sure the input dtype is valid.
+ dtype = make_mask_descr(dtype)
+ # Fill the mask in case there are missing data; turn it into an ndarray.
+ result = np.array(filled(m, True), copy=copy, dtype=dtype, subok=True)
# Bas les masques !
if shrink and (not result.dtype.names) and (not result.any()):
return nomask
@@ -1733,7 +1724,8 @@ def mask_or(m1, m2, copy=False, shrink=True):
if (dtype1 != dtype2):
raise ValueError("Incompatible dtypes '%s'<>'%s'" % (dtype1, dtype2))
if dtype1.names:
- newmask = np.empty_like(m1)
+ # Allocate an output mask array with the properly broadcast shape.
+ newmask = np.empty(np.broadcast(m1, m2).shape, dtype1)
_recursive_mask_or(m1, m2, newmask)
return newmask
return make_mask(umath.logical_or(m1, m2), copy=copy, shrink=shrink)
@@ -3873,81 +3865,84 @@ class MaskedArray(ndarray):
return True
return False
- def __eq__(self, other):
- """
- Check whether other equals self elementwise.
+ def _comparison(self, other, compare):
+ """Compare self with other using operator.eq or operator.ne.
+
+ When either of the elements is masked, the result is masked as well,
+ but the underlying boolean data are still set, with self and other
+ considered equal if both are masked, and unequal otherwise.
+ For structured arrays, all fields are combined, with masked values
+ ignored. The result is masked if all fields were masked, with self
+ and other considered equal only if both were fully masked.
"""
- if self is masked:
- return masked
omask = getmask(other)
- if omask is nomask:
- check = self.filled(0).__eq__(other)
- try:
- check = check.view(type(self))
- check._mask = self._mask
- except AttributeError:
- # Dang, we have a bool instead of an array: return the bool
- return check
+ smask = self.mask
+ mask = mask_or(smask, omask, copy=True)
+
+ odata = getdata(other)
+ if mask.dtype.names:
+ # For possibly masked structured arrays we need to be careful,
+ # since the standard structured array comparison will use all
+ # fields, masked or not. To avoid masked fields influencing the
+ # outcome, we set all masked fields in self to other, so they'll
+ # count as equal. To prepare, we ensure we have the right shape.
+ broadcast_shape = np.broadcast(self, odata).shape
+ sbroadcast = np.broadcast_to(self, broadcast_shape, subok=True)
+ sbroadcast._mask = mask
+ sdata = sbroadcast.filled(odata)
+ # Now take care of the mask; the merged mask should have an item
+ # masked if all fields were masked (in one and/or other).
+ mask = (mask == np.ones((), mask.dtype))
+
else:
- odata = filled(other, 0)
- check = self.filled(0).__eq__(odata).view(type(self))
- if self._mask is nomask:
- check._mask = omask
- else:
- mask = mask_or(self._mask, omask)
- if mask.dtype.names:
- if mask.size > 1:
- axis = 1
- else:
- axis = None
- try:
- mask = mask.view((bool_, len(self.dtype))).all(axis)
- except (ValueError, np.AxisError):
- # TODO: what error are we trying to catch here?
- # invalid axis, or invalid view?
- mask = np.all([[f[n].all() for n in mask.dtype.names]
- for f in mask], axis=axis)
- check._mask = mask
+ # For regular arrays, just use the data as they come.
+ sdata = self.data
+
+ check = compare(sdata, odata)
+
+ if isinstance(check, (np.bool_, bool)):
+ return masked if mask else check
+
+ if mask is not nomask:
+ # Adjust elements that were masked, which should be treated
+ # as equal if masked in both, unequal if masked in one.
+ # Note that this works automatically for structured arrays too.
+ check = np.where(mask, compare(smask, omask), check)
+ if mask.shape != check.shape:
+ # Guarantee consistency of the shape, making a copy since the
+ # mask may need to get written to later.
+ mask = np.broadcast_to(mask, check.shape).copy()
+
+ check = check.view(type(self))
+ check._mask = mask
return check
- def __ne__(self, other):
+ def __eq__(self, other):
+ """Check whether other equals self elementwise.
+
+ When either of the elements is masked, the result is masked as well,
+ but the underlying boolean data are still set, with self and other
+ considered equal if both are masked, and unequal otherwise.
+
+ For structured arrays, all fields are combined, with masked values
+ ignored. The result is masked if all fields were masked, with self
+ and other considered equal only if both were fully masked.
"""
- Check whether other doesn't equal self elementwise
+ return self._comparison(other, operator.eq)
+ def __ne__(self, other):
+ """Check whether other does not equal self elementwise.
+
+ When either of the elements is masked, the result is masked as well,
+ but the underlying boolean data are still set, with self and other
+ considered equal if both are masked, and unequal otherwise.
+
+ For structured arrays, all fields are combined, with masked values
+ ignored. The result is masked if all fields were masked, with self
+ and other considered equal only if both were fully masked.
"""
- if self is masked:
- return masked
- omask = getmask(other)
- if omask is nomask:
- check = self.filled(0).__ne__(other)
- try:
- check = check.view(type(self))
- check._mask = self._mask
- except AttributeError:
- # In case check is a boolean (or a numpy.bool)
- return check
- else:
- odata = filled(other, 0)
- check = self.filled(0).__ne__(odata).view(type(self))
- if self._mask is nomask:
- check._mask = omask
- else:
- mask = mask_or(self._mask, omask)
- if mask.dtype.names:
- if mask.size > 1:
- axis = 1
- else:
- axis = None
- try:
- mask = mask.view((bool_, len(self.dtype))).all(axis)
- except (ValueError, np.AxisError):
- # TODO: what error are we trying to catch here?
- # invalid axis, or invalid view?
- mask = np.all([[f[n].all() for n in mask.dtype.names]
- for f in mask], axis=axis)
- check._mask = mask
- return check
+ return self._comparison(other, operator.ne)
def __add__(self, other):
"""
@@ -5640,6 +5635,18 @@ class MaskedArray(ndarray):
np.subtract(out, min_value, out=out, casting='unsafe')
return out
+ def partition(self, *args, **kwargs):
+ warnings.warn("Warning: 'partition' will ignore the 'mask' "
+ "of the {}.".format(self.__class__.__name__),
+ stacklevel=2)
+ return super(MaskedArray, self).partition(*args, **kwargs)
+
+ def argpartition(self, *args, **kwargs):
+ warnings.warn("Warning: 'argpartition' will ignore the 'mask' "
+ "of the {}.".format(self.__class__.__name__),
+ stacklevel=2)
+ return super(MaskedArray, self).argpartition(*args, **kwargs)
+
def take(self, indices, axis=None, out=None, mode='raise'):
"""
"""
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index f9d032f09..ca1ef16c4 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -1335,32 +1335,96 @@ class TestMaskedArrayArithmetic(TestCase):
ndtype = [('A', int), ('B', int)]
a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype)
test = (a == a)
- assert_equal(test, [True, True])
+ assert_equal(test.data, [True, True])
+ assert_equal(test.mask, [False, False])
+ test = (a == a[0])
+ assert_equal(test.data, [True, False])
assert_equal(test.mask, [False, False])
b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype)
test = (a == b)
- assert_equal(test, [False, True])
+ assert_equal(test.data, [False, True])
+ assert_equal(test.mask, [True, False])
+ test = (a[0] == b)
+ assert_equal(test.data, [False, False])
assert_equal(test.mask, [True, False])
b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype)
test = (a == b)
- assert_equal(test, [True, False])
+ assert_equal(test.data, [True, True])
assert_equal(test.mask, [False, False])
+ # complicated dtype, 2-dimensional array.
+ ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])]
+ a = array([[(1, (1, 1)), (2, (2, 2))],
+ [(3, (3, 3)), (4, (4, 4))]],
+ mask=[[(0, (1, 0)), (0, (0, 1))],
+ [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype)
+ test = (a[0, 0] == a)
+ assert_equal(test.data, [[True, False], [False, False]])
+ assert_equal(test.mask, [[False, False], [False, True]])
def test_ne_on_structured(self):
# Test the equality of structured arrays
ndtype = [('A', int), ('B', int)]
a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype)
test = (a != a)
- assert_equal(test, [False, False])
+ assert_equal(test.data, [False, False])
+ assert_equal(test.mask, [False, False])
+ test = (a != a[0])
+ assert_equal(test.data, [False, True])
assert_equal(test.mask, [False, False])
b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype)
test = (a != b)
- assert_equal(test, [True, False])
+ assert_equal(test.data, [True, False])
+ assert_equal(test.mask, [True, False])
+ test = (a[0] != b)
+ assert_equal(test.data, [True, True])
assert_equal(test.mask, [True, False])
b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype)
test = (a != b)
- assert_equal(test, [False, True])
+ assert_equal(test.data, [False, False])
assert_equal(test.mask, [False, False])
+ # complicated dtype, 2-dimensional array.
+ ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])]
+ a = array([[(1, (1, 1)), (2, (2, 2))],
+ [(3, (3, 3)), (4, (4, 4))]],
+ mask=[[(0, (1, 0)), (0, (0, 1))],
+ [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype)
+ test = (a[0, 0] != a)
+ assert_equal(test.data, [[False, True], [True, True]])
+ assert_equal(test.mask, [[False, False], [False, True]])
+
+ def test_eq_ne_structured_extra(self):
+ # ensure simple examples are symmetric and make sense.
+ # from https://github.com/numpy/numpy/pull/8590#discussion_r101126465
+ dt = np.dtype('i4,i4')
+ for m1 in (mvoid((1, 2), mask=(0, 0), dtype=dt),
+ mvoid((1, 2), mask=(0, 1), dtype=dt),
+ mvoid((1, 2), mask=(1, 0), dtype=dt),
+ mvoid((1, 2), mask=(1, 1), dtype=dt)):
+ ma1 = m1.view(MaskedArray)
+ r1 = ma1.view('2i4')
+ for m2 in (np.array((1, 1), dtype=dt),
+ mvoid((1, 1), dtype=dt),
+ mvoid((1, 0), mask=(0, 1), dtype=dt),
+ mvoid((3, 2), mask=(0, 1), dtype=dt)):
+ ma2 = m2.view(MaskedArray)
+ r2 = ma2.view('2i4')
+ eq_expected = (r1 == r2).all()
+ assert_equal(m1 == m2, eq_expected)
+ assert_equal(m2 == m1, eq_expected)
+ assert_equal(ma1 == m2, eq_expected)
+ assert_equal(m1 == ma2, eq_expected)
+ assert_equal(ma1 == ma2, eq_expected)
+ # Also check it is the same if we do it element by element.
+ el_by_el = [m1[name] == m2[name] for name in dt.names]
+ assert_equal(array(el_by_el, dtype=bool).all(), eq_expected)
+ ne_expected = (r1 != r2).any()
+ assert_equal(m1 != m2, ne_expected)
+ assert_equal(m2 != m1, ne_expected)
+ assert_equal(ma1 != m2, ne_expected)
+ assert_equal(m1 != ma2, ne_expected)
+ assert_equal(ma1 != ma2, ne_expected)
+ el_by_el = [m1[name] != m2[name] for name in dt.names]
+ assert_equal(array(el_by_el, dtype=bool).any(), ne_expected)
def test_eq_with_None(self):
# Really, comparisons with None should not be done, but check them
@@ -1393,6 +1457,22 @@ class TestMaskedArrayArithmetic(TestCase):
assert_equal(a == 0, False)
assert_equal(a != 1, False)
assert_equal(a != 0, True)
+ b = array(1, mask=True)
+ assert_equal(b == 0, masked)
+ assert_equal(b == 1, masked)
+ assert_equal(b != 0, masked)
+ assert_equal(b != 1, masked)
+
+ def test_eq_different_dimensions(self):
+ m1 = array([1, 1], mask=[0, 1])
+ # test comparison with both masked and regular arrays.
+ for m2 in (array([[0, 1], [1, 2]]),
+ np.array([[0, 1], [1, 2]])):
+ test = (m1 == m2)
+ assert_equal(test.data, [[False, False],
+ [True, False]])
+ assert_equal(test.mask, [[False, True],
+ [False, True]])
def test_numpyarithmetics(self):
# Check that the mask is not back-propagated when using numpy functions
@@ -3978,7 +4058,15 @@ class TestMaskedArrayFunctions(TestCase):
test = make_mask(mask, dtype=mask.dtype)
assert_equal(test.dtype, bdtype)
assert_equal(test, np.array([(0, 0), (0, 1)], dtype=bdtype))
-
+ # Ensure this also works for void
+ mask = np.array((False, True), dtype='?,?')[()]
+ assert_(isinstance(mask, np.void))
+ test = make_mask(mask, dtype=mask.dtype)
+ assert_equal(test, mask)
+ assert_(test is not mask)
+ mask = np.array((0, 1), dtype='i4,i4')[()]
+ test2 = make_mask(mask, dtype=mask.dtype)
+ assert_equal(test2, test)
# test that nomask is returned when m is nomask.
bools = [True, False]
dtypes = [MaskType, np.float]
@@ -3987,7 +4075,6 @@ class TestMaskedArrayFunctions(TestCase):
res = make_mask(nomask, copy=cpy, shrink=shr, dtype=dt)
assert_(res is nomask, msgformat % (cpy, shr, dt))
-
def test_mask_or(self):
# Initialize
mtype = [('a', np.bool), ('b', np.bool)]
diff --git a/numpy/matrixlib/defmatrix.py b/numpy/matrixlib/defmatrix.py
index bd14846c6..7026fad1a 100644
--- a/numpy/matrixlib/defmatrix.py
+++ b/numpy/matrixlib/defmatrix.py
@@ -3,49 +3,15 @@ from __future__ import division, absolute_import, print_function
__all__ = ['matrix', 'bmat', 'mat', 'asmatrix']
import sys
+import ast
import numpy.core.numeric as N
from numpy.core.numeric import concatenate, isscalar, binary_repr, identity, asanyarray
from numpy.core.numerictypes import issubdtype
-# make translation table
-_numchars = '0123456789.-+jeEL'
-
-if sys.version_info[0] >= 3:
- class _NumCharTable:
- def __getitem__(self, i):
- if chr(i) in _numchars:
- return chr(i)
- else:
- return None
- _table = _NumCharTable()
- def _eval(astr):
- str_ = astr.translate(_table)
- if not str_:
- raise TypeError("Invalid data string supplied: " + astr)
- else:
- return eval(str_)
-
-else:
- _table = [None]*256
- for k in range(256):
- _table[k] = chr(k)
- _table = ''.join(_table)
-
- _todelete = []
- for k in _table:
- if k not in _numchars:
- _todelete.append(k)
- _todelete = ''.join(_todelete)
- del k
-
- def _eval(astr):
- str_ = astr.translate(_table, _todelete)
- if not str_:
- raise TypeError("Invalid data string supplied: " + astr)
- else:
- return eval(str_)
-
def _convert_from_string(data):
+ for char in '[]':
+ data = data.replace(char, '')
+
rows = data.split(';')
newdata = []
count = 0
@@ -54,7 +20,7 @@ def _convert_from_string(data):
newrow = []
for col in trow:
temp = col.split()
- newrow.extend(map(_eval, temp))
+ newrow.extend(map(ast.literal_eval, temp))
if count == 0:
Ncols = len(newrow)
elif len(newrow) != Ncols:
diff --git a/numpy/matrixlib/tests/test_defmatrix.py b/numpy/matrixlib/tests/test_defmatrix.py
index 6aa24e4ff..fd36d7770 100644
--- a/numpy/matrixlib/tests/test_defmatrix.py
+++ b/numpy/matrixlib/tests/test_defmatrix.py
@@ -35,8 +35,8 @@ class TestCtor(TestCase):
assert_(mvec.shape == (1, 5))
def test_exceptions(self):
- # Check for TypeError when called with invalid string data.
- assert_raises(TypeError, matrix, "invalid")
+ # Check for ValueError when called with invalid string data.
+ assert_raises(ValueError, matrix, "invalid")
def test_bmat_nondefault_str(self):
A = np.array([[1, 2], [3, 4]])
@@ -186,6 +186,11 @@ class TestProperties(TestCase):
A = matrix([[1, 0], [0, 1]])
assert_(repr(A) == "matrix([[1, 0],\n [0, 1]])")
+ def test_make_bool_matrix_from_str(self):
+ A = matrix('True; True; False')
+ B = matrix([[True], [True], [False]])
+ assert_array_equal(A, B)
+
class TestCasting(TestCase):
def test_basic(self):
A = np.arange(100).reshape(10, 10)
diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py
index dc7d18933..e4c58e2bd 100644
--- a/numpy/random/tests/test_random.py
+++ b/numpy/random/tests/test_random.py
@@ -384,11 +384,11 @@ class TestRandomDist(TestCase):
# Check multi dimensional array
s = (2, 3)
p = [0.1, 0.1, 0.1, 0.1, 0.4, 0.2]
- assert_(np.random.choice(6, s, replace=True).shape, s)
- assert_(np.random.choice(6, s, replace=False).shape, s)
- assert_(np.random.choice(6, s, replace=True, p=p).shape, s)
- assert_(np.random.choice(6, s, replace=False, p=p).shape, s)
- assert_(np.random.choice(np.arange(6), s, replace=True).shape, s)
+ assert_equal(np.random.choice(6, s, replace=True).shape, s)
+ assert_equal(np.random.choice(6, s, replace=False).shape, s)
+ assert_equal(np.random.choice(6, s, replace=True, p=p).shape, s)
+ assert_equal(np.random.choice(6, s, replace=False, p=p).shape, s)
+ assert_equal(np.random.choice(np.arange(6), s, replace=True).shape, s)
def test_bytes(self):
np.random.seed(self.seed)