ENH: Adding __array_ufunc__ capability to MaskedArrays.

This enables any NumPy ufunc called on a MaskedArray to automatically use the masked version of that function (when np.ma provides one), without needing to resort to explicit np.ma.func() calls.
author: Greg Lucas <greg.lucas@lasp.colorado.edu> 2020-04-15 10:28:02 -0600
committer: Greg Lucas <greg.m.lucas@gmail.com> 2022-07-13 10:57:09 -0600
commit: 8cd6f4ca00b6e0da3833fc267d50067b2ddbc069 (patch)
tree: 56f599a1c12b28de18a7ad3b77c4bd187b58aec8 /numpy
parent: 37846563156b3ed8287fc94d6a7d107a316c5ecf (diff)
download: numpy-8cd6f4ca00b6e0da3833fc267d50067b2ddbc069.tar.gz
4 files changed, 195 insertions, 48 deletions
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 8457551ca..1c3c32bdd 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -780,7 +780,7 @@ class TestDiff:
                      mask=[[False, False], [True, False],
                            [False, True], [True, True], [False, False]])
         out = diff(x)
-        assert_array_equal(out.data, [[1], [1], [1], [1], [1]])
+        assert_array_equal(out.data, [[1], [4], [6], [8], [1]])
         assert_array_equal(out.mask, [[False], [True],
                                       [True], [True], [False]])
         assert_(type(out) is type(x))
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 93eb74be3..7f17d4adf 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -22,6 +22,7 @@ Released for unlimited redistribution.
 # pylint: disable-msg=E1002
 import builtins
 import inspect
+from numbers import Number
 import operator
 import warnings
 import textwrap
@@ -58,10 +59,10 @@ __all__ = [
     'frombuffer', 'fromflex', 'fromfunction', 'getdata', 'getmask',
     'getmaskarray', 'greater', 'greater_equal', 'harden_mask', 'hypot',
     'identity', 'ids', 'indices', 'inner', 'innerproduct', 'isMA',
-    'isMaskedArray', 'is_mask', 'is_masked', 'isarray', 'left_shift',
-    'less', 'less_equal', 'log', 'log10', 'log2',
-    'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'make_mask',
-    'make_mask_descr', 'make_mask_none', 'mask_or', 'masked',
+    'isMaskedArray', 'is_mask', 'is_masked', 'isarray', 'isfinite',
+    'isinf', 'isnan', 'left_shift', 'less', 'less_equal', 'log', 'log10',
+    'log2', 'logical_and', 'logical_not', 'logical_or', 'logical_xor',
+    'make_mask', 'make_mask_descr', 'make_mask_none', 'mask_or', 'masked',
     'masked_array', 'masked_equal', 'masked_greater',
     'masked_greater_equal', 'masked_inside', 'masked_invalid',
     'masked_less', 'masked_less_equal', 'masked_not_equal',
@@ -925,6 +926,12 @@ class _MaskedUnaryOperation(_MaskedUFunc):
 
         """
         d = getdata(a)
+        if 'out' in kwargs:
+            # Need to drop the mask from the output array when being called
+            kwargs['out'] = getdata(kwargs['out'])
+        args = [getdata(arg) if isinstance(arg, MaskedArray) else arg
+                for arg in args]
+
         # Deal with domain
         if self.domain is not None:
             # Case 1.1. : Domained function
@@ -1048,7 +1055,7 @@ class _MaskedBinaryOperation(_MaskedUFunc):
             masked_result._update_from(b)
         return masked_result
 
-    def reduce(self, target, axis=0, dtype=None):
+    def reduce(self, target, axis=0, dtype=None, **kwargs):
         """
         Reduce `target` along the given `axis`.
 
@@ -1183,6 +1190,10 @@ class _DomainedBinaryOperation(_MaskedUFunc):
 
         # Transforms to a (subclass of) MaskedArray
         masked_result = result.view(get_masked_subclass(a, b))
+        # If the original masks were scalar or nomask, don't expand the result
+        # which comes from the isfinite initialization above
+        if getmask(a).shape + getmask(b).shape == ():
+            m = _shrink_mask(m)
         masked_result._mask = m
         if isinstance(a, MaskedArray):
             masked_result._update_from(a)
@@ -1209,6 +1220,9 @@ floor = _MaskedUnaryOperation(umath.floor)
 ceil = _MaskedUnaryOperation(umath.ceil)
 around = _MaskedUnaryOperation(np.round_)
 logical_not = _MaskedUnaryOperation(umath.logical_not)
+isinf = _MaskedUnaryOperation(umath.isinf)
+isnan = _MaskedUnaryOperation(umath.isnan)
+isfinite = _MaskedUnaryOperation(umath.isfinite)
 
 # Domained unary ufuncs
 sqrt = _MaskedUnaryOperation(umath.sqrt, 0.0,
@@ -3081,7 +3095,7 @@ class MaskedArray(ndarray):
             func, args, out_i = context
             # args sometimes contains outputs (gh-10459), which we don't want
             input_args = args[:func.nin]
-            m = reduce(mask_or, [getmaskarray(arg) for arg in input_args])
+            m = reduce(mask_or, [getmask(arg) for arg in input_args])
             # Get the domain mask
             domain = ufunc_domain.get(func, None)
             if domain is not None:
@@ -3109,7 +3123,6 @@ class MaskedArray(ndarray):
                     else:
                         # Don't modify inplace, we risk back-propagation
                         m = (m | d)
-
             # Make sure the mask has the proper size
             if result is not self and result.shape == () and m:
                 return masked
@@ -3119,6 +3132,85 @@ class MaskedArray(ndarray):
 
         return result
 
+    def __array_ufunc__(self, np_ufunc, method, *inputs, **kwargs):
+        """
+        MaskedArray capability for ufuncs
+
+        Handle masked versions of ufuncs if they are implemented within
+        the MaskedArray module. If the masked ufunc is not implemented,
+        this falls back to the standard numpy ndarray ufunc, which we
+        then call with the ndarray view of the input data.
+
+        """
+        # Output can be specified as arguments or as keyword arguments
+        outputs = kwargs.pop('out', ())
+        if not isinstance(outputs, tuple):
+            outputs = (outputs,)
+        outputs += inputs[np_ufunc.nin:]
+        args = inputs[:np_ufunc.nin]
+
+        # Determine what class types we are compatible with and return
+        # NotImplemented if we don't know how to handle them
+        for arg in args + outputs:
+            if not isinstance(arg, (ndarray, np.bool_, Number, list, str)):
+                return NotImplemented
+
+        # Get the equivalent masked version of the numpy function
+        # if it is in the module level functions
+        ma_ufunc = np.ma.__dict__.get(np_ufunc.__name__, np_ufunc)
+        if ma_ufunc is np_ufunc:
+            # We didn't have a Masked version of the ufunc, so we need to
+            # call the ndarray version with the data from the objects and
+            # prevent infinite recursion.
+
+            # Make ndarray views of the input arguments
+            args = [getdata(input) if isinstance(input, MaskedArray)
+                    else input for input in args]
+        else:
+            # The masked power function doesn't support extra args
+            if np_ufunc.__name__ in ('power'):
+                kwargs = {}
+
+        results = getattr(ma_ufunc, method)(*args, **kwargs)
+        if results is NotImplemented:
+            return NotImplemented
+        if method == 'at':
+            return
+        if np_ufunc.nout == 1:
+            results = (results,)
+        if outputs == ():
+            outputs = (None,) * np_ufunc.nout
+
+        returns = []
+        for i, result in enumerate(results):
+            output = outputs[i]
+
+            # Reapply the mask
+            if isinstance(result, ndarray) and result is not masked:
+                # Need to copy over all of the data and mask from results
+                # to the original object requested with out
+                if output is not None:
+                    if isinstance(output, MaskedArray):
+                        output._update_from(result)
+                        if isinstance(result, MaskedArray):
+                            output.data[:] = result._data
+                            output._mask = result._mask
+                        else:
+                            output.data[:] = result
+                    else:
+                        output[:] = result
+
+                    result = output
+
+            elif output is not None:
+                # An out value was requested, but the result is a scalar
+                output[()] = result
+                result = output
+
+            returns.append(result)
+
+        return returns[0] if np_ufunc.nout == 1 else returns
+
     def view(self, dtype=None, type=None, fill_value=None):
         """
         Return a view of the MaskedArray data.
@@ -3292,7 +3384,7 @@ class MaskedArray(ndarray):
                     return dout
         else:
             # Force dout to MA
-            dout = dout.view(type(self))
+            dout = MaskedArray(dout)
             # Inherit attributes from self
             dout._update_from(self)
             # Check the fill_value
@@ -3854,6 +3946,23 @@ class MaskedArray(ndarray):
                     result = self._data
         return result
 
+    def clip(self, a_min, a_max, out=None, **kwargs):
+        """docstring inherited
+        np.clip.__doc__
+
+        TODO: Should we ignore the clip where the data is masked?
+        It is currently in line with the old numpy version
+        """
+        result = self.data.clip(a_min, a_max, **kwargs).view(MaskedArray)
+        if out is not None:
+            # Just copy the data and mask
+            out.data[:] = getdata(result)
+            out._mask = self._mask
+            return out
+        result._update_from(self)
+        result._mask = self._mask
+        return result
+
     def compressed(self):
         """
         Return all the non-masked data as a 1-D array.
@@ -3947,10 +4056,15 @@ class MaskedArray(ndarray):
         # values.
         condition = np.asarray(condition)
 
-        _new = _data.compress(condition, axis=axis, out=out).view(type(self))
+        _new = _data.compress(condition, axis=axis).view(type(self))
         _new._update_from(self)
         if _mask is not nomask:
             _new._mask = _mask.compress(condition, axis=axis)
+        if out is not None:
+            out._update_from(self)
+            out.data[:] = _new.data
+            out._mask = _new.mask
+            return out
         return _new
 
     def _insert_masked_print(self):
@@ -4200,7 +4314,7 @@ class MaskedArray(ndarray):
         """
         if self._delegate_binop(other):
             return NotImplemented
-        return add(self, other)
+        return np.add(self, other)
 
     def __radd__(self, other):
         """
@@ -4209,7 +4323,7 @@ class MaskedArray(ndarray):
         """
         # In analogy with __rsub__ and __rdiv__, use original order:
         # we get here from `other + self`.
-        return add(other, self)
+        return np.add(other, self)
 
     def __sub__(self, other):
         """
@@ -4218,20 +4332,20 @@ class MaskedArray(ndarray):
         """
         if self._delegate_binop(other):
             return NotImplemented
-        return subtract(self, other)
+        return np.subtract(self, other)
 
     def __rsub__(self, other):
         """
         Subtract self from other, and return a new masked array.
 
         """
-        return subtract(other, self)
+        return np.subtract(other, self)
 
     def __mul__(self, other):
         "Multiply self by other, and return a new masked array."
         if self._delegate_binop(other):
             return NotImplemented
-        return multiply(self, other)
+        return np.multiply(self, other)
 
     def __rmul__(self, other):
         """
@@ -4240,7 +4354,7 @@ class MaskedArray(ndarray):
         """
         # In analogy with __rsub__ and __rdiv__, use original order:
         # we get here from `other * self`.
-        return multiply(other, self)
+        return np.multiply(other, self)
 
     def __div__(self, other):
         """
@@ -4249,7 +4363,7 @@ class MaskedArray(ndarray):
         """
         if self._delegate_binop(other):
             return NotImplemented
-        return divide(self, other)
+        return np.divide(self, other)
 
     def __truediv__(self, other):
         """
@@ -4258,14 +4372,14 @@ class MaskedArray(ndarray):
         """
         if self._delegate_binop(other):
             return NotImplemented
-        return true_divide(self, other)
+        return np.true_divide(self, other)
 
     def __rtruediv__(self, other):
         """
         Divide self into other, and return a new masked array.
 
         """
-        return true_divide(other, self)
+        return np.true_divide(other, self)
 
     def __floordiv__(self, other):
         """
@@ -4274,14 +4388,14 @@ class MaskedArray(ndarray):
         """
         if self._delegate_binop(other):
             return NotImplemented
-        return floor_divide(self, other)
+        return np.floor_divide(self, other)
 
     def __rfloordiv__(self, other):
         """
         Divide self into other, and return a new masked array.
 
         """
-        return floor_divide(other, self)
+        return np.floor_divide(other, self)
 
     def __pow__(self, other):
         """
@@ -5058,8 +5172,8 @@ class MaskedArray(ndarray):
         #!!!: implement out + test!
         m = self._mask
         if m is nomask:
-            result = super().trace(offset=offset, axis1=axis1, axis2=axis2,
-                                   out=out)
+            result = self.view(np.ndarray).trace(offset=offset, axis1=axis1,
+                                                    axis2=axis2, out=out)
             return result.astype(dtype)
         else:
             D = self.diagonal(offset=offset, axis1=axis1, axis2=axis2)
@@ -5159,7 +5273,9 @@ class MaskedArray(ndarray):
                 result = masked
             return result
         # Explicit output
-        result = self.filled(0).sum(axis, dtype=dtype, out=out, **kwargs)
+
+        self.filled(0).sum(axis, dtype=dtype, out=out.view(np.ndarray),
+                           **kwargs)
         if isinstance(out, MaskedArray):
             outmask = getmask(out)
             if outmask is nomask:
@@ -5309,7 +5425,10 @@ class MaskedArray(ndarray):
         """
         kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
         if self._mask is nomask:
-            result = super().mean(axis=axis, dtype=dtype, **kwargs)[()]
+            result = self.view(np.ndarray).mean(axis=axis,
+                                                   dtype=dtype, **kwargs)
+            if isinstance(result, np.ndarray):
+                result = MaskedArray(result, mask=False)
         else:
             is_float16_result = False
             if dtype is None:
@@ -5392,9 +5511,12 @@ class MaskedArray(ndarray):
 
         # Easy case: nomask, business as usual
         if self._mask is nomask:
-            ret = super().var(axis=axis, dtype=dtype, out=out, ddof=ddof,
-                              **kwargs)[()]
+            ret = self.view(np.ndarray).var(axis=axis, dtype=dtype,
+                                            ddof=ddof, **kwargs)
+            if isinstance(ret, np.ndarray):
+                ret = MaskedArray(ret, mask=False)
             if out is not None:
+                out.flat = ret
                 if isinstance(out, MaskedArray):
                     out.__setmask__(nomask)
                 return out
@@ -5452,12 +5574,10 @@ class MaskedArray(ndarray):
         numpy.std : Equivalent function
         """
         kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
-
         dvar = self.var(axis, dtype, out, ddof, **kwargs)
         if dvar is not masked:
             if out is not None:
-                np.power(out, 0.5, out=out, casting='unsafe')
-                return out
+                return np.power(out, 0.5, out=out, casting='unsafe')
             dvar = sqrt(dvar)
         return dvar
 
@@ -5472,6 +5592,10 @@ class MaskedArray(ndarray):
         numpy.ndarray.round : corresponding function for ndarrays
         numpy.around : equivalent function
         """
+        stored_out = None
+        if isinstance(out, MaskedArray):
+            stored_out = out
+            out = getdata(out)
         result = self._data.round(decimals=decimals, out=out).view(type(self))
         if result.ndim > 0:
             result._mask = self._mask
@@ -5482,7 +5606,9 @@ class MaskedArray(ndarray):
         # No explicit output: we're done
         if out is None:
             return result
-        if isinstance(out, MaskedArray):
+        if stored_out is not None:
+            # We got in a masked array originally, so we need to return one
+            out = stored_out
             out.__setmask__(self._mask)
         return out
 
@@ -5870,7 +5996,7 @@ class MaskedArray(ndarray):
             "`mini` is deprecated; use the `min` method or "
             "`np.ma.minimum.reduce instead.",
             DeprecationWarning, stacklevel=2)
-        return minimum.reduce(self, axis)
+        return MaskedArray(np.min(self, axis))
 
     def max(self, axis=None, out=None, fill_value=None, keepdims=np._NoValue):
         """
@@ -6034,13 +6160,13 @@ class MaskedArray(ndarray):
         warnings.warn("Warning: 'partition' will ignore the 'mask' "
                       f"of the {self.__class__.__name__}.",
                       stacklevel=2)
-        return super().partition(*args, **kwargs)
+        return self.view(np.ndarray).partition(*args, **kwargs)
 
     def argpartition(self, *args, **kwargs):
         warnings.warn("Warning: 'argpartition' will ignore the 'mask' "
                       f"of the {self.__class__.__name__}.",
                       stacklevel=2)
-        return super().argpartition(*args, **kwargs)
+        return self.view(np.ndarray).argpartition(*args, **kwargs)
 
     def take(self, indices, axis=None, out=None, mode='raise'):
         """
@@ -6730,7 +6856,7 @@ class _extrema_operation(_MaskedUFunc):
             return self.reduce(a)
         return where(self.compare(a, b), a, b)
 
-    def reduce(self, target, axis=np._NoValue):
+    def reduce(self, target, axis=np._NoValue, **kwargs):
         "Reduce target along the given axis."
         target = narray(target, copy=False, subok=True)
         m = getmask(target)
@@ -6745,12 +6871,10 @@ class _extrema_operation(_MaskedUFunc):
             axis = None
 
         if axis is not np._NoValue:
-            kwargs = dict(axis=axis)
-        else:
-            kwargs = dict()
+            kwargs['axis'] = axis
 
         if m is nomask:
-            t = self.f.reduce(target, **kwargs)
+            t = self.f.reduce(target.view(np.ndarray), **kwargs)
         else:
             target = target.filled(
                 self.fill_value_func(target)).view(type(target))
@@ -7954,6 +8078,23 @@ def allclose(a, b, masked_equal=True, rtol=1e-5, atol=1e-8):
     """
     x = masked_array(a, copy=False)
     y = masked_array(b, copy=False)
+    if masked_equal:
+        # Apply the combined mask right away to avoid comparisons at the
+        # masked locations (assumed mask is True)
+        m = mask_or(getmask(x), getmask(y))
+        # Expand scalars to the proper dimension for comparison if needed
+        if shape(x) != shape(y):
+            if size(x) == 1:
+                # scalar a
+                x = masked_array(np.ones(shape=shape(y))*x, mask=m)
+            elif size(y) == 1:
+                # scalar b
+                y = masked_array(np.ones(shape=shape(x))*y, mask=m)
+            else:
+                raise ValueError("Cannot compare arrays of different shapes.")
+        else:
+            x = masked_array(a, copy=False, mask=m)
+            y = masked_array(b, copy=False, mask=m)
 
     # make sure y is an inexact type to avoid abs(MIN_INT); will cause
     # casting of x later.
@@ -7966,8 +8107,7 @@ def allclose(a, b, masked_equal=True, rtol=1e-5, atol=1e-8):
         if y.dtype != dtype:
             y = masked_array(y, dtype=dtype, copy=False)
 
-    m = mask_or(getmask(x), getmask(y))
-    xinf = np.isinf(masked_array(x, copy=False, mask=m)).filled(False)
+    xinf = filled(np.isinf(x), False)
     # If we have some infs, they should fall at the same place.
     if not np.all(xinf == filled(np.isinf(y), False)):
         return False
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index d2986012b..b3016da5a 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -588,8 +588,8 @@ def average(a, axis=None, weights=None, returned=False, *,
     >>> avg, sumweights = np.ma.average(x, axis=0, weights=[1, 2, 3],
     ...                                 returned=True)
     >>> avg
-    masked_array(data=[2.6666666666666665, 3.6666666666666665],
-                 mask=[False, False],
+    masked_array(data=[2.66666667, 3.66666667],
+                 mask=False,
            fill_value=1e+20)
 
     With ``keepdims=True``, the following result has shape (3, 1).
@@ -2016,8 +2016,15 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
         not_m = ~m
         if w is not None:
             w = w[not_m]
-        return np.polyfit(x[not_m], y[not_m], deg, rcond, full, w, cov)
-    else:
-        return np.polyfit(x, y, deg, rcond, full, w, cov)
+        x = x[not_m]
+        y = y[not_m]
+
+    # Only pass the ndarray data
+    if w is not None:
+        w = w.view(np.ndarray)
+    x = x.view(np.ndarray)
+    y = y.view(np.ndarray)
+
+    return np.polyfit(x, y, deg, rcond, full, w, cov)
 
 polyfit.__doc__ = ma.doc_note(np.polyfit.__doc__, polyfit.__doc__)
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index b056d5169..5b779edcb 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -3208,7 +3208,7 @@ class TestMaskedArrayMethods:
         assert_equal(b.fill_value, 9999)
         assert_equal(b, a[condition])
 
-        condition = (a < 4.)
+        condition = (a.data < 4.)
         b = a.compress(condition)
         assert_equal(b._data, [1., 2., 3.])
         assert_equal(b._mask, [0, 0, 1])
@@ -5367,7 +5367,7 @@ def test_ufunc_with_out_varied():
     a        = array([ 1,  2,  3], mask=[1, 0, 0])
     b        = array([10, 20, 30], mask=[1, 0, 0])
     out      = array([ 0,  0,  0], mask=[0, 0, 1])
-    expected = array([11, 22, 33], mask=[1, 0, 0])
+    expected = array([1, 22, 33], mask=[1, 0, 0])
 
     out_pos = out.copy()
     res_pos = np.add(a, b, out_pos)
author	Greg Lucas <greg.lucas@lasp.colorado.edu>	2020-04-15 10:28:02 -0600
committer	Greg Lucas <greg.m.lucas@gmail.com>	2022-07-13 10:57:09 -0600
commit	8cd6f4ca00b6e0da3833fc267d50067b2ddbc069 (patch)
tree	56f599a1c12b28de18a7ad3b77c4bd187b58aec8 /numpy
parent	37846563156b3ed8287fc94d6a7d107a316c5ecf (diff)
download	numpy-8cd6f4ca00b6e0da3833fc267d50067b2ddbc069.tar.gz