Merge pull request #7421 from pwolfram/nancumsumprod

ENH: adds np.nancumsum and np.nancumprod
author: Stephan Hoyer <shoyer@gmail.com> 2016-03-26 11:59:37 -0700
committer: Stephan Hoyer <shoyer@gmail.com> 2016-03-26 11:59:37 -0700
commit: ef389eec83d23e9159c595f6ea8d78e8e7abbae3 (patch)
tree: d904dd1e5eb1dafe7f2cae4bf091b29bf6d9cbe6
parent: 4b200d27e9bfba4b28d51cd5d5132a2a4cacb8f1 (diff)
parent: a76b8728a71721f0b92c5fa7c0cbcc0f41cceb3e (diff)
download: numpy-ef389eec83d23e9159c595f6ea8d78e8e7abbae3.tar.gz
4 files changed, 252 insertions, 16 deletions
diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst
index 97ec25777..058bdaac7 100644
--- a/doc/release/1.12.0-notes.rst
+++ b/doc/release/1.12.0-notes.rst
@@ -127,6 +127,11 @@ Add a hook in ``numpy/__init__.py`` to import a ``numpy/_distributor_init.py``
 file that will remain empty (bar a docstring) in the standard numpy source,
 but that can be overwritten by people making binary distributions of numpy.
 
+New nanfunctions ``nancumsum`` and ``nancumprod`` added
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Nanfunctions ``nancumsum`` and ``nancumprod`` have been added to
+compute ``cumsum`` and ``cumprod`` by ignoring nans.
+
 Improvements
 ============
 
diff --git a/doc/source/reference/routines.math.rst b/doc/source/reference/routines.math.rst
index c0be4096a..5cb1e0eec 100644
--- a/doc/source/reference/routines.math.rst
+++ b/doc/source/reference/routines.math.rst
@@ -58,6 +58,8 @@ Sums, products, differences
    nansum
    cumprod
    cumsum
+   nancumprod
+   nancumsum
    diff
    ediff1d
    gradient
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index b963abb21..9d3640647 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -10,6 +10,8 @@ Functions
 - `nanargmax` -- index of maximum non-NaN value
 - `nansum` -- sum of non-NaN values
 - `nanprod` -- product of non-NaN values
+- `nancumsum` -- cumulative sum of non-NaN values
+- `nancumprod` -- cumulative product of non-NaN values
 - `nanmean` -- mean of non-NaN values
 - `nanvar` -- variance of non-NaN values
 - `nanstd` -- standard deviation of non-NaN values
@@ -27,6 +29,7 @@ from numpy.lib.function_base import _ureduce as _ureduce
 __all__ = [
     'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
     'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
+    'nancumsum', 'nancumprod'
     ]
 
 
@@ -493,7 +496,11 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
 
     Returns
     -------
-    y : ndarray or numpy scalar
+    nansum : ndarray.
+        A new array holding the result is returned unless `out` is
+        specified, in which it is returned. The result has the same
+        size as `a`, and the same shape as `a` if `axis` is not None
+        or `a` is a 1-d array.
 
     See Also
     --------
@@ -506,11 +513,6 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     If both positive and negative infinity are present, the sum will be Not
     A Number (NaN).
 
-    Numpy integer arithmetic is modular. If the size of a sum exceeds the
-    size of an integer accumulator, its value will wrap around and the
-    result will be incorrect. Specifying ``dtype=double`` can alleviate
-    that problem.
-
     Examples
     --------
     >>> np.nansum(1)
@@ -539,7 +541,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
 def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Return the product of array elements over a given axis treating Not a
-    Numbers (NaNs) as zero.
+    Numbers (NaNs) as ones.
 
     One is returned for slices that are all-NaN or empty.
 
@@ -573,20 +575,15 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
 
     Returns
     -------
-    y : ndarray or numpy scalar
+    nanprod : ndarray
+        A new array holding the result is returned unless `out` is
+        specified, in which case it is returned.
 
     See Also
     --------
     numpy.prod : Product across array propagating NaNs.
     isnan : Show which elements are NaN.
 
-    Notes
-    -----
-    Numpy integer arithmetic is modular. If the size of a product exceeds
-    the size of an integer accumulator, its value will wrap around and the
-    result will be incorrect. Specifying ``dtype=double`` can alleviate
-    that problem.
-
     Examples
     --------
     >>> np.nanprod(1)
@@ -606,6 +603,133 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
 
 
+def nancumsum(a, axis=None, dtype=None, out=None):
+    """
+    Return the cumulative sum of array elements over a given axis treating Not a
+    Numbers (NaNs) as zero.  The cumulative sum does not change when NaNs are
+    encountered and leading NaNs are replaced by zeros.
+
+    Zeros are returned for slices that are all-NaN or empty.
+
+    .. versionadded:: 1.12.0
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axis : int, optional
+        Axis along which the cumulative sum is computed. The default
+        (None) is to compute the cumsum over the flattened array.
+    dtype : dtype, optional
+        Type of the returned array and of the accumulator in which the
+        elements are summed.  If `dtype` is not specified, it defaults
+        to the dtype of `a`, unless `a` has an integer dtype with a
+        precision less than that of the default platform integer.  In
+        that case, the default platform integer is used.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output
+        but the type will be cast if necessary. See `doc.ufuncs`
+        (Section "Output arguments") for more details.
+
+    Returns
+    -------
+    nancumsum : ndarray.
+        A new array holding the result is returned unless `out` is
+        specified, in which it is returned. The result has the same
+        size as `a`, and the same shape as `a` if `axis` is not None
+        or `a` is a 1-d array.
+
+    See Also
+    --------
+    numpy.cumsum : Cumulative sum across array propagating NaNs.
+    isnan : Show which elements are NaN.
+
+    Examples
+    --------
+    >>> np.nancumsum(1)
+    array([1])
+    >>> np.nancumsum([1])
+    array([1])
+    >>> np.nancumsum([1, np.nan])
+    array([ 1.,  1.])
+    >>> a = np.array([[1, 2], [3, np.nan]])
+    >>> np.nancumsum(a)
+    array([ 1.,  3.,  6.,  6.])
+    >>> np.nancumsum(a, axis=0)
+    array([[ 1.,  2.],
+           [ 4.,  2.]])
+    >>> np.nancumsum(a, axis=1)
+    array([[ 1.,  3.],
+           [ 3.,  3.]])
+
+    """
+    a, mask = _replace_nan(a, 0)
+    return np.cumsum(a, axis=axis, dtype=dtype, out=out)
+
+
+def nancumprod(a, axis=None, dtype=None, out=None):
+    """
+    Return the cumulative product of array elements over a given axis treating Not a
+    Numbers (NaNs) as one.  The cumulative product does not change when NaNs are
+    encountered and leading NaNs are replaced by ones.
+
+    Ones are returned for slices that are all-NaN or empty.
+
+    .. versionadded:: 1.12.0
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axis : int, optional
+        Axis along which the cumulative product is computed.  By default
+        the input is flattened.
+    dtype : dtype, optional
+        Type of the returned array, as well as of the accumulator in which
+        the elements are multiplied.  If *dtype* is not specified, it
+        defaults to the dtype of `a`, unless `a` has an integer dtype with
+        a precision less than that of the default platform integer.  In
+        that case, the default platform integer is used instead.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output
+        but the type of the resulting values will be cast if necessary.
+
+    Returns
+    -------
+    nancumprod : ndarray
+        A new array holding the result is returned unless `out` is
+        specified, in which case it is returned.
+
+    See Also
+    --------
+    numpy.cumprod : Cumulative product across array propagating NaNs.
+    isnan : Show which elements are NaN.
+
+    Examples
+    --------
+    >>> np.nancumprod(1)
+    array([1])
+    >>> np.nancumprod([1])
+    array([1])
+    >>> np.nancumprod([1, np.nan])
+    array([ 1.,  1.])
+    >>> a = np.array([[1, 2], [3, np.nan]])
+    >>> np.nancumprod(a)
+    array([ 1.,  2.,  6.,  6.])
+    >>> np.nancumprod(a, axis=0)
+    array([[ 1.,  2.],
+           [ 3.,  2.]])
+    >>> np.nancumprod(a, axis=1)
+    array([[ 1.,  2.],
+           [ 3.,  3.]])
+
+    """
+    a, mask = _replace_nan(a, 1)
+    return np.cumprod(a, axis=axis, dtype=dtype, out=out)
+
+
 def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Compute the arithmetic mean along the specified axis, ignoring NaNs.
diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py
index 989c563d9..03f9beff6 100644
--- a/numpy/lib/tests/test_nanfunctions.py
+++ b/numpy/lib/tests/test_nanfunctions.py
@@ -5,7 +5,7 @@ import warnings
 import numpy as np
 from numpy.testing import (
     run_module_suite, TestCase, assert_, assert_equal, assert_almost_equal,
-    assert_raises, assert_array_equal
+    assert_warns, assert_no_warnings, assert_raises, assert_array_equal
     )
 
 
@@ -22,6 +22,18 @@ _rdat = [np.array([0.6244, 0.2692, 0.0116, 0.1170]),
          np.array([0.1042, -0.5954]),
          np.array([0.1610, 0.1859, 0.3146])]
 
+# Rows of _ndat with nans converted to ones
+_ndat_ones = np.array([[0.6244, 1.0, 0.2692, 0.0116, 1.0, 0.1170],
+                       [0.5351, -0.9403, 1.0, 0.2100, 0.4759, 0.2833],
+                       [1.0, 1.0, 1.0, 0.1042, 1.0, -0.5954],
+                       [0.1610, 1.0, 1.0, 0.1859, 0.3146, 1.0]])
+
+# Rows of _ndat with nans converted to zeros
+_ndat_zeros = np.array([[0.6244, 0.0, 0.2692, 0.0116, 0.0, 0.1170],
+                        [0.5351, -0.9403, 0.0, 0.2100, 0.4759, 0.2833],
+                        [0.0, 0.0, 0.0, 0.1042, 0.0, -0.5954],
+                        [0.1610, 0.0, 0.0, 0.1859, 0.3146, 0.0]])
+
 
 class TestNanFunctions_MinMax(TestCase):
 
@@ -241,6 +253,16 @@ class TestNanFunctions_IntTypes(TestCase):
         for mat in self.integer_arrays():
             assert_equal(np.nanprod(mat), tgt)
 
+    def test_nancumsum(self):
+        tgt = np.cumsum(self.mat)
+        for mat in self.integer_arrays():
+            assert_equal(np.nancumsum(mat), tgt)
+
+    def test_nancumprod(self):
+        tgt = np.cumprod(self.mat)
+        for mat in self.integer_arrays():
+            assert_equal(np.nancumprod(mat), tgt)
+
     def test_nanmean(self):
         tgt = np.mean(self.mat)
         for mat in self.integer_arrays():
@@ -388,6 +410,89 @@ class TestNanFunctions_SumProd(TestCase, SharedNanFunctionsTestsMixin):
             assert_equal(res, tgt)
 
 
+class TestNanFunctions_CumSumProd(TestCase, SharedNanFunctionsTestsMixin):
+
+    nanfuncs = [np.nancumsum, np.nancumprod]
+    stdfuncs = [np.cumsum, np.cumprod]
+
+    def test_allnans(self):
+        for f, tgt_value in zip(self.nanfuncs, [0, 1]):
+            # Unlike other nan-functions, sum/prod/cumsum/cumprod don't warn on all nan input
+            with assert_no_warnings():
+                res = f([np.nan]*3, axis=None)
+                tgt = tgt_value*np.ones((3))
+                assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((3))' % (tgt_value))
+                # Check scalar
+                res = f(np.nan)
+                tgt = tgt_value*np.ones((1))
+                assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((1))' % (tgt_value))
+                # Check there is no warning for not all-nan
+                f([0]*3, axis=None)
+
+    def test_empty(self):
+        for f, tgt_value in zip(self.nanfuncs, [0, 1]):
+            mat = np.zeros((0, 3))
+            tgt = tgt_value*np.ones((0, 3))
+            res = f(mat, axis=0)
+            assert_equal(res, tgt)
+            tgt = mat
+            res = f(mat, axis=1)
+            assert_equal(res, tgt)
+            tgt = np.zeros((0))
+            res = f(mat, axis=None)
+            assert_equal(res, tgt)
+
+    def test_keepdims(self):
+        for f, g in zip(self.nanfuncs, self.stdfuncs):
+            mat = np.eye(3)
+            for axis in [None, 0, 1]:
+                tgt = f(mat, axis=axis, out=None)
+                res = g(mat, axis=axis, out=None)
+                assert_(res.ndim == tgt.ndim)
+
+        for f in self.nanfuncs:
+            d = np.ones((3, 5, 7, 11))
+            # Randomly set some elements to NaN:
+            rs = np.random.RandomState(0)
+            d[rs.rand(*d.shape) < 0.5] = np.nan
+            res = f(d, axis=None)
+            assert_equal(res.shape, (1155,))
+            for axis in np.arange(4):
+                res = f(d, axis=axis)
+                assert_equal(res.shape, (3, 5, 7, 11))
+
+    def test_matrices(self):
+        # Check that it works and that type and
+        # shape are preserved
+        mat = np.matrix(np.eye(3))
+        for f in self.nanfuncs:
+            for axis in np.arange(2):
+                res = f(mat, axis=axis)
+                assert_(isinstance(res, np.matrix))
+                assert_(res.shape == (3, 3))
+            res = f(mat)
+            assert_(res.shape == (1, 3*3))
+
+    def test_result_values(self):
+        for axis in (-2, -1, 0, 1, None):
+            tgt = np.cumprod(_ndat_ones, axis=axis)
+            res = np.nancumprod(_ndat, axis=axis)
+            assert_almost_equal(res, tgt)
+            tgt = np.cumsum(_ndat_zeros,axis=axis)
+            res = np.nancumsum(_ndat, axis=axis)
+            assert_almost_equal(res, tgt)
+
+    def test_out(self):
+        mat = np.eye(3)
+        for nf, rf in zip(self.nanfuncs, self.stdfuncs):
+            resout = np.eye(3)
+            for axis in (-2, -1, 0, 1):
+                tgt = rf(mat, axis=axis)
+                res = nf(mat, axis=axis, out=resout)
+                assert_almost_equal(res, resout)
+                assert_almost_equal(res, tgt)
+
+
 class TestNanFunctions_MeanVarStd(TestCase, SharedNanFunctionsTestsMixin):
 
     nanfuncs = [np.nanmean, np.nanvar, np.nanstd]
author	Stephan Hoyer <shoyer@gmail.com>	2016-03-26 11:59:37 -0700
committer	Stephan Hoyer <shoyer@gmail.com>	2016-03-26 11:59:37 -0700
commit	ef389eec83d23e9159c595f6ea8d78e8e7abbae3 (patch)
tree	d904dd1e5eb1dafe7f2cae4bf091b29bf6d9cbe6
parent	4b200d27e9bfba4b28d51cd5d5132a2a4cacb8f1 (diff)
parent	a76b8728a71721f0b92c5fa7c0cbcc0f41cceb3e (diff)
download	numpy-ef389eec83d23e9159c595f6ea8d78e8e7abbae3.tar.gz