summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Phillip J. Wolfram <phillipwolfram@gmail.com> 2016-03-16 16:02:12 -0600
committer: Phillip J. Wolfram <phillipwolfram@gmail.com> 2016-03-24 15:08:56 -0600
commit: a76b8728a71721f0b92c5fa7c0cbcc0f41cceb3e (patch)
tree: c03c870272b63b3d811dff8d0aaa5ca126a42025
parent: 1429c606643d1ad305e710c4a31cb6f398d04c53 (diff)
download: numpy-a76b8728a71721f0b92c5fa7c0cbcc0f41cceb3e.tar.gz
ENH: adds np.nancumsum and np.nancumprod
This PR adds an implementation of `nancumsum` and `nancumprod`. The actual function is a two-liner adapted from `nansum`. Its structure is adapted from PR: https://github.com/numpy/numpy/pull/5418/
-rw-r--r-- doc/release/1.12.0-notes.rst 5
-rw-r--r-- doc/source/reference/routines.math.rst 2
-rw-r--r-- numpy/lib/nanfunctions.py 154
-rw-r--r-- numpy/lib/tests/test_nanfunctions.py 107
4 files changed, 252 insertions, 16 deletions
diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst
index 38e5e4338..84165b391 100644
--- a/doc/release/1.12.0-notes.rst
+++ b/doc/release/1.12.0-notes.rst
@@ -125,6 +125,11 @@ Add a hook in ``numpy/__init__.py`` to import a ``numpy/_distributor_init.py``
file that will remain empty (bar a docstring) in the standard numpy source,
but that can be overwritten by people making binary distributions of numpy.
+New nanfunctions ``nancumsum`` and ``nancumprod`` added
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Nanfunctions ``nancumsum`` and ``nancumprod`` have been added to compute
+``cumsum`` and ``cumprod`` while treating NaNs as zero and one, respectively.
+
Improvements
============
diff --git a/doc/source/reference/routines.math.rst b/doc/source/reference/routines.math.rst
index c0be4096a..5cb1e0eec 100644
--- a/doc/source/reference/routines.math.rst
+++ b/doc/source/reference/routines.math.rst
@@ -58,6 +58,8 @@ Sums, products, differences
nansum
cumprod
cumsum
+ nancumprod
+ nancumsum
diff
ediff1d
gradient
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index b963abb21..9d3640647 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -10,6 +10,8 @@ Functions
- `nanargmax` -- index of maximum non-NaN value
- `nansum` -- sum of non-NaN values
- `nanprod` -- product of non-NaN values
+- `nancumsum` -- cumulative sum of non-NaN values
+- `nancumprod` -- cumulative product of non-NaN values
- `nanmean` -- mean of non-NaN values
- `nanvar` -- variance of non-NaN values
- `nanstd` -- standard deviation of non-NaN values
@@ -27,6 +29,7 @@ from numpy.lib.function_base import _ureduce as _ureduce
__all__ = [
'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
+ 'nancumsum', 'nancumprod'
]
@@ -493,7 +496,11 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
Returns
-------
- y : ndarray or numpy scalar
+ nansum : ndarray or numpy scalar
+ A new array holding the result is returned unless `out` is
+ specified, in which case it is returned. The result has the shape
+ of `a` with the summed axis removed, or is a scalar if `axis` is
+ None; with `keepdims` the reduced axes are kept with size one.
See Also
--------
@@ -506,11 +513,6 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
If both positive and negative infinity are present, the sum will be Not
A Number (NaN).
- Numpy integer arithmetic is modular. If the size of a sum exceeds the
- size of an integer accumulator, its value will wrap around and the
- result will be incorrect. Specifying ``dtype=double`` can alleviate
- that problem.
-
Examples
--------
>>> np.nansum(1)
@@ -539,7 +541,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
"""
Return the product of array elements over a given axis treating Not a
- Numbers (NaNs) as zero.
+ Numbers (NaNs) as ones.
One is returned for slices that are all-NaN or empty.
@@ -573,20 +575,15 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
Returns
-------
- y : ndarray or numpy scalar
+ nanprod : ndarray
+ A new array holding the result is returned unless `out` is
+ specified, in which case it is returned.
See Also
--------
numpy.prod : Product across array propagating NaNs.
isnan : Show which elements are NaN.
- Notes
- -----
- Numpy integer arithmetic is modular. If the size of a product exceeds
- the size of an integer accumulator, its value will wrap around and the
- result will be incorrect. Specifying ``dtype=double`` can alleviate
- that problem.
-
Examples
--------
>>> np.nanprod(1)
@@ -606,6 +603,133 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+def nancumsum(a, axis=None, dtype=None, out=None):
+ """
+ Return the cumulative sum of array elements over a given axis treating Not a
+ Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are
+ encountered and leading NaNs are replaced by zeros.
+
+ Zeros are returned for slices that are all-NaN or empty.
+
+ .. versionadded:: 1.12.0
+
+ Parameters
+ ----------
+ a : array_like
+ Input array.
+ axis : int, optional
+ Axis along which the cumulative sum is computed. The default
+ (None) is to compute the cumsum over the flattened array.
+ dtype : dtype, optional
+ Type of the returned array and of the accumulator in which the
+ elements are summed. If `dtype` is not specified, it defaults
+ to the dtype of `a`, unless `a` has an integer dtype with a
+ precision less than that of the default platform integer. In
+ that case, the default platform integer is used.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output
+ but the type will be cast if necessary. See `doc.ufuncs`
+ (Section "Output arguments") for more details.
+
+ Returns
+ -------
+ nancumsum : ndarray
+ A new array holding the result is returned unless `out` is
+ specified, in which case it is returned. The result has the same
+ size as `a`, and the same shape as `a` if `axis` is not None
+ or `a` is a 1-d array.
+
+ See Also
+ --------
+ numpy.cumsum : Cumulative sum across array propagating NaNs.
+ isnan : Show which elements are NaN.
+
+ Examples
+ --------
+ >>> np.nancumsum(1)
+ array([1])
+ >>> np.nancumsum([1])
+ array([1])
+ >>> np.nancumsum([1, np.nan])
+ array([ 1., 1.])
+ >>> a = np.array([[1, 2], [3, np.nan]])
+ >>> np.nancumsum(a)
+ array([ 1., 3., 6., 6.])
+ >>> np.nancumsum(a, axis=0)
+ array([[ 1., 2.],
+ [ 4., 2.]])
+ >>> np.nancumsum(a, axis=1)
+ array([[ 1., 3.],
+ [ 3., 3.]])
+
+ """
+ a, mask = _replace_nan(a, 0)
+ return np.cumsum(a, axis=axis, dtype=dtype, out=out)
+
+
+def nancumprod(a, axis=None, dtype=None, out=None):
+ """
+ Return the cumulative product of array elements over a given axis treating Not a
+ Numbers (NaNs) as one. The cumulative product does not change when NaNs are
+ encountered and leading NaNs are replaced by ones.
+
+ Ones are returned for slices that are all-NaN or empty.
+
+ .. versionadded:: 1.12.0
+
+ Parameters
+ ----------
+ a : array_like
+ Input array.
+ axis : int, optional
+ Axis along which the cumulative product is computed. By default
+ the input is flattened.
+ dtype : dtype, optional
+ Type of the returned array, as well as of the accumulator in which
+ the elements are multiplied. If *dtype* is not specified, it
+ defaults to the dtype of `a`, unless `a` has an integer dtype with
+ a precision less than that of the default platform integer. In
+ that case, the default platform integer is used instead.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output
+ but the type of the resulting values will be cast if necessary.
+
+ Returns
+ -------
+ nancumprod : ndarray
+ A new array holding the result is returned unless `out` is
+ specified, in which case it is returned.
+
+ See Also
+ --------
+ numpy.cumprod : Cumulative product across array propagating NaNs.
+ isnan : Show which elements are NaN.
+
+ Examples
+ --------
+ >>> np.nancumprod(1)
+ array([1])
+ >>> np.nancumprod([1])
+ array([1])
+ >>> np.nancumprod([1, np.nan])
+ array([ 1., 1.])
+ >>> a = np.array([[1, 2], [3, np.nan]])
+ >>> np.nancumprod(a)
+ array([ 1., 2., 6., 6.])
+ >>> np.nancumprod(a, axis=0)
+ array([[ 1., 2.],
+ [ 3., 2.]])
+ >>> np.nancumprod(a, axis=1)
+ array([[ 1., 2.],
+ [ 3., 3.]])
+
+ """
+ a, mask = _replace_nan(a, 1)
+ return np.cumprod(a, axis=axis, dtype=dtype, out=out)
+
+
def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
"""
Compute the arithmetic mean along the specified axis, ignoring NaNs.
diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py
index 989c563d9..03f9beff6 100644
--- a/numpy/lib/tests/test_nanfunctions.py
+++ b/numpy/lib/tests/test_nanfunctions.py
@@ -5,7 +5,7 @@ import warnings
import numpy as np
from numpy.testing import (
run_module_suite, TestCase, assert_, assert_equal, assert_almost_equal,
- assert_raises, assert_array_equal
+ assert_warns, assert_no_warnings, assert_raises, assert_array_equal
)
@@ -22,6 +22,18 @@ _rdat = [np.array([0.6244, 0.2692, 0.0116, 0.1170]),
np.array([0.1042, -0.5954]),
np.array([0.1610, 0.1859, 0.3146])]
+# Rows of _ndat with nans converted to ones
+_ndat_ones = np.array([[0.6244, 1.0, 0.2692, 0.0116, 1.0, 0.1170],
+ [0.5351, -0.9403, 1.0, 0.2100, 0.4759, 0.2833],
+ [1.0, 1.0, 1.0, 0.1042, 1.0, -0.5954],
+ [0.1610, 1.0, 1.0, 0.1859, 0.3146, 1.0]])
+
+# Rows of _ndat with nans converted to zeros
+_ndat_zeros = np.array([[0.6244, 0.0, 0.2692, 0.0116, 0.0, 0.1170],
+ [0.5351, -0.9403, 0.0, 0.2100, 0.4759, 0.2833],
+ [0.0, 0.0, 0.0, 0.1042, 0.0, -0.5954],
+ [0.1610, 0.0, 0.0, 0.1859, 0.3146, 0.0]])
+
class TestNanFunctions_MinMax(TestCase):
@@ -241,6 +253,16 @@ class TestNanFunctions_IntTypes(TestCase):
for mat in self.integer_arrays():
assert_equal(np.nanprod(mat), tgt)
+ def test_nancumsum(self):
+ tgt = np.cumsum(self.mat)
+ for mat in self.integer_arrays():
+ assert_equal(np.nancumsum(mat), tgt)
+
+ def test_nancumprod(self):
+ tgt = np.cumprod(self.mat)
+ for mat in self.integer_arrays():
+ assert_equal(np.nancumprod(mat), tgt)
+
def test_nanmean(self):
tgt = np.mean(self.mat)
for mat in self.integer_arrays():
@@ -388,6 +410,89 @@ class TestNanFunctions_SumProd(TestCase, SharedNanFunctionsTestsMixin):
assert_equal(res, tgt)
+class TestNanFunctions_CumSumProd(TestCase, SharedNanFunctionsTestsMixin):
+
+ nanfuncs = [np.nancumsum, np.nancumprod]
+ stdfuncs = [np.cumsum, np.cumprod]
+
+ def test_allnans(self):
+ for f, tgt_value in zip(self.nanfuncs, [0, 1]):
+ # Unlike other nan-functions, sum/prod/cumsum/cumprod don't warn on all nan input
+ with assert_no_warnings():
+ res = f([np.nan]*3, axis=None)
+ tgt = tgt_value*np.ones((3))
+ assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((3))' % (tgt_value))
+ # Check scalar
+ res = f(np.nan)
+ tgt = tgt_value*np.ones((1))
+ assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((1))' % (tgt_value))
+ # Check there is no warning for not all-nan
+ f([0]*3, axis=None)
+
+ def test_empty(self):
+ for f, tgt_value in zip(self.nanfuncs, [0, 1]):
+ mat = np.zeros((0, 3))
+ tgt = tgt_value*np.ones((0, 3))
+ res = f(mat, axis=0)
+ assert_equal(res, tgt)
+ tgt = mat
+ res = f(mat, axis=1)
+ assert_equal(res, tgt)
+ tgt = np.zeros((0))
+ res = f(mat, axis=None)
+ assert_equal(res, tgt)
+
+ def test_keepdims(self):
+ for f, g in zip(self.nanfuncs, self.stdfuncs):
+ mat = np.eye(3)
+ for axis in [None, 0, 1]:
+ tgt = f(mat, axis=axis, out=None)
+ res = g(mat, axis=axis, out=None)
+ assert_(res.ndim == tgt.ndim)
+
+ for f in self.nanfuncs:
+ d = np.ones((3, 5, 7, 11))
+ # Randomly set some elements to NaN:
+ rs = np.random.RandomState(0)
+ d[rs.rand(*d.shape) < 0.5] = np.nan
+ res = f(d, axis=None)
+ assert_equal(res.shape, (1155,))
+ for axis in np.arange(4):
+ res = f(d, axis=axis)
+ assert_equal(res.shape, (3, 5, 7, 11))
+
+ def test_matrices(self):
+ # Check that it works and that type and
+ # shape are preserved
+ mat = np.matrix(np.eye(3))
+ for f in self.nanfuncs:
+ for axis in np.arange(2):
+ res = f(mat, axis=axis)
+ assert_(isinstance(res, np.matrix))
+ assert_(res.shape == (3, 3))
+ res = f(mat)
+ assert_(res.shape == (1, 3*3))
+
+ def test_result_values(self):
+ for axis in (-2, -1, 0, 1, None):
+ tgt = np.cumprod(_ndat_ones, axis=axis)
+ res = np.nancumprod(_ndat, axis=axis)
+ assert_almost_equal(res, tgt)
+ tgt = np.cumsum(_ndat_zeros,axis=axis)
+ res = np.nancumsum(_ndat, axis=axis)
+ assert_almost_equal(res, tgt)
+
+ def test_out(self):
+ mat = np.eye(3)
+ for nf, rf in zip(self.nanfuncs, self.stdfuncs):
+ resout = np.eye(3)
+ for axis in (-2, -1, 0, 1):
+ tgt = rf(mat, axis=axis)
+ res = nf(mat, axis=axis, out=resout)
+ assert_almost_equal(res, resout)
+ assert_almost_equal(res, tgt)
+
+
class TestNanFunctions_MeanVarStd(TestCase, SharedNanFunctionsTestsMixin):
nanfuncs = [np.nanmean, np.nanvar, np.nanstd]