diff options
author | Stephan Hoyer <shoyer@gmail.com> | 2016-03-26 11:59:37 -0700 |
---|---|---|
committer | Stephan Hoyer <shoyer@gmail.com> | 2016-03-26 11:59:37 -0700 |
commit | ef389eec83d23e9159c595f6ea8d78e8e7abbae3 (patch) | |
tree | d904dd1e5eb1dafe7f2cae4bf091b29bf6d9cbe6 | |
parent | 4b200d27e9bfba4b28d51cd5d5132a2a4cacb8f1 (diff) | |
parent | a76b8728a71721f0b92c5fa7c0cbcc0f41cceb3e (diff) | |
download | numpy-ef389eec83d23e9159c595f6ea8d78e8e7abbae3.tar.gz |
Merge pull request #7421 from pwolfram/nancumsumprod
ENH: adds np.nancumsum and np.nancumprod
-rw-r--r-- | doc/release/1.12.0-notes.rst | 5 | ||||
-rw-r--r-- | doc/source/reference/routines.math.rst | 2 | ||||
-rw-r--r-- | numpy/lib/nanfunctions.py | 154 | ||||
-rw-r--r-- | numpy/lib/tests/test_nanfunctions.py | 107 |
4 files changed, 252 insertions, 16 deletions
diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst index 97ec25777..058bdaac7 100644 --- a/doc/release/1.12.0-notes.rst +++ b/doc/release/1.12.0-notes.rst @@ -127,6 +127,11 @@ Add a hook in ``numpy/__init__.py`` to import a ``numpy/_distributor_init.py`` file that will remain empty (bar a docstring) in the standard numpy source, but that can be overwritten by people making binary distributions of numpy. +New nanfunctions ``nancumsum`` and ``nancumprod`` added +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Nanfunctions ``nancumsum`` and ``nancumprod`` have been added to +compute ``cumsum`` and ``cumprod`` by ignoring nans. + Improvements ============ diff --git a/doc/source/reference/routines.math.rst b/doc/source/reference/routines.math.rst index c0be4096a..5cb1e0eec 100644 --- a/doc/source/reference/routines.math.rst +++ b/doc/source/reference/routines.math.rst @@ -58,6 +58,8 @@ Sums, products, differences nansum cumprod cumsum + nancumprod + nancumsum diff ediff1d gradient diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index b963abb21..9d3640647 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -10,6 +10,8 @@ Functions - `nanargmax` -- index of maximum non-NaN value - `nansum` -- sum of non-NaN values - `nanprod` -- product of non-NaN values +- `nancumsum` -- cumulative sum of non-NaN values +- `nancumprod` -- cumulative product of non-NaN values - `nanmean` -- mean of non-NaN values - `nanvar` -- variance of non-NaN values - `nanstd` -- standard deviation of non-NaN values @@ -27,6 +29,7 @@ from numpy.lib.function_base import _ureduce as _ureduce __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod', + 'nancumsum', 'nancumprod' ] @@ -493,7 +496,11 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): Returns ------- - y : ndarray or numpy scalar + nansum : ndarray. + A new array holding the result is returned unless `out` is + specified, in which it is returned. The result has the same + size as `a`, and the same shape as `a` if `axis` is not None + or `a` is a 1-d array. See Also -------- @@ -506,11 +513,6 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): If both positive and negative infinity are present, the sum will be Not A Number (NaN). - Numpy integer arithmetic is modular. If the size of a sum exceeds the - size of an integer accumulator, its value will wrap around and the - result will be incorrect. Specifying ``dtype=double`` can alleviate - that problem. - Examples -------- >>> np.nansum(1) @@ -539,7 +541,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ Return the product of array elements over a given axis treating Not a - Numbers (NaNs) as zero. + Numbers (NaNs) as ones. One is returned for slices that are all-NaN or empty. @@ -573,20 +575,15 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): Returns ------- - y : ndarray or numpy scalar + nanprod : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. See Also -------- numpy.prod : Product across array propagating NaNs. isnan : Show which elements are NaN. - Notes - ----- - Numpy integer arithmetic is modular. If the size of a product exceeds - the size of an integer accumulator, its value will wrap around and the - result will be incorrect. Specifying ``dtype=double`` can alleviate - that problem. - Examples -------- >>> np.nanprod(1) @@ -606,6 +603,133 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) +def nancumsum(a, axis=None, dtype=None, out=None): + """ + Return the cumulative sum of array elements over a given axis treating Not a + Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are + encountered and leading NaNs are replaced by zeros. + + Zeros are returned for slices that are all-NaN or empty. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis along which the cumulative sum is computed. The default + (None) is to compute the cumsum over the flattened array. + dtype : dtype, optional + Type of the returned array and of the accumulator in which the + elements are summed. If `dtype` is not specified, it defaults + to the dtype of `a`, unless `a` has an integer dtype with a + precision less than that of the default platform integer. In + that case, the default platform integer is used. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type will be cast if necessary. See `doc.ufuncs` + (Section "Output arguments") for more details. + + Returns + ------- + nancumsum : ndarray. + A new array holding the result is returned unless `out` is + specified, in which it is returned. The result has the same + size as `a`, and the same shape as `a` if `axis` is not None + or `a` is a 1-d array. + + See Also + -------- + numpy.cumsum : Cumulative sum across array propagating NaNs. + isnan : Show which elements are NaN. + + Examples + -------- + >>> np.nancumsum(1) + array([1]) + >>> np.nancumsum([1]) + array([1]) + >>> np.nancumsum([1, np.nan]) + array([ 1., 1.]) + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nancumsum(a) + array([ 1., 3., 6., 6.]) + >>> np.nancumsum(a, axis=0) + array([[ 1., 2.], + [ 4., 2.]]) + >>> np.nancumsum(a, axis=1) + array([[ 1., 3.], + [ 3., 3.]]) + + """ + a, mask = _replace_nan(a, 0) + return np.cumsum(a, axis=axis, dtype=dtype, out=out) + + +def nancumprod(a, axis=None, dtype=None, out=None): + """ + Return the cumulative product of array elements over a given axis treating Not a + Numbers (NaNs) as one. The cumulative product does not change when NaNs are + encountered and leading NaNs are replaced by ones. + + Ones are returned for slices that are all-NaN or empty. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis along which the cumulative product is computed. By default + the input is flattened. + dtype : dtype, optional + Type of the returned array, as well as of the accumulator in which + the elements are multiplied. If *dtype* is not specified, it + defaults to the dtype of `a`, unless `a` has an integer dtype with + a precision less than that of the default platform integer. In + that case, the default platform integer is used instead. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type of the resulting values will be cast if necessary. + + Returns + ------- + nancumprod : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. + + See Also + -------- + numpy.cumprod : Cumulative product across array propagating NaNs. + isnan : Show which elements are NaN. + + Examples + -------- + >>> np.nancumprod(1) + array([1]) + >>> np.nancumprod([1]) + array([1]) + >>> np.nancumprod([1, np.nan]) + array([ 1., 1.]) + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nancumprod(a) + array([ 1., 2., 6., 6.]) + >>> np.nancumprod(a, axis=0) + array([[ 1., 2.], + [ 3., 2.]]) + >>> np.nancumprod(a, axis=1) + array([[ 1., 2.], + [ 3., 3.]]) + + """ + a, mask = _replace_nan(a, 1) + return np.cumprod(a, axis=axis, dtype=dtype, out=out) + + def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ Compute the arithmetic mean along the specified axis, ignoring NaNs. diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py index 989c563d9..03f9beff6 100644 --- a/numpy/lib/tests/test_nanfunctions.py +++ b/numpy/lib/tests/test_nanfunctions.py @@ -5,7 +5,7 @@ import warnings import numpy as np from numpy.testing import ( run_module_suite, TestCase, assert_, assert_equal, assert_almost_equal, - assert_raises, assert_array_equal + assert_warns, assert_no_warnings, assert_raises, assert_array_equal ) @@ -22,6 +22,18 @@ _rdat = [np.array([0.6244, 0.2692, 0.0116, 0.1170]), np.array([0.1042, -0.5954]), np.array([0.1610, 0.1859, 0.3146])] +# Rows of _ndat with nans converted to ones +_ndat_ones = np.array([[0.6244, 1.0, 0.2692, 0.0116, 1.0, 0.1170], + [0.5351, -0.9403, 1.0, 0.2100, 0.4759, 0.2833], + [1.0, 1.0, 1.0, 0.1042, 1.0, -0.5954], + [0.1610, 1.0, 1.0, 0.1859, 0.3146, 1.0]]) + +# Rows of _ndat with nans converted to zeros +_ndat_zeros = np.array([[0.6244, 0.0, 0.2692, 0.0116, 0.0, 0.1170], + [0.5351, -0.9403, 0.0, 0.2100, 0.4759, 0.2833], + [0.0, 0.0, 0.0, 0.1042, 0.0, -0.5954], + [0.1610, 0.0, 0.0, 0.1859, 0.3146, 0.0]]) + class TestNanFunctions_MinMax(TestCase): @@ -241,6 +253,16 @@ class TestNanFunctions_IntTypes(TestCase): for mat in self.integer_arrays(): assert_equal(np.nanprod(mat), tgt) + def test_nancumsum(self): + tgt = np.cumsum(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nancumsum(mat), tgt) + + def test_nancumprod(self): + tgt = np.cumprod(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nancumprod(mat), tgt) + def test_nanmean(self): tgt = np.mean(self.mat) for mat in self.integer_arrays(): @@ -388,6 +410,89 @@ class TestNanFunctions_SumProd(TestCase, SharedNanFunctionsTestsMixin): assert_equal(res, tgt) +class TestNanFunctions_CumSumProd(TestCase, SharedNanFunctionsTestsMixin): + + nanfuncs = [np.nancumsum, np.nancumprod] + stdfuncs = [np.cumsum, np.cumprod] + + def test_allnans(self): + for f, tgt_value in zip(self.nanfuncs, [0, 1]): + # Unlike other nan-functions, sum/prod/cumsum/cumprod don't warn on all nan input + with assert_no_warnings(): + res = f([np.nan]*3, axis=None) + tgt = tgt_value*np.ones((3)) + assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((3))' % (tgt_value)) + # Check scalar + res = f(np.nan) + tgt = tgt_value*np.ones((1)) + assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((1))' % (tgt_value)) + # Check there is no warning for not all-nan + f([0]*3, axis=None) + + def test_empty(self): + for f, tgt_value in zip(self.nanfuncs, [0, 1]): + mat = np.zeros((0, 3)) + tgt = tgt_value*np.ones((0, 3)) + res = f(mat, axis=0) + assert_equal(res, tgt) + tgt = mat + res = f(mat, axis=1) + assert_equal(res, tgt) + tgt = np.zeros((0)) + res = f(mat, axis=None) + assert_equal(res, tgt) + + def test_keepdims(self): + for f, g in zip(self.nanfuncs, self.stdfuncs): + mat = np.eye(3) + for axis in [None, 0, 1]: + tgt = f(mat, axis=axis, out=None) + res = g(mat, axis=axis, out=None) + assert_(res.ndim == tgt.ndim) + + for f in self.nanfuncs: + d = np.ones((3, 5, 7, 11)) + # Randomly set some elements to NaN: + rs = np.random.RandomState(0) + d[rs.rand(*d.shape) < 0.5] = np.nan + res = f(d, axis=None) + assert_equal(res.shape, (1155,)) + for axis in np.arange(4): + res = f(d, axis=axis) + assert_equal(res.shape, (3, 5, 7, 11)) + + def test_matrices(self): + # Check that it works and that type and + # shape are preserved + mat = np.matrix(np.eye(3)) + for f in self.nanfuncs: + for axis in np.arange(2): + res = f(mat, axis=axis) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 3)) + res = f(mat) + assert_(res.shape == (1, 3*3)) + + def test_result_values(self): + for axis in (-2, -1, 0, 1, None): + tgt = np.cumprod(_ndat_ones, axis=axis) + res = np.nancumprod(_ndat, axis=axis) + assert_almost_equal(res, tgt) + tgt = np.cumsum(_ndat_zeros,axis=axis) + res = np.nancumsum(_ndat, axis=axis) + assert_almost_equal(res, tgt) + + def test_out(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + resout = np.eye(3) + for axis in (-2, -1, 0, 1): + tgt = rf(mat, axis=axis) + res = nf(mat, axis=axis, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + class TestNanFunctions_MeanVarStd(TestCase, SharedNanFunctionsTestsMixin): nanfuncs = [np.nanmean, np.nanvar, np.nanstd] |