diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-08-17 22:03:06 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:56 -0600 |
commit | 0fa4f22fec4b19e2a8c1d93e5a1f955167c9addd (patch) | |
tree | 83f7b2ae61161fe6505fcea422f0dc131f42b4c7 /numpy | |
parent | bfda229ec93d37b1ee2cdd8b9443ec4e34536bbf (diff) | |
download | numpy-0fa4f22fec4b19e2a8c1d93e5a1f955167c9addd.tar.gz |
ENH: missingdata: Support 'skipna=' parameter in np.mean
Also add 'keepdims=' parameter to reductions, to support writing of
the np.std function.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/add_newdocs.py | 31 | ||||
-rw-r--r-- | numpy/core/fromnumeric.py | 54 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 4 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.h | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 23 | ||||
-rw-r--r-- | numpy/core/src/multiarray/reduction.c | 53 | ||||
-rw-r--r-- | numpy/core/src/multiarray/reduction.h | 6 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 20 |
8 files changed, 143 insertions, 50 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 8596b9c9c..b402d2150 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -896,10 +896,21 @@ add_newdoc('numpy.core.multiarray', 'count_nonzero', ---------- a : array_like The array for which to count non-zeros. + axis : None or int or tuple of ints, optional + Axis or axes along which a reduction is performed. + The default (`axis` = None) is perform a reduction over all + the dimensions of the input array. + skipna : bool, optional + If this is set to True, any NA elements in the array are skipped + instead of propagating. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. Returns ------- - count : int + count : int or array of int Number of non-zero values in the array. See Also @@ -910,14 +921,18 @@ add_newdoc('numpy.core.multiarray', 'count_nonzero', -------- >>> np.count_nonzero(np.eye(4)) 4 - >>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]]) 5 + >>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=1) + array([2, 3]) + >>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=1, keepdims=True) + array([[2], + [3]]) """) add_newdoc('numpy.core.multiarray', 'count_reduce_items', """ - count_reduce_items(arr, axis=None, skipna=False) + count_reduce_items(arr, axis=None, skipna=False, keepdims=False) Counts the number of items a reduction with the same `axis` and `skipna` parameter values would use. The purpose of this @@ -941,6 +956,10 @@ add_newdoc('numpy.core.multiarray', 'count_reduce_items', counted. The only time this function does any actual counting instead of a cheap multiply of a few sizes is when `skipna` is true and `arr` has an NA mask. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. Returns ------- @@ -5330,7 +5349,7 @@ add_newdoc('numpy.core', 'ufunc', ('types', add_newdoc('numpy.core', 'ufunc', ('reduce', """ - reduce(a, axis=0, dtype=None, out=None, skipna=False) + reduce(a, axis=0, dtype=None, out=None, skipna=False, keepdims=False) Reduces `a`'s dimension by one, by applying ufunc along one axis. @@ -5380,6 +5399,10 @@ add_newdoc('numpy.core', 'ufunc', ('reduce', were not counted in the array. The default, False, causes the NA values to propagate, so if any element in a set of elements being reduced is NA, the result will be NA. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. Returns ------- diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 2364fbfe8..4682386d7 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -2336,7 +2336,7 @@ def round_(a, decimals=0, out=None): return round(decimals, out) -def mean(a, axis=None, dtype=None, out=None): +def mean(a, axis=None, dtype=None, out=None, skipna=False, keepdims=False): """ Compute the arithmetic mean along the specified axis. @@ -2361,6 +2361,13 @@ def mean(a, axis=None, dtype=None, out=None): is ``None``; if provided, it must have the same shape as the expected output, but the type will be cast if necessary. See `doc.ufuncs` for details. + skipna : bool, optional + If this is set to True, skips any NA values during calculation + instead of propagating them. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. Returns ------- @@ -2407,11 +2414,29 @@ def mean(a, axis=None, dtype=None, out=None): 0.55000000074505806 """ - try: - mean = a.mean - except AttributeError: - return _wrapit(a, 'mean', axis, dtype, out) - return mean(axis, dtype, out) + if not (type(a) is mu.ndarray): + try: + mean = a.mean + return mean(axis=axis, dtype=dtype, out=out) + except AttributeError: + pass + + arr = asarray(a) + + # Upgrade bool, unsigned int, and int to float64 + if dtype is None and arr.dtype.kind in ['b','u','i']: + ret = um.add.reduce(arr, axis=axis, dtype='f8', + out=out, skipna=skipna, keepdims=keepdims) + else: + ret = um.add.reduce(arr, axis=axis, dtype=dtype, + out=out, skipna=skipna, keepdims=keepdims) + rcount = mu.count_reduce_items(arr, axis=axis, + skipna=skipna, keepdims=keepdims) + if type(ret) is mu.ndarray: + um.true_divide(ret, rcount, out=ret, casting='unsafe') + else: + ret = ret / float(rcount) + return ret def std(a, axis=None, dtype=None, out=None, ddof=0): @@ -2458,14 +2483,15 @@ def std(a, axis=None, dtype=None, out=None, ddof=0): The standard deviation is the square root of the average of the squared deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``. - The average squared deviation is normally calculated as ``x.sum() / N``, where - ``N = len(x)``. If, however, `ddof` is specified, the divisor ``N - ddof`` - is used instead. In standard statistical practice, ``ddof=1`` provides an - unbiased estimator of the variance of the infinite population. ``ddof=0`` - provides a maximum likelihood estimate of the variance for normally - distributed variables. The standard deviation computed in this function - is the square root of the estimated variance, so even with ``ddof=1``, it - will not be an unbiased estimate of the standard deviation per se. + The average squared deviation is normally calculated as + ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is specified, + the divisor ``N - ddof`` is used instead. In standard statistical + practice, ``ddof=1`` provides an unbiased estimator of the variance + of the infinite population. ``ddof=0`` provides a maximum likelihood + estimate of the variance for normally distributed variables. The + standard deviation computed in this function is the square root of + the estimated variance, so even with ``ddof=1``, it will not be an + unbiased estimate of the standard deviation per se. Note that, for complex numbers, `std` takes the absolute value before squaring, so that the result is always real and nonnegative. diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 5257aec8e..ec0b1aa46 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -1984,7 +1984,7 @@ reduce_count_nonzero_masked_inner_loop(NpyIter *iter, */ NPY_NO_EXPORT PyObject * PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out, - npy_bool *axis_flags, int skipna) + npy_bool *axis_flags, int skipna, int keepdims) { PyArray_NonzeroFunc *nonzero; PyArrayObject *result; @@ -2005,7 +2005,7 @@ PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out, result = PyArray_ReduceWrapper(arr, out, PyArray_DESCR(arr), dtype, - axis_flags, skipna, + axis_flags, skipna, keepdims, &assign_reduce_unit_zero, &reduce_count_nonzero_inner_loop, &reduce_count_nonzero_masked_inner_loop, diff --git a/numpy/core/src/multiarray/item_selection.h b/numpy/core/src/multiarray/item_selection.h index 722aaa5d1..5c1741aaf 100644 --- a/numpy/core/src/multiarray/item_selection.h +++ b/numpy/core/src/multiarray/item_selection.h @@ -35,7 +35,7 @@ PyArray_MultiIndexSetItem(PyArrayObject *self, npy_intp *multi_index, */ NPY_NO_EXPORT PyObject * PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out, - npy_bool *axis_flags, int skipna); + npy_bool *axis_flags, int skipna, int keepdims); diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index e21369e6f..2afb0d84a 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -2166,19 +2166,21 @@ array_zeros(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) static PyObject * array_count_nonzero(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"arr", "axis", "out", "skipna", NULL}; + static char *kwlist[] = {"arr", "axis", "out", "skipna", "keepdims", NULL}; PyObject *array_in, *axis_in = NULL, *out_in = NULL; PyObject *ret = NULL; PyArrayObject *array, *out = NULL; npy_bool axis_flags[NPY_MAXDIMS]; - int skipna = 0; + int skipna = 0, keepdims = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOi:count_nonzero", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O|OOii:count_nonzero", kwlist, &array_in, &axis_in, &out_in, - &skipna)) { + &skipna, + &keepdims)) { return NULL; } @@ -2204,7 +2206,7 @@ array_count_nonzero(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) } } - ret = PyArray_ReduceCountNonzero(array, out, axis_flags, skipna); + ret = PyArray_ReduceCountNonzero(array, out, axis_flags, skipna, keepdims); Py_DECREF(array); @@ -2214,19 +2216,20 @@ array_count_nonzero(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) static PyObject * array_count_reduce_items(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"arr", "axis", "skipna", NULL}; + static char *kwlist[] = {"arr", "axis", "skipna", "keepdims", NULL}; PyObject *array_in, *axis_in = NULL; PyObject *ret = NULL; PyArrayObject *array; npy_bool axis_flags[NPY_MAXDIMS]; - int skipna = 0; + int skipna = 0, keepdims = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O|Oi:count_reduce_items", kwlist, + "O|Oii:count_reduce_items", kwlist, &array_in, &axis_in, - &skipna)) { + &skipna, + &keepdims)) { return NULL; } @@ -2242,7 +2245,7 @@ array_count_reduce_items(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *k return NULL; } - ret = PyArray_CountReduceItems(array, axis_flags, skipna); + ret = PyArray_CountReduceItems(array, axis_flags, skipna, keepdims); Py_DECREF(array); diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c index 1866e3cba..f305b8e06 100644 --- a/numpy/core/src/multiarray/reduction.c +++ b/numpy/core/src/multiarray/reduction.c @@ -80,7 +80,7 @@ allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags, */ static PyArrayObject * conform_reduce_result(int ndim, npy_bool *axis_flags, - PyArrayObject *out, const char *funcname) + PyArrayObject *out, int keepdims, const char *funcname) { npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS]; npy_intp *strides_out = PyArray_STRIDES(out); @@ -89,6 +89,35 @@ conform_reduce_result(int ndim, npy_bool *axis_flags, PyArray_Descr *dtype; PyArrayObject_fieldaccess *ret; + /* + * If the 'keepdims' parameter is true, do a simpler validation and + * return a new reference to 'out'. + */ + if (keepdims) { + if (PyArray_NDIM(out) != ndim) { + PyErr_Format(PyExc_ValueError, + "output parameter for reduction operation %s " + "has the wrong number of dimensions (must match " + "the operand's when keepdims=True)", funcname); + return NULL; + } + + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + if (shape_out[idim] != 1) { + PyErr_Format(PyExc_ValueError, + "output parameter for reduction operation %s " + "has a reduction dimension not equal to one " + "(required when keepdims=True)", funcname); + return NULL; + } + } + } + + Py_INCREF(out); + return out; + } + /* Construct the strides and shape */ idim_out = 0; for (idim = 0; idim < ndim; ++idim) { @@ -180,7 +209,7 @@ conform_reduce_result(int ndim, npy_bool *axis_flags, NPY_NO_EXPORT PyArrayObject * PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out, PyArray_Descr *dtype, npy_bool *axis_flags, - int need_namask, const char *funcname) + int need_namask, int keepdims, const char *funcname) { PyArrayObject *result; @@ -209,7 +238,7 @@ PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out, } result = conform_reduce_result(PyArray_NDIM(operand), axis_flags, - out, funcname); + out, keepdims, funcname); } return result; @@ -413,6 +442,8 @@ PyArray_InitializeReduceResult( * result_dtype : The dtype the inner loop expects for the result. * axis_flags : Flags indicating the reduction axes of 'operand'. * skipna : If true, NAs are skipped instead of propagating. + * keepdims : If true, leaves the reduction dimensions in the result + * with size one. * assign_unit : If NULL, PyArray_InitializeReduceResult is used, otherwise * this function is called to initialize the result to * the reduction's unit. @@ -426,7 +457,7 @@ NPY_NO_EXPORT PyArrayObject * PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, PyArray_Descr *operand_dtype, PyArray_Descr *result_dtype, - npy_bool *axis_flags, int skipna, + npy_bool *axis_flags, int skipna, int keepdims, PyArray_AssignReduceUnitFunc *assign_unit, PyArray_ReduceInnerLoopFunc *inner_loop, PyArray_ReduceInnerLoopFunc *masked_inner_loop, @@ -466,7 +497,7 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, Py_INCREF(result_dtype); result = PyArray_CreateReduceResult(operand, out, result_dtype, axis_flags, !skipna && use_maskna, - funcname); + keepdims, funcname); if (result == NULL) { goto fail; } @@ -617,7 +648,9 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, finish: /* Strip out the extra 'one' dimensions in the result */ if (out == NULL) { - PyArray_RemoveAxesInPlace(result, axis_flags); + if (!keepdims) { + PyArray_RemoveAxesInPlace(result, axis_flags); + } } else { Py_DECREF(result); @@ -651,7 +684,7 @@ fail: */ NPY_NO_EXPORT PyObject * PyArray_CountReduceItems(PyArrayObject *operand, - npy_bool *axis_flags, int skipna) + npy_bool *axis_flags, int skipna, int keepdims) { int idim, ndim = PyArray_NDIM(operand); @@ -715,7 +748,7 @@ PyArray_CountReduceItems(PyArrayObject *operand, } result = PyArray_CreateReduceResult(operand, NULL, result_dtype, axis_flags, 0, - "count_reduce_items"); + keepdims, "count_reduce_items"); if (result == NULL) { return NULL; } @@ -767,7 +800,9 @@ PyArray_CountReduceItems(PyArrayObject *operand, result_data, result_strides_it); /* Remove the reduction axes and return the result */ - PyArray_RemoveAxesInPlace(result, axis_flags); + if (!keepdims) { + PyArray_RemoveAxesInPlace(result, axis_flags); + } return PyArray_Return(result); } } diff --git a/numpy/core/src/multiarray/reduction.h b/numpy/core/src/multiarray/reduction.h index 849db256b..e2a9806c1 100644 --- a/numpy/core/src/multiarray/reduction.h +++ b/numpy/core/src/multiarray/reduction.h @@ -94,6 +94,8 @@ typedef void (PyArray_ReduceInnerLoopFunc)(NpyIter *iter, * result_dtype : The dtype the inner loop expects for the result. * axis_flags : Flags indicating the reduction axes of 'operand'. * skipna : If true, NAs are skipped instead of propagating. + * keepdims : If true, leaves the reduction dimensions in the result + * with size one. * assign_unit : If NULL, PyArray_InitializeReduceResult is used, otherwise * this function is called to initialize the result to * the reduction's unit. @@ -107,7 +109,7 @@ NPY_NO_EXPORT PyArrayObject * PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, PyArray_Descr *operand_dtype, PyArray_Descr *result_dtype, - npy_bool *axis_flags, int skipna, + npy_bool *axis_flags, int skipna, int keepdims, PyArray_AssignReduceUnitFunc *assign_unit, PyArray_ReduceInnerLoopFunc *inner_loop, PyArray_ReduceInnerLoopFunc *masked_inner_loop, @@ -127,7 +129,7 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, */ NPY_NO_EXPORT PyObject * PyArray_CountReduceItems(PyArrayObject *operand, - npy_bool *axis_flags, int skipna); + npy_bool *axis_flags, int skipna, int keepdims); #endif diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index d845140ad..b80d3b5af 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2610,7 +2610,7 @@ initialize_reduce_result(int identity, PyArrayObject *result, */ static PyArrayObject * PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out, - int naxes, int *axes, int otype, int skipna) + int naxes, int *axes, int otype, int skipna, int keepdims) { int iaxes, ndim, retcode; PyArray_Descr *otype_dtype = NULL; @@ -2723,7 +2723,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out, Py_XINCREF(otype_dtype); result = PyArray_CreateReduceResult(arr, out, otype_dtype, axis_flags, !skipna && use_maskna, - ufunc_name); + keepdims, ufunc_name); if (result == NULL) { return NULL; } @@ -2911,7 +2911,9 @@ finish: /* Strip out the extra 'one' dimensions in the result */ if (out == NULL) { - PyArray_RemoveAxesInPlace(result, axis_flags); + if (!keepdims) { + PyArray_RemoveAxesInPlace(result, axis_flags); + } } else { Py_DECREF(result); @@ -3708,8 +3710,9 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args, PyArrayObject *indices = NULL; PyArray_Descr *otype = NULL; PyArrayObject *out = NULL; - int skipna = 0; - static char *kwlist1[] = {"array", "axis", "dtype", "out", "skipna", NULL}; + int skipna = 0, keepdims = 0; + static char *kwlist1[] = {"array", "axis", "dtype", + "out", "skipna", "keepdims", NULL}; static char *kwlist2[] = {"array", "indices", "axis", "dtype", "out", "skipna", NULL}; static char *_reduce_type[] = {"reduce", "accumulate", "reduceat", NULL}; @@ -3758,12 +3761,13 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args, } } else { - if(!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&i", kwlist1, + if(!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&ii", kwlist1, &op, &axes_in, PyArray_DescrConverter2, &otype, PyArray_OutputConverter, &out, - &skipna)) { + &skipna, + &keepdims)) { Py_XDECREF(otype); return NULL; } @@ -3930,7 +3934,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args, switch(operation) { case UFUNC_REDUCE: ret = PyUFunc_Reduce(self, mp, out, naxes, axes, - otype->type_num, skipna); + otype->type_num, skipna, keepdims); break; case UFUNC_ACCUMULATE: if (naxes != 1) { |