summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mark Wiebe <mwwiebe@gmail.com> 2011-08-17 14:05:38 -0700
committer Charles Harris <charlesr.harris@gmail.com> 2011-08-27 07:26:55 -0600
commit 6bfd819a0897caf6e6db244930c40ed0d17b9e62 (patch)
tree 2fb8316f2aef0905ac687e15bd39c18b125e7479
parent a1faa1b6883c47333508a0476c1304b0a8a3f64e (diff)
download numpy-6bfd819a0897caf6e6db244930c40ed0d17b9e62.tar.gz
ENH: missingdata: Towards making count_nonzero a full-featured reduction operation
-rw-r--r-- numpy/core/src/multiarray/conversion_utils.c 86
-rw-r--r-- numpy/core/src/multiarray/conversion_utils.h 11
-rw-r--r-- numpy/core/src/multiarray/item_selection.c 68
-rw-r--r-- numpy/core/src/multiarray/na_mask.c 71
-rw-r--r-- numpy/core/src/umath/ufunc_object.c 53
5 files changed, 248 insertions, 41 deletions
diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index 461bd565f..5e59bf306 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -177,6 +177,8 @@ PyArray_BufferConverter(PyObject *obj, PyArray_Chunk *buf)
/*NUMPY_API
* Get axis from an object (possibly None) -- a converter function,
+ *
+ * See also PyArray_ConvertMultiAxis, which also handles a tuple of axes.
*/
NPY_NO_EXPORT int
PyArray_AxisConverter(PyObject *obj, int *axis)
@@ -193,6 +195,90 @@ PyArray_AxisConverter(PyObject *obj, int *axis)
return PY_SUCCEED;
}
+/*
+ * Converts an axis parameter into an ndim-length C-array of
+ * boolean flags, True for each axis specified.
+ *
+ * If axis_in is None (or NULL), everything is set to True. If axis_in
+ * is a tuple, each axis within the tuple is set to True. Anything else
+ * is interpreted as an integer, and just that axis is set to True.
+ *
+ * A negative axis has ndim added to it once before bounds checking,
+ * so -1 refers to the last axis.
+ *
+ * axis_in:        The 'axis' argument: None/NULL, a tuple, or an integer.
+ * ndim:           The number of dimensions the axes index into.
+ * out_axis_flags: Caller-provided buffer of at least ndim flags. It is
+ *                 initialised with memset using ndim as the byte count,
+ *                 which presumably relies on npy_bool being one byte
+ *                 wide -- TODO confirm.
+ *
+ * Returns NPY_SUCCEED on success and NPY_FAIL on failure. A ValueError
+ * is set here for an out-of-bounds axis or a duplicated tuple entry; if
+ * the integer conversion fails, the error it raised is left in place.
+ * (PyInt_AsLong returns -1 on error, so -1 is only treated as a failure
+ * when PyErr_Occurred() is also true.) On failure out_axis_flags may
+ * already have been partially written.
+ */
+NPY_NO_EXPORT int
+PyArray_ConvertMultiAxis(PyObject *axis_in, int ndim, npy_bool *out_axis_flags)
+{
+ /* None or NULL means all of the axes */
+ if (axis_in == Py_None || axis_in == NULL) {
+ memset(out_axis_flags, 1, ndim);
+ return NPY_SUCCEED;
+ }
+ /* A tuple listing which axes to flag; each entry must be unique */
+ else if (PyTuple_Check(axis_in)) {
+ int i, naxes;
+
+ memset(out_axis_flags, 0, ndim);
+
+ naxes = PyTuple_Size(axis_in);
+ if (naxes < 0) {
+ return NPY_FAIL;
+ }
+ for (i = 0; i < naxes; ++i) {
+ PyObject *tmp = PyTuple_GET_ITEM(axis_in, i);
+ long axis = PyInt_AsLong(tmp);
+ if (axis == -1 && PyErr_Occurred()) {
+ return NPY_FAIL;
+ }
+ if (axis < 0) {
+ axis += ndim;
+ }
+ if (axis < 0 || axis >= ndim) {
+ PyErr_SetString(PyExc_ValueError,
+ "'axis' entry is out of bounds");
+ return NPY_FAIL;
+ }
+ if (out_axis_flags[axis]) {
+ PyErr_SetString(PyExc_ValueError,
+ "duplicate value in 'axis'");
+ return NPY_FAIL;
+ }
+ out_axis_flags[axis] = 1;
+ }
+
+ return NPY_SUCCEED;
+ }
+ /* Anything else: try to interpret axis as a single integer */
+ else {
+ long axis;
+
+ memset(out_axis_flags, 0, ndim);
+
+ axis = PyInt_AsLong(axis_in);
+ /* TODO: PyNumber_Index would be good to use here */
+ if (axis == -1 && PyErr_Occurred()) {
+ return NPY_FAIL;
+ }
+ if (axis < 0) {
+ axis += ndim;
+ }
+ /*
+ * Special case letting axis=0 slip through for scalars,
+ * for backwards compatibility reasons. No flag is written,
+ * since with ndim == 0 there is no axis to flag. Only this
+ * integer branch has the exception; a tuple containing 0 with
+ * ndim == 0 is rejected as out of bounds by the branch above.
+ */
+ if (axis == 0 && ndim == 0) {
+ return NPY_SUCCEED;
+ }
+
+ if (axis < 0 || axis >= ndim) {
+ PyErr_SetString(PyExc_ValueError,
+ "'axis' entry is out of bounds");
+ return NPY_FAIL;
+ }
+
+ out_axis_flags[axis] = 1;
+
+ return NPY_SUCCEED;
+ }
+}
+
/*NUMPY_API
* Convert an object to true / false
*/
diff --git a/numpy/core/src/multiarray/conversion_utils.h b/numpy/core/src/multiarray/conversion_utils.h
index 64b26b23e..3ebd6ebf0 100644
--- a/numpy/core/src/multiarray/conversion_utils.h
+++ b/numpy/core/src/multiarray/conversion_utils.h
@@ -40,4 +40,15 @@ PyArray_TypestrConvert(int itemsize, int gentype);
NPY_NO_EXPORT PyObject *
PyArray_IntTupleFromIntp(int len, intp *vals);
+/*
+ * Converts an axis parameter into an ndim-length C-array of
+ * boolean flags, True for each axis specified.
+ *
+ * If axis_in is None (or NULL), everything is set to True. If axis_in
+ * is a tuple, each axis within the tuple is set to True. If axis_in is
+ * an integer, just that axis is set to True.
+ */
+NPY_NO_EXPORT int
+PyArray_ConvertMultiAxis(PyObject *axis_in, int ndim, npy_bool *out_axis_flags);
+
#endif
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index ff217be62..28f243600 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -1891,6 +1891,74 @@ count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides)
return count;
}
+/*
+ * A full reduction version of PyArray_CountNonzero, supporting
+ * an 'out' parameter and doing the count as a reduction along
+ * selected axes. It also supports a 'skipna' parameter, which
+ * skips over any NA masked values in arr.
+ *
+ * arr:        The array whose non-zero elements are to be counted. Its
+ *             dtype must provide a 'nonzero' function.
+ * out:        Optional output array, or NULL to have one allocated.
+ * axis_flags: Per-axis boolean flags, forwarded unchanged to
+ *             PyArray_CreateReduceResult.
+ * skipna:     Non-zero to skip NA values rather than propagate them.
+ *
+ * Returns NULL with an exception set on failure: TypeError if the dtype
+ * has no 'nonzero' function, ValueError if 'arr' contains an NA but
+ * 'out' has no NA mask (and skipna is false), or whatever
+ * PyArray_CreateReduceResult raised.
+ *
+ * NOTE: the reduction loop itself has not been written yet. Once the
+ * inputs are validated and the result array is created, this function
+ * currently raises NotImplementedError and returns NULL instead of
+ * running off the end of a non-void function.
+ */
+NPY_NO_EXPORT PyObject *
+PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out,
+ npy_bool *axis_flags, int skipna)
+{
+ PyArray_NonzeroFunc *nonzero;
+ int ndim, use_maskna;
+ PyArray_Descr *dtype;
+ PyArrayObject *result = NULL;
+
+ nonzero = PyArray_DESCR(arr)->f->nonzero;
+ if (nonzero == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "Cannot count the number of non-zeros for a dtype "
+ "which doesn't have a 'nonzero' function");
+ return NULL;
+ }
+
+ ndim = PyArray_NDIM(arr);
+ use_maskna = PyArray_HASMASKNA(arr);
+
+ /*
+ * If 'arr' has an NA mask, but 'out' doesn't, validate that 'arr'
+ * contains no NA values so we can ignore the mask entirely.
+ */
+ if (use_maskna && !skipna && out != NULL && !PyArray_HASMASKNA(out)) {
+ if (PyArray_ContainsNA(arr)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot assign NA value to an array which "
+ "does not support NAs");
+ return NULL;
+ }
+ else {
+ use_maskna = 0;
+ }
+ }
+
+ /* This reference gets stolen by PyArray_CreateReduceResult */
+ dtype = PyArray_DescrFromType(NPY_INTP);
+ if (dtype == NULL) {
+ return NULL;
+ }
+ /* This either conforms 'out' to the ndim of 'arr', or allocates
+ * a new array appropriate for this reduction.
+ */
+ result = PyArray_CreateReduceResult(arr, out,
+ dtype, axis_flags, !skipna && use_maskna,
+ "count_nonzero");
+ if (result == NULL) {
+ return NULL;
+ }
+
+ /*
+ * TODO: implement the reduction. When use_maskna is set and skipna
+ * is not, do the reduction on the NA mask before the data. This way
+ * we can avoid modifying the outputs which end up masked, obeying
+ * the required NA masking semantics.
+ *
+ * Until then, fail cleanly: drop our reference to 'result' so it is
+ * not leaked, and report the missing implementation to the caller.
+ */
+ Py_DECREF(result);
+ PyErr_SetString(PyExc_NotImplementedError,
+ "count_nonzero as a reduction is not implemented yet");
+ return NULL;
+}
+
/*NUMPY_API
* Counts the number of non-zero elements in the array. Raises
* an error if the array contains an NA.
diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c
index 0cb05beab..07aedfe9e 100644
--- a/numpy/core/src/multiarray/na_mask.c
+++ b/numpy/core/src/multiarray/na_mask.c
@@ -496,8 +496,7 @@ PyArray_IsNA(PyObject *obj)
}
}
-/*NUMPY_API
- *
+/*
* This function performs a reduction on the masks for an array.
* The masks are provided in raw form, with their strides conformed
* for the reduction.
@@ -511,8 +510,8 @@ PyArray_IsNA(PyObject *obj)
*
* Returns 0 on success, -1 on failure.
*/
-NPY_NO_EXPORT int
-PyArray_ReduceMaskNAArray(int ndim, npy_intp *shape,
+static int
+raw_reduce_maskna_array(int ndim, npy_intp *shape,
PyArray_Descr *src_dtype, char *src_data, npy_intp *src_strides,
PyArray_Descr *dst_dtype, char *dst_data, npy_intp *dst_strides)
{
@@ -600,6 +599,70 @@ PyArray_ReduceMaskNAArray(int ndim, npy_intp *shape,
return 0;
}
+/*NUMPY_API
+ *
+ * This function performs a reduction on the masks for an array.
+ *
+ * This is for use with a reduction where 'skipna=False'.
+ *
+ * result: The result array, which should have the same 'ndim' as
+ * 'operand' but with dimensions of size one for every reduction
+ * axis. This array must have an NA mask.
+ * operand: The operand for which the reduction is being done. This array
+ * must have an NA mask.
+ *
+ * The arguments are validated before any work is done. A ValueError is
+ * set and -1 returned if the two 'ndim' values differ, if either array
+ * lacks an NA mask, or if some dimension of 'result' is neither 1 nor
+ * equal to the matching dimension of 'operand'.
+ *
+ * The reduction itself is delegated to raw_reduce_maskna_array, with
+ * the operand's mask as the source and the result's mask as the
+ * destination. Its return value is passed through unchanged.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+NPY_NO_EXPORT int
+PyArray_ReduceMaskNAArray(PyArrayObject *result, PyArrayObject *operand)
+{
+ int idim, ndim;
+ npy_intp result_strides[NPY_MAXDIMS];
+ npy_intp *result_shape, *operand_shape;
+ npy_intp *result_maskna_strides;
+
+ ndim = PyArray_NDIM(operand);
+ if (ndim != PyArray_NDIM(result)) {
+ PyErr_SetString(PyExc_ValueError,
+ "result and operand must have the same 'ndim' in "
+ "ReduceMaskNAArray");
+ return -1;
+ }
+ if (!PyArray_HASMASKNA(result) || !PyArray_HASMASKNA(operand)) {
+ PyErr_SetString(PyExc_ValueError,
+ "both result and operand must have NA masks in "
+ "ReduceMaskNAArray");
+ return -1;
+ }
+
+ /*
+ * Need to make sure the appropriate strides are 0 in 'result'.
+ *
+ * This builds a local copy of the result's NA-mask strides in
+ * 'result_strides', with 0 substituted on every axis where 'result'
+ * has size one; the strides stored in 'result' itself are left
+ * untouched. Any other axis must match the operand's size exactly.
+ * The size-one test comes first, so an axis that is size one in
+ * both arrays also gets a stride of 0.
+ */
+ result_shape = PyArray_SHAPE(result);
+ operand_shape = PyArray_SHAPE(operand);
+ result_maskna_strides = PyArray_MASKNA_STRIDES(result);
+ for (idim = 0; idim < ndim; ++idim) {
+ if (result_shape[idim] == 1) {
+ result_strides[idim] = 0;
+ }
+ else if (result_shape[idim] != operand_shape[idim]) {
+ PyErr_SetString(PyExc_ValueError,
+ "the result shape must match the operand shape wherever "
+ "it is not 1 in ReduceMaskNAArray");
+ return -1;
+ }
+ else {
+ result_strides[idim] = result_maskna_strides[idim];
+ }
+ }
+
+ return raw_reduce_maskna_array(ndim, PyArray_DIMS(operand),
+ PyArray_MASKNA_DTYPE(operand),
+ PyArray_MASKNA_DATA(operand),
+ PyArray_MASKNA_STRIDES(operand),
+ PyArray_MASKNA_DTYPE(result),
+ PyArray_MASKNA_DATA(result),
+ result_strides);
+}
+
static void
_strided_bool_mask_inversion(char *dst, npy_intp dst_stride,
char *src, npy_intp src_stride,
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index a4f9bb105..c64563950 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2505,6 +2505,7 @@ get_masked_binary_op_function(PyUFuncObject *self, PyArrayObject *arr,
PyArrayObject *op[3] = {arr, arr, NULL};
PyArray_Descr *dtype[3] = {NULL, NULL, NULL};
PyObject *type_tup = NULL;
+ char *ufunc_name = self->name ? self->name : "(unknown)";
NPY_UF_DBG_PRINT1("Getting masked binary op function for type number %d\n",
*otype);
@@ -2546,9 +2547,9 @@ get_masked_binary_op_function(PyUFuncObject *self, PyArrayObject *arr,
for (i = 0; i < 3; ++i) {
Py_DECREF(dtype[i]);
}
- PyErr_SetString(PyExc_RuntimeError,
+ PyErr_Format(PyExc_RuntimeError,
"could not find a masked binary loop appropriate for "
- "reduce ufunc");
+ "reduce ufunc %s", ufunc_name);
return -1;
}
@@ -2607,7 +2608,7 @@ initialize_reduce_result(int identity, PyArrayObject *result,
* The axes must already be bounds-checked by the calling function,
* this function does not validate them.
*/
-static PyObject *
+static PyArrayObject *
PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
int naxes, int *axes, int otype, int skipna)
{
@@ -2659,17 +2660,15 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
use_maskna = PyArray_HASMASKNA(arr);
/* Detect whether to ignore the MASKNA */
- if (use_maskna) {
- if (!skipna && out != NULL && !PyArray_HASMASKNA(out)) {
- if (PyArray_ContainsNA(arr)) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot assign NA value to an array which "
- "does not support NAs");
- return NULL;
- }
- else {
- use_maskna = 0;
- }
+ if (use_maskna && !skipna && out != NULL && !PyArray_HASMASKNA(out)) {
+ if (PyArray_ContainsNA(arr)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot assign NA value to an array which "
+ "does not support NAs");
+ return NULL;
+ }
+ else {
+ use_maskna = 0;
}
}
@@ -2737,27 +2736,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
* the required NA masking semantics.
*/
if (!skipna) {
- int idim;
- npy_intp result_strides[NPY_MAXDIMS];
- /* Need to make sure the appropriate strides are 0 in 'result' */
- for (idim = 0; idim < PyArray_NDIM(arr); ++idim) {
- if (PyArray_DIMS(result)[idim] == 1) {
- result_strides[idim] = 0;
- }
- else {
- result_strides[idim] = PyArray_MASKNA_STRIDES(result)[idim];
- }
- }
- if (!PyArray_HASMASKNA(arr) || !PyArray_HASMASKNA(result))
- printf ("hasmaskna %d %d\n", PyArray_HASMASKNA(arr),
- PyArray_HASMASKNA(result));
- if (PyArray_ReduceMaskNAArray(ndim, PyArray_DIMS(arr),
- PyArray_MASKNA_DTYPE(arr),
- PyArray_MASKNA_DATA(arr),
- PyArray_MASKNA_STRIDES(arr),
- PyArray_MASKNA_DTYPE(result),
- PyArray_MASKNA_DATA(result),
- result_strides) < 0) {
+ if (PyArray_ReduceMaskNAArray(result, arr) < 0) {
goto fail;
}
@@ -2948,7 +2927,7 @@ finish:
Py_XDECREF(arr_view);
Py_XDECREF(otype_dtype);
NPY_AUXDATA_FREE(maskedinnerloopdata);
- return (PyObject *)result;
+ return result;
fail:
if (iter != NULL) {
@@ -3952,7 +3931,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
switch(operation) {
case UFUNC_REDUCE:
- ret = (PyArrayObject *)PyUFunc_Reduce(self, mp, out, naxes, axes,
+ ret = PyUFunc_Reduce(self, mp, out, naxes, axes,
otype->type_num, skipna);
break;
case UFUNC_ACCUMULATE: