summary | refs | log | tree | commit | diff
path: root/numpy
diff options
context:
space:
mode:
author: Mark Wiebe <mwiebe@enthought.com> 2011-08-01 11:50:56 -0500
committer: Charles Harris <charlesr.harris@gmail.com> 2011-08-27 07:26:49 -0600
commit: 9aa599c90f49cd3b435b0ce9a637fee5fa94321a (patch)
tree: d27ec7d43c9a4213b402d2da4a4dd74a9ea40ab9 /numpy
parent: 3441a3e7b0314488a697a839357e453f21de0577 (diff)
download: numpy-9aa599c90f49cd3b435b0ce9a637fee5fa94321a.tar.gz
ENH: missingdata: Write function for reducing the NA mask array
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/core/code_generators/numpy_api.py | 1
-rw-r--r-- numpy/core/src/multiarray/dtype_transfer.c | 26
-rw-r--r-- numpy/core/src/multiarray/item_selection.c | 42
-rw-r--r-- numpy/core/src/multiarray/na_mask.c | 182
-rw-r--r-- numpy/core/src/private/lowlevel_strided_loops.h | 5
5 files changed, 173 insertions, 83 deletions
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index 49d38c32f..7b1fa26c3 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -326,6 +326,7 @@ multiarray_funcs_api = {
'PyArray_AllocateMaskNA': 286,
'NpyIter_GetFirstMaskNAOp': 288,
'NpyIter_GetMaskNAIndices': 289,
+ 'PyArray_ReduceMaskArray': 290,
}
ufunc_types_api = {
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index aaea290b1..23c1f4e76 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -4115,10 +4115,10 @@ PyArray_CastRawNDimArrays(int ndim, npy_intp *shape,
int idim;
npy_intp src_align, dst_align;
int aligned, needs_api = 0;
- npy_intp coords[NPY_MAXDIMS];
- npy_intp shape_copy[NPY_MAXDIMS];
- npy_intp src_strides_copy[NPY_MAXDIMS];
- npy_intp dst_strides_copy[NPY_MAXDIMS];
+ npy_intp coord[NPY_MAXDIMS];
+ npy_intp shape_it[NPY_MAXDIMS];
+ npy_intp src_strides_it[NPY_MAXDIMS];
+ npy_intp dst_strides_it[NPY_MAXDIMS];
/* Determine data alignment */
src_align = (npy_intp)src;
@@ -4135,9 +4135,9 @@ PyArray_CastRawNDimArrays(int ndim, npy_intp *shape,
if (PyArray_PrepareTwoRawArrayIter(ndim, shape,
dst, dst_strides,
src, src_strides,
- &ndim, shape_copy,
- &dst, dst_strides_copy,
- &src, src_strides_copy) < 0) {
+ &ndim, shape_it,
+ &dst, dst_strides_it,
+ &src, src_strides_it) < 0) {
return NPY_FAIL;
}
@@ -4151,13 +4151,13 @@ PyArray_CastRawNDimArrays(int ndim, npy_intp *shape,
return NPY_FAIL;
}
- NPY_RAW_ITER_START(idim, ndim, coords, shape_copy) {
- stransfer(dst, dst_strides_copy[0],
- src, src_strides_copy[0], shape_copy[0],
+ NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+ stransfer(dst, dst_strides_it[0],
+ src, src_strides_it[0], shape_it[0],
src_dtype->elsize, transferdata);
- } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coords, shape_copy,
- src, src_strides,
- dst, dst_strides);
+ } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
+ src, src_strides_it,
+ dst, dst_strides_it);
/* Cleanup */
NPY_AUXDATA_FREE(transferdata);
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 08098d0b1..5be85aaef 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -19,8 +19,6 @@
#include "lowlevel_strided_loops.h"
#include "item_selection.h"
-#define _check_axis PyArray_CheckAxis
-
/*NUMPY_API
* Take
*/
@@ -37,7 +35,7 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
int err;
indices = NULL;
- self = (PyArrayObject *)_check_axis(self0, &axis, NPY_ARRAY_CARRAY);
+ self = (PyArrayObject *)PyArray_CheckAxis(self0, &axis, NPY_ARRAY_CARRAY);
if (self == NULL) {
return NULL;
}
@@ -507,7 +505,7 @@ PyArray_Repeat(PyArrayObject *aop, PyObject *op, int axis)
nd = PyArray_NDIM(repeats);
counts = (npy_intp *)PyArray_DATA(repeats);
- if ((ap=_check_axis(aop, &axis, NPY_ARRAY_CARRAY))==NULL) {
+ if ((ap=PyArray_CheckAxis(aop, &axis, NPY_ARRAY_CARRAY))==NULL) {
Py_DECREF(repeats);
return NULL;
}
@@ -1080,7 +1078,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which)
}
/* Creates new reference op2 */
- if ((op2=(PyArrayObject *)_check_axis(op, &axis, 0)) == NULL) {
+ if ((op2=(PyArrayObject *)PyArray_CheckAxis(op, &axis, 0)) == NULL) {
return NULL;
}
/* Determine if we should use new algorithm or not */
@@ -1719,51 +1717,25 @@ count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides)
return 0;
}
- /* Do the iteration */
- memset(coord, 0, ndim * sizeof(npy_intp));
/* Special case for contiguous inner loop */
if (strides[0] == 1) {
- do {
+ NPY_RAW_ITER_START(idim, ndim, coord, shape) {
char *d = data;
/* Process the innermost dimension */
for (i = 0; i < shape[0]; ++i, ++d) {
count += (*d != 0);
}
-
- /* Increment to the next n-dimensional coordinate */
- for (idim = 1; idim < ndim; ++idim) {
- if (++coord[idim] == shape[idim]) {
- coord[idim] = 0;
- data -= (shape[idim] - 1) * strides[idim];
- }
- else {
- data += strides[idim];
- break;
- }
- }
- } while (i < ndim);
+ } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
}
/* General inner loop */
else {
- do {
+ NPY_RAW_ITER_START(idim, ndim, coord, shape) {
char *d = data;
/* Process the innermost dimension */
for (i = 0; i < shape[0]; ++i, d += strides[0]) {
count += (*d != 0);
}
-
- /* Increment to the next n-dimensional coordinate */
- for (idim = 1; idim < ndim; ++idim) {
- if (++coord[idim] == shape[idim]) {
- coord[idim] = 0;
- data -= (shape[idim] - 1) * strides[idim];
- }
- else {
- data += strides[idim];
- break;
- }
- }
- } while (i < ndim);
+ } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
}
return count;
diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c
index ecb54bc7e..6406dd82e 100644
--- a/numpy/core/src/multiarray/na_mask.c
+++ b/numpy/core/src/multiarray/na_mask.c
@@ -80,6 +80,50 @@ PyArray_ContainsNA(PyArrayObject *arr)
}
/*
+ * Fills a raw array whose dtype has size one with the specified byte.
+ *
+ * ndim, shape: The geometry of the array.
+ * data, strides: The data pointer and the per-dimension strides.
+ * fillvalue: The byte stored into every element visited.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+fill_raw_byte_array(int ndim, npy_intp *shape,
+                char *data, npy_intp *strides, char fillvalue)
+{
+    int idim;
+    npy_intp shape_it[NPY_MAXDIMS], strides_it[NPY_MAXDIMS];
+    npy_intp i, coord[NPY_MAXDIMS];
+
+    /* Use raw iteration with no heap memory allocation */
+    if (PyArray_PrepareOneRawArrayIter(
+                    ndim, shape,
+                    data, strides,
+                    &ndim, shape_it,
+                    &data, strides_it) < 0) {
+        /*
+         * Report failure as -1, matching the contract documented above
+         * and callers that test the result with '< 0'. The pending
+         * error (presumably set by the helper -- confirm) is left in
+         * place for the caller instead of being cleared.
+         */
+        return -1;
+    }
+
+    /* Special case contiguous inner stride */
+    if (strides_it[0] == 1) {
+        NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+            /* Process the innermost dimension with a single memset */
+            memset(data, fillvalue, (size_t)shape_it[0]);
+        } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape_it, data, strides_it);
+    }
+    /* General inner stride */
+    else {
+        NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+            char *d = data;
+            /* Process the innermost dimension one byte at a time */
+            for (i = 0; i < shape_it[0]; ++i, d += strides_it[0]) {
+                *d = fillvalue;
+            }
+        } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape_it, data, strides_it);
+    }
+
+    return 0;
+}
+
+/*
* Assigns the mask value to all the NA mask elements of
* the array.
*
@@ -88,11 +132,6 @@ PyArray_ContainsNA(PyArrayObject *arr)
NPY_NO_EXPORT int
PyArray_AssignMaskNA(PyArrayObject *arr, npy_mask maskvalue)
{
- int idim, ndim;
- char *data;
- npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
- npy_intp i, coord[NPY_MAXDIMS];
-
/* Need NA support to fill the NA mask */
if (!PyArray_HASMASKNA(arr)) {
PyErr_SetString(PyExc_ValueError,
@@ -107,35 +146,10 @@ PyArray_AssignMaskNA(PyArrayObject *arr, npy_mask maskvalue)
return -1;
}
- /* Use raw iteration with no heap memory allocation */
- if (PyArray_PrepareOneRawArrayIter(
+ return fill_raw_byte_array(
PyArray_NDIM(arr), PyArray_DIMS(arr),
PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr),
- &ndim, shape,
- &data, strides) < 0) {
- PyErr_Clear();
- return 1;
- }
-
- /* Special case contiguous inner stride */
- if (strides[0] == 1) {
- NPY_RAW_ITER_START(idim, ndim, coord, shape) {
- /* Process the innermost dimension */
- memset(data, maskvalue, shape[0]);
- } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
- }
- /* General inner stride */
- else {
- NPY_RAW_ITER_START(idim, ndim, coord, shape) {
- char *d = data;
- /* Process the innermost dimension */
- for (i = 0; i < shape[0]; ++i, d += strides[0]) {
- *(npy_mask *)d = maskvalue;
- }
- } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
- }
-
- return 0;
+ (char)maskvalue);
}
/*NUMPY_API
@@ -432,3 +446,107 @@ PyArray_IsNA(PyObject *obj)
return (PyObject *)ret;
}
}
+
+/*NUMPY_API
+ *
+ * This function performs a reduction on the masks for an array.
+ * The masks are provided in raw form, with their strides conformed
+ * for the reduction.
+ *
+ * This is for use with a reduction where 'skipna=False'.
+ *
+ * The destination is first filled with ones; afterwards every
+ * destination element that has a zero among the source elements
+ * reduced into it is set to zero.
+ *
+ * ndim, shape: The geometry of the arrays
+ * src_dtype, dst_dtype: The NA mask dtypes (both must be NPY_BOOL).
+ * src_data, dst_data: The NA mask data pointers.
+ * src_strides, dst_strides: The NA mask strides, matching the geometry.
+ *     A zero stride in dst marks a dimension being reduced.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+NPY_NO_EXPORT int
+PyArray_ReduceMaskArray(int ndim, npy_intp *shape,
+            PyArray_Descr *src_dtype, char *src_data, npy_intp *src_strides,
+            PyArray_Descr *dst_dtype, char *dst_data, npy_intp *dst_strides)
+{
+    int idim;
+    npy_intp i, coord[NPY_MAXDIMS];
+    npy_intp shape_it[NPY_MAXDIMS];
+    npy_intp src_strides_it[NPY_MAXDIMS];
+    npy_intp dst_strides_it[NPY_MAXDIMS];
+
+    /* Confirm that dst is not larger than src */
+    for (idim = 0; idim < ndim; ++idim) {
+        if (src_strides[idim] == 0 && dst_strides[idim] != 0) {
+            PyErr_SetString(PyExc_RuntimeError,
+                    "ReduceMaskArray cannot reduce into a larger array");
+            return -1;
+        }
+    }
+
+    if (src_dtype->type_num != NPY_BOOL || dst_dtype->type_num != NPY_BOOL) {
+        PyErr_SetString(PyExc_ValueError,
+                "multi-NA and field-NA are not yet supported");
+        return -1;
+    }
+
+    /* Initialize the destination mask to all ones, exposed data */
+    if (fill_raw_byte_array(ndim, shape, dst_data, dst_strides, 1) < 0) {
+        return -1;
+    }
+
+    /*
+     * Sort axes based on 'src', which has more non-zero strides,
+     * by making it the first operand here
+     */
+    if (PyArray_PrepareTwoRawArrayIter(ndim, shape,
+                                    src_data, src_strides,
+                                    dst_data, dst_strides,
+                                    &ndim, shape_it,
+                                    &src_data, src_strides_it,
+                                    &dst_data, dst_strides_it) < 0) {
+        /* -1, not NPY_FAIL: this function's failure value is negative */
+        return -1;
+    }
+
+    /* Special case a reduction in the inner loop */
+    if (dst_strides_it[0] == 0) {
+        /* Special case a contiguous reduction in the inner loop */
+        if (src_strides_it[0] == 1) {
+            NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+                /* If there's a zero in src, set dst to zero */
+                if (memchr(src_data, 0, (size_t)shape_it[0]) != NULL) {
+                    *dst_data = 0;
+                }
+            } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
+                                    src_data, src_strides_it,
+                                    dst_data, dst_strides_it);
+        }
+        else {
+            NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+                char *src_d = src_data;
+                /* If there's a zero in src, set dst to zero */
+                for (i = 0; i < shape_it[0]; ++i) {
+                    if (*src_d == 0) {
+                        *dst_data = 0;
+                        /* One zero decides the result; stop scanning */
+                        break;
+                    }
+                    src_d += src_strides_it[0];
+                }
+            } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
+                                    src_data, src_strides_it,
+                                    dst_data, dst_strides_it);
+        }
+    }
+    /* The inner dimension is not reduced: combine element by element */
+    else {
+        NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+            char *src_d = src_data;
+            char *dst_d = dst_data;
+            for (i = 0; i < shape_it[0]; ++i) {
+                *dst_d &= *src_d;
+                src_d += src_strides_it[0];
+                dst_d += dst_strides_it[0];
+            }
+        } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
+                                src_data, src_strides_it,
+                                dst_data, dst_strides_it);
+    }
+
+    return 0;
+}
diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h
index 840f4ec04..a4ba0526d 100644
--- a/numpy/core/src/private/lowlevel_strided_loops.h
+++ b/numpy/core/src/private/lowlevel_strided_loops.h
@@ -380,7 +380,7 @@ PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape,
break; \
} \
} \
- } while ((idim) < (ndim)); \
+ } while ((idim) < (ndim))
/* Increment to the next n-dimensional coordinate for two raw arrays */
#define NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape, \
@@ -397,8 +397,7 @@ PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape,
break; \
} \
} \
- } while ((idim) < (ndim)); \
-
+ } while ((idim) < (ndim))
/*