summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mark Wiebe <mwiebe@enthought.com> 2011-08-03 09:53:51 -0500
committer Charles Harris <charlesr.harris@gmail.com> 2011-08-27 07:26:51 -0600
commit b471b5aace551d294f2ffe4f7be569fd6f148f50 (patch)
tree de42d13fac9e056246326b49a570399e955901cc
parent ae79b71a89d4c25c028202410ef4b538081c0dce (diff)
download numpy-b471b5aace551d294f2ffe4f7be569fd6f148f50.tar.gz
ENH: missingdata: Change boolean indexing to broadcast to the left manually
I've also restricted it to allow only one-dimensional masks, or masks whose number of dimensions matches that of the array holding the data. This will require further discussion on the list at some point.
-rw-r--r-- numpy/core/arrayprint.py 5
-rw-r--r-- numpy/core/src/multiarray/iterators.c 4
-rw-r--r-- numpy/core/src/multiarray/mapping.c 83
-rw-r--r-- numpy/core/src/multiarray/nditer_constr.c 4
-rw-r--r-- numpy/core/src/multiarray/shape.c 10
-rw-r--r-- numpy/core/src/umath/ufunc_object.c 4
6 files changed, 80 insertions, 30 deletions
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index 525fcab8a..d5ef01fe8 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -218,16 +218,13 @@ def _boolFormatter(x):
def _array2string(a, max_line_width, precision, suppress_small, separator=' ',
prefix="", formatter=None):
- print "DEBUG: in array2string!"
if max_line_width is None:
max_line_width = _line_width
- print "DEBUG: A"
if precision is None:
precision = _float_output_precision
- print "DEBUG: B"
if suppress_small is None:
suppress_small = _float_output_suppress_small
@@ -241,7 +238,6 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ',
summary_insert = ""
data = ravel(a)
- print "DEBUG: making formatdict"
formatdict = {'bool' : _boolFormatter,
'int' : IntegerFormat(data),
'float' : FloatFormat(data, precision, suppress_small),
@@ -253,7 +249,6 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ',
'timedelta' : TimedeltaFormat(data),
'numpystr' : repr,
'str' : str}
- print "DEBUG: made formatdict"
if formatter is not None:
fkeys = [k for k in formatter.keys() if formatter[k] is not None]
diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c
index 45fc40366..f59cdb7d1 100644
--- a/numpy/core/src/multiarray/iterators.c
+++ b/numpy/core/src/multiarray/iterators.c
@@ -105,8 +105,6 @@ parse_index(PyArrayObject *self, PyObject *op,
PyObject *op1 = NULL;
int is_slice;
- printf("parsing index...\n");
-
if (PySlice_Check(op) || op == Py_Ellipsis || op == Py_None) {
n = 1;
op1 = op;
@@ -175,7 +173,6 @@ parse_index(PyArrayObject *self, PyObject *op,
}
}
else {
- printf("index %d %d %d\n", (int)start, (int)n_steps, (int)step_size);
if (nd_old >= PyArray_NDIM(self)) {
PyErr_SetString(PyExc_IndexError, "too many indices");
return -1;
@@ -212,7 +209,6 @@ parse_index(PyArrayObject *self, PyObject *op,
}
*out_offset = offset;
if (out_maskna_offset != NULL) {
- printf ("maskna offset %d\n", (int)maskna_offset);
*out_maskna_offset = maskna_offset;
}
return nd_new;
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 4608c2604..d122eb214 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -698,6 +698,8 @@ array_boolean_subscript(PyArrayObject *self,
PyArrayObject *ret;
int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0;
npy_intp bmask_size;
+ int bmask_axes[NPY_MAXDIMS];
+ int *op_axes[2] = {NULL, NULL};
if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
PyErr_SetString(PyExc_TypeError,
@@ -714,6 +716,30 @@ array_boolean_subscript(PyArrayObject *self,
}
/*
+ * If the boolean mask has one dimension, broadcast to
+ * the left instead of to the right. Other broadcasting
+ * is disallowed to minimize inconsistency with NumPy in
+ * general.
+ */
+ if (PyArray_NDIM(bmask) != PyArray_NDIM(self)) {
+ int i;
+
+ if (PyArray_NDIM(bmask) != 1) {
+ PyErr_SetString(PyExc_ValueError,
+ "The boolean mask indexing array "
+ "is neither one-dimensional nor "
+ "matches the operand's number of "
+ "dimensions");
+ return NULL;
+ }
+ op_axes[1] = bmask_axes;
+ bmask_axes[0] = 0;
+ for (i = 1; i < PyArray_NDIM(self); ++i) {
+ bmask_axes[i] = -1;
+ }
+ }
+
+ /*
* Since we've checked that the mask contains no NAs, we
* can do a straightforward count of the boolean True values
* in the raw mask data array.
@@ -777,8 +803,9 @@ array_boolean_subscript(PyArrayObject *self,
*/
op_flags[1] = NPY_ITER_READONLY | NPY_ITER_IGNORE_MASKNA;
- iter = NpyIter_MultiNew(2, op, flags, order, NPY_NO_CASTING,
- op_flags, NULL);
+ iter = NpyIter_AdvancedNew(2, op, flags, order, NPY_NO_CASTING,
+ op_flags, NULL,
+ PyArray_NDIM(self), op_axes, NULL, 0);
if (iter == NULL) {
Py_DECREF(ret);
return NULL;
@@ -921,6 +948,8 @@ array_ass_boolean_subscript(PyArrayObject *self,
int needs_api = 0;
npy_intp bmask_size;
char constant_valid_mask = 1;
+ int bmask_axes[NPY_MAXDIMS];
+ int *op_axes[2] = {NULL, NULL};
if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
PyErr_SetString(PyExc_TypeError,
@@ -936,6 +965,30 @@ array_ass_boolean_subscript(PyArrayObject *self,
return -1;
}
+ /*
+ * If the boolean mask has one dimension, broadcast to
+ * the left instead of to the right. Other broadcasting
+ * is disallowed to minimize inconsistency with NumPy in
+ * general.
+ */
+ if (PyArray_NDIM(bmask) != PyArray_NDIM(self)) {
+ int i;
+
+ if (PyArray_NDIM(bmask) != 1) {
+ PyErr_SetString(PyExc_ValueError,
+ "The boolean mask indexing array "
+ "is neither one-dimensional nor "
+ "matches the operand's number of "
+ "dimensions");
+ return -1;
+ }
+ op_axes[1] = bmask_axes;
+ bmask_axes[0] = 0;
+ for (i = 1; i < PyArray_NDIM(self); ++i) {
+ bmask_axes[i] = -1;
+ }
+ }
+
/* See the Boolean Indexing section of the missing data NEP */
if (PyArray_ContainsNA(bmask)) {
PyErr_SetString(PyExc_ValueError,
@@ -1033,8 +1086,9 @@ array_ass_boolean_subscript(PyArrayObject *self,
*/
op_flags[1] = NPY_ITER_READONLY | NPY_ITER_IGNORE_MASKNA;
- iter = NpyIter_MultiNew(2, op, flags, order, NPY_NO_CASTING,
- op_flags, NULL);
+ iter = NpyIter_AdvancedNew(2, op, flags, order, NPY_NO_CASTING,
+ op_flags, NULL,
+ PyArray_NDIM(self), op_axes, NULL, 0);
if (iter == NULL) {
return -1;
}
@@ -2238,8 +2292,8 @@ PyArray_MapIterNew(PyObject *indexobj, int oned, int fancy)
int i, n, started, nonindex;
if (fancy == SOBJ_BADARRAY) {
- PyErr_SetString(PyExc_IndexError, \
- "arrays used as indices must be of " \
+ PyErr_SetString(PyExc_IndexError,
+ "arrays used as indices must be of "
"integer (or boolean) type");
return NULL;
}
@@ -2291,6 +2345,23 @@ PyArray_MapIterNew(PyObject *indexobj, int oned, int fancy)
*/
/* convert all inputs to iterators */
+ if (PyArray_Check(indexobj) &&
+ (PyArray_TYPE((PyArrayObject *)indexobj) == NPY_BOOL)) {
+ mit->numiter = _nonzero_indices(indexobj, mit->iters);
+ if (mit->numiter < 0) {
+ goto fail;
+ }
+ mit->nd = 1;
+ mit->dimensions[0] = mit->iters[0]->dims_m1[0]+1;
+ Py_DECREF(mit->indexobj);
+ mit->indexobj = PyTuple_New(mit->numiter);
+ if (mit->indexobj == NULL) {
+ goto fail;
+ }
+ for (i = 0; i < mit->numiter; i++) {
+ PyTuple_SET_ITEM(mit->indexobj, i, PyInt_FromLong(0));
+ }
+ }
if (PyArray_Check(indexobj) || !PyTuple_Check(indexobj)) {
mit->numiter = 1;
indtype = PyArray_DescrFromType(NPY_INTP);
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index 3002ca2a9..f20e200e4 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -3252,11 +3252,11 @@ npyiter_fill_maskna_axisdata(NpyIter *iter, int **op_axes)
else {
int i;
- if (op_axes == NULL || op_axes[iop] == NULL) {
+ if (op_axes == NULL || op_axes[iop_maskna] == NULL) {
i = PyArray_NDIM(op_cur) - idim - 1;
}
else {
- i = op_axes[iop][ndim-idim-1];
+ i = op_axes[iop_maskna][ndim-idim-1];
}
strides[iop] = PyArray_MASKNA_STRIDES(op_cur)[i];
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index f9c079208..e2af79afd 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -190,8 +190,6 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
npy_intp newmasknastrides[NPY_MAXDIMS];
int flags, build_maskna_strides = 0;
- printf("in newshape\n"); fflush(stdout);
-
if (order == NPY_ANYORDER) {
order = PyArray_ISFORTRAN(self);
}
@@ -206,7 +204,6 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
i++;
}
if (same) {
- printf("returning view\n"); fflush(stdout);
return PyArray_View(self, NULL, NULL);
}
}
@@ -221,14 +218,12 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
*/
i = _check_ones(self, ndim, dimensions, newstrides, newmasknastrides);
if (i == 0) {
- printf("setting strides to newstrides\n"); fflush(stdout);
strides = newstrides;
}
flags = PyArray_FLAGS(self) & ~(NPY_ARRAY_OWNMASKNA |
NPY_ARRAY_MASKNA);
if (strides == NULL) {
- printf("strides are null\n"); fflush(stdout);
/*
* we are really re-shaping not just adding ones to the shape somewhere
* fix any -1 dimensions and check new-dimensions against old size
@@ -251,12 +246,10 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
success = _attempt_nocopy_reshape(self, ndim, dimensions,
newstrides, newmasknastrides, order);
if (success) {
- printf("nocopy reshape succeeded\n"); fflush(stdout);
/* no need to copy the array after all */
strides = newstrides;
}
else {
- printf("nocopy reshape failed\n"); fflush(stdout);
PyObject *newcopy;
newcopy = PyArray_NewCopy(self, order);
if (newcopy == NULL) {
@@ -343,15 +336,12 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
if (build_maskna_strides) {
npy_intp stride = 1;
if (order == NPY_FORTRANORDER) {
- printf("building fortran strides\n"); fflush(stdout);
for (i = 0; i < ndim; ++i) {
fa->maskna_strides[i] = stride;
- printf("stride %d\n", (int)stride);
stride *= fa->dimensions[i];
}
}
else {
- printf("building C strides\n"); fflush(stdout);
for (i = ndim; i >= 0; --i) {
fa->maskna_strides[i] = stride;
stride *= fa->dimensions[i];
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 1dd0ef730..f7d775ca8 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2650,7 +2650,6 @@ allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out,
if (out == NULL) {
PyArrayObject *result;
- printf("allocating result\n"); fflush(stdout);
Py_INCREF(otype_dtype);
result = allocate_reduce_result(arr, axis_flags, otype_dtype);
@@ -2665,7 +2664,6 @@ allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out,
return result;
}
else {
- printf("conforming result\n"); fflush(stdout);
return conform_reduce_result(PyArray_NDIM(arr), axis_flags, out);
}
}
@@ -2926,7 +2924,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
/* Prepare the NA mask if there is one */
if (use_maskna) {
- printf("doing masked %s.reduce\n", ufunc_name); fflush(stdout);
+ //printf("doing masked %s.reduce\n", ufunc_name); fflush(stdout);
/*
* Do the reduction on the NA mask before the data. This way
* we can avoid modifying the outputs which end up masked, obeying