diff options
author | Mark Wiebe <mwiebe@enthought.com> | 2011-08-03 09:53:51 -0500 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:51 -0600 |
commit | b471b5aace551d294f2ffe4f7be569fd6f148f50 (patch) | |
tree | de42d13fac9e056246326b49a570399e955901cc | |
parent | ae79b71a89d4c25c028202410ef4b538081c0dce (diff) | |
download | numpy-b471b5aace551d294f2ffe4f7be569fd6f148f50.tar.gz |
ENH: missingdata: Change boolean indexing to broadcast to the left manually
I've also restricted boolean indexing to allow only one-dimensional masks
or masks whose number of dimensions matches that of the array being
indexed. This restriction will require further discussion on the list at some point.
-rw-r--r-- | numpy/core/arrayprint.py | 5 | ||||
-rw-r--r-- | numpy/core/src/multiarray/iterators.c | 4 | ||||
-rw-r--r-- | numpy/core/src/multiarray/mapping.c | 83 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer_constr.c | 4 | ||||
-rw-r--r-- | numpy/core/src/multiarray/shape.c | 10 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 4 |
6 files changed, 80 insertions, 30 deletions
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 525fcab8a..d5ef01fe8 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -218,16 +218,13 @@ def _boolFormatter(x): def _array2string(a, max_line_width, precision, suppress_small, separator=' ', prefix="", formatter=None): - print "DEBUG: in array2string!" if max_line_width is None: max_line_width = _line_width - print "DEBUG: A" if precision is None: precision = _float_output_precision - print "DEBUG: B" if suppress_small is None: suppress_small = _float_output_suppress_small @@ -241,7 +238,6 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ', summary_insert = "" data = ravel(a) - print "DEBUG: making formatdict" formatdict = {'bool' : _boolFormatter, 'int' : IntegerFormat(data), 'float' : FloatFormat(data, precision, suppress_small), @@ -253,7 +249,6 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ', 'timedelta' : TimedeltaFormat(data), 'numpystr' : repr, 'str' : str} - print "DEBUG: made formatdict" if formatter is not None: fkeys = [k for k in formatter.keys() if formatter[k] is not None] diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c index 45fc40366..f59cdb7d1 100644 --- a/numpy/core/src/multiarray/iterators.c +++ b/numpy/core/src/multiarray/iterators.c @@ -105,8 +105,6 @@ parse_index(PyArrayObject *self, PyObject *op, PyObject *op1 = NULL; int is_slice; - printf("parsing index...\n"); - if (PySlice_Check(op) || op == Py_Ellipsis || op == Py_None) { n = 1; op1 = op; @@ -175,7 +173,6 @@ parse_index(PyArrayObject *self, PyObject *op, } } else { - printf("index %d %d %d\n", (int)start, (int)n_steps, (int)step_size); if (nd_old >= PyArray_NDIM(self)) { PyErr_SetString(PyExc_IndexError, "too many indices"); return -1; @@ -212,7 +209,6 @@ parse_index(PyArrayObject *self, PyObject *op, } *out_offset = offset; if (out_maskna_offset != NULL) { - printf ("maskna offset %d\n", 
(int)maskna_offset); *out_maskna_offset = maskna_offset; } return nd_new; diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 4608c2604..d122eb214 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -698,6 +698,8 @@ array_boolean_subscript(PyArrayObject *self, PyArrayObject *ret; int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0; npy_intp bmask_size; + int bmask_axes[NPY_MAXDIMS]; + int *op_axes[2] = {NULL, NULL}; if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) { PyErr_SetString(PyExc_TypeError, @@ -714,6 +716,30 @@ array_boolean_subscript(PyArrayObject *self, } /* + * If the boolean mask has one dimension, broadcast to + * the left instead of to the right. Other broadcasting + * is disallowed to minimize inconsistency with NumPy in + * general. + */ + if (PyArray_NDIM(bmask) != PyArray_NDIM(self)) { + int i; + + if (PyArray_NDIM(bmask) != 1) { + PyErr_SetString(PyExc_ValueError, + "The boolean mask indexing array " + "is neither one-dimensional nor " + "matches the operand's number of " + "dimensions"); + return NULL; + } + op_axes[1] = bmask_axes; + bmask_axes[0] = 0; + for (i = 1; i < PyArray_NDIM(self); ++i) { + bmask_axes[i] = -1; + } + } + + /* * Since we've checked that the mask contains no NAs, we * can do a straightforward count of the boolean True values * in the raw mask data array. 
@@ -777,8 +803,9 @@ array_boolean_subscript(PyArrayObject *self, */ op_flags[1] = NPY_ITER_READONLY | NPY_ITER_IGNORE_MASKNA; - iter = NpyIter_MultiNew(2, op, flags, order, NPY_NO_CASTING, - op_flags, NULL); + iter = NpyIter_AdvancedNew(2, op, flags, order, NPY_NO_CASTING, + op_flags, NULL, + PyArray_NDIM(self), op_axes, NULL, 0); if (iter == NULL) { Py_DECREF(ret); return NULL; @@ -921,6 +948,8 @@ array_ass_boolean_subscript(PyArrayObject *self, int needs_api = 0; npy_intp bmask_size; char constant_valid_mask = 1; + int bmask_axes[NPY_MAXDIMS]; + int *op_axes[2] = {NULL, NULL}; if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) { PyErr_SetString(PyExc_TypeError, @@ -936,6 +965,30 @@ array_ass_boolean_subscript(PyArrayObject *self, return -1; } + /* + * If the boolean mask has one dimension, broadcast to + * the left instead of to the right. Other broadcasting + * is disallowed to minimize inconsistency with NumPy in + * general. + */ + if (PyArray_NDIM(bmask) != PyArray_NDIM(self)) { + int i; + + if (PyArray_NDIM(bmask) != 1) { + PyErr_SetString(PyExc_ValueError, + "The boolean mask indexing array " + "is neither one-dimensional nor " + "matches the operand's number of " + "dimensions"); + return -1; + } + op_axes[1] = bmask_axes; + bmask_axes[0] = 0; + for (i = 1; i < PyArray_NDIM(self); ++i) { + bmask_axes[i] = -1; + } + } + /* See the Boolean Indexing section of the missing data NEP */ if (PyArray_ContainsNA(bmask)) { PyErr_SetString(PyExc_ValueError, @@ -1033,8 +1086,9 @@ array_ass_boolean_subscript(PyArrayObject *self, */ op_flags[1] = NPY_ITER_READONLY | NPY_ITER_IGNORE_MASKNA; - iter = NpyIter_MultiNew(2, op, flags, order, NPY_NO_CASTING, - op_flags, NULL); + iter = NpyIter_AdvancedNew(2, op, flags, order, NPY_NO_CASTING, + op_flags, NULL, + PyArray_NDIM(self), op_axes, NULL, 0); if (iter == NULL) { return -1; } @@ -2238,8 +2292,8 @@ PyArray_MapIterNew(PyObject *indexobj, int oned, int fancy) int i, n, started, nonindex; if (fancy == SOBJ_BADARRAY) { - 
PyErr_SetString(PyExc_IndexError, \ - "arrays used as indices must be of " \ + PyErr_SetString(PyExc_IndexError, + "arrays used as indices must be of " "integer (or boolean) type"); return NULL; } @@ -2291,6 +2345,23 @@ PyArray_MapIterNew(PyObject *indexobj, int oned, int fancy) */ /* convert all inputs to iterators */ + if (PyArray_Check(indexobj) && + (PyArray_TYPE((PyArrayObject *)indexobj) == NPY_BOOL)) { + mit->numiter = _nonzero_indices(indexobj, mit->iters); + if (mit->numiter < 0) { + goto fail; + } + mit->nd = 1; + mit->dimensions[0] = mit->iters[0]->dims_m1[0]+1; + Py_DECREF(mit->indexobj); + mit->indexobj = PyTuple_New(mit->numiter); + if (mit->indexobj == NULL) { + goto fail; + } + for (i = 0; i < mit->numiter; i++) { + PyTuple_SET_ITEM(mit->indexobj, i, PyInt_FromLong(0)); + } + } if (PyArray_Check(indexobj) || !PyTuple_Check(indexobj)) { mit->numiter = 1; indtype = PyArray_DescrFromType(NPY_INTP); diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index 3002ca2a9..f20e200e4 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -3252,11 +3252,11 @@ npyiter_fill_maskna_axisdata(NpyIter *iter, int **op_axes) else { int i; - if (op_axes == NULL || op_axes[iop] == NULL) { + if (op_axes == NULL || op_axes[iop_maskna] == NULL) { i = PyArray_NDIM(op_cur) - idim - 1; } else { - i = op_axes[iop][ndim-idim-1]; + i = op_axes[iop_maskna][ndim-idim-1]; } strides[iop] = PyArray_MASKNA_STRIDES(op_cur)[i]; diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index f9c079208..e2af79afd 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -190,8 +190,6 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims, npy_intp newmasknastrides[NPY_MAXDIMS]; int flags, build_maskna_strides = 0; - printf("in newshape\n"); fflush(stdout); - if (order == NPY_ANYORDER) { order = PyArray_ISFORTRAN(self); } @@ 
-206,7 +204,6 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims, i++; } if (same) { - printf("returning view\n"); fflush(stdout); return PyArray_View(self, NULL, NULL); } } @@ -221,14 +218,12 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims, */ i = _check_ones(self, ndim, dimensions, newstrides, newmasknastrides); if (i == 0) { - printf("setting strides to newstrides\n"); fflush(stdout); strides = newstrides; } flags = PyArray_FLAGS(self) & ~(NPY_ARRAY_OWNMASKNA | NPY_ARRAY_MASKNA); if (strides == NULL) { - printf("strides are null\n"); fflush(stdout); /* * we are really re-shaping not just adding ones to the shape somewhere * fix any -1 dimensions and check new-dimensions against old size @@ -251,12 +246,10 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims, success = _attempt_nocopy_reshape(self, ndim, dimensions, newstrides, newmasknastrides, order); if (success) { - printf("nocopy reshape succeeded\n"); fflush(stdout); /* no need to copy the array after all */ strides = newstrides; } else { - printf("nocopy reshape failed\n"); fflush(stdout); PyObject *newcopy; newcopy = PyArray_NewCopy(self, order); if (newcopy == NULL) { @@ -343,15 +336,12 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims, if (build_maskna_strides) { npy_intp stride = 1; if (order == NPY_FORTRANORDER) { - printf("building fortran strides\n"); fflush(stdout); for (i = 0; i < ndim; ++i) { fa->maskna_strides[i] = stride; - printf("stride %d\n", (int)stride); stride *= fa->dimensions[i]; } } else { - printf("building C strides\n"); fflush(stdout); for (i = ndim; i >= 0; --i) { fa->maskna_strides[i] = stride; stride *= fa->dimensions[i]; diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 1dd0ef730..f7d775ca8 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2650,7 +2650,6 @@ allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out, if (out == 
NULL) { PyArrayObject *result; - printf("allocating result\n"); fflush(stdout); Py_INCREF(otype_dtype); result = allocate_reduce_result(arr, axis_flags, otype_dtype); @@ -2665,7 +2664,6 @@ allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out, return result; } else { - printf("conforming result\n"); fflush(stdout); return conform_reduce_result(PyArray_NDIM(arr), axis_flags, out); } } @@ -2926,7 +2924,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out, /* Prepare the NA mask if there is one */ if (use_maskna) { - printf("doing masked %s.reduce\n", ufunc_name); fflush(stdout); + //printf("doing masked %s.reduce\n", ufunc_name); fflush(stdout); /* * Do the reduction on the NA mask before the data. This way * we can avoid modifying the outputs which end up masked, obeying |