ENH: missingdata: Change boolean indexing to broadcast to the left manually

I've also restricted boolean indexing to accept only masks that are either one-dimensional or have the same number of dimensions as the array being indexed. This restriction will require further discussion on the list at some point.
author: Mark Wiebe <mwiebe@enthought.com> 2011-08-03 09:53:51 -0500
committer: Charles Harris <charlesr.harris@gmail.com> 2011-08-27 07:26:51 -0600
commit: b471b5aace551d294f2ffe4f7be569fd6f148f50 (patch)
tree: de42d13fac9e056246326b49a570399e955901cc
parent: ae79b71a89d4c25c028202410ef4b538081c0dce (diff)
download: numpy-b471b5aace551d294f2ffe4f7be569fd6f148f50.tar.gz
6 files changed, 80 insertions, 30 deletions
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index 525fcab8a..d5ef01fe8 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -218,16 +218,13 @@ def _boolFormatter(x):
 
 def _array2string(a, max_line_width, precision, suppress_small, separator=' ',
                   prefix="", formatter=None):
-    print "DEBUG: in array2string!"
 
     if max_line_width is None:
         max_line_width = _line_width
 
-    print "DEBUG: A"
     if precision is None:
         precision = _float_output_precision
 
-    print "DEBUG: B"
     if suppress_small is None:
         suppress_small = _float_output_suppress_small
 
@@ -241,7 +238,6 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ',
         summary_insert = ""
         data = ravel(a)
 
-    print "DEBUG: making formatdict"
     formatdict = {'bool' : _boolFormatter,
                   'int' : IntegerFormat(data),
                   'float' : FloatFormat(data, precision, suppress_small),
@@ -253,7 +249,6 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ',
                   'timedelta' : TimedeltaFormat(data),
                   'numpystr' : repr,
                   'str' : str}
-    print "DEBUG: made formatdict"
 
     if formatter is not None:
         fkeys = [k for k in formatter.keys() if formatter[k] is not None]
diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c
index 45fc40366..f59cdb7d1 100644
--- a/numpy/core/src/multiarray/iterators.c
+++ b/numpy/core/src/multiarray/iterators.c
@@ -105,8 +105,6 @@ parse_index(PyArrayObject *self, PyObject *op,
     PyObject *op1 = NULL;
     int is_slice;
 
-    printf("parsing index...\n");
-
     if (PySlice_Check(op) || op == Py_Ellipsis || op == Py_None) {
         n = 1;
         op1 = op;
@@ -175,7 +173,6 @@ parse_index(PyArrayObject *self, PyObject *op,
             }
         }
         else {
-            printf("index %d %d %d\n", (int)start, (int)n_steps, (int)step_size);
             if (nd_old >= PyArray_NDIM(self)) {
                 PyErr_SetString(PyExc_IndexError, "too many indices");
                 return -1;
@@ -212,7 +209,6 @@ parse_index(PyArrayObject *self, PyObject *op,
     }
     *out_offset = offset;
     if (out_maskna_offset != NULL) {
-        printf ("maskna offset %d\n", (int)maskna_offset);
         *out_maskna_offset = maskna_offset;
     }
     return nd_new;
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 4608c2604..d122eb214 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -698,6 +698,8 @@ array_boolean_subscript(PyArrayObject *self,
     PyArrayObject *ret;
     int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0;
     npy_intp bmask_size;
+    int bmask_axes[NPY_MAXDIMS];
+    int *op_axes[2] = {NULL, NULL};
 
     if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
         PyErr_SetString(PyExc_TypeError,
@@ -714,6 +716,30 @@ array_boolean_subscript(PyArrayObject *self,
     }
 
     /*
+     * If the boolean mask has one dimension, broadcast to
+     * the left instead of to the right. Other broadcasting
+     * is disallowed to minimize inconsistency with NumPy in
+     * general.
+     */
+    if (PyArray_NDIM(bmask) != PyArray_NDIM(self)) {
+        int i;
+
+        if (PyArray_NDIM(bmask) != 1) {
+            PyErr_SetString(PyExc_ValueError,
+                    "The boolean mask indexing array "
+                    "is neither one-dimensional nor "
+                    "matches the operand's number of "
+                    "dimensions");
+            return NULL;
+        }
+        op_axes[1] = bmask_axes;
+        bmask_axes[0] = 0;
+        for (i = 1; i < PyArray_NDIM(self); ++i) {
+            bmask_axes[i] = -1;
+        }
+    }
+
+    /*
      * Since we've checked that the mask contains no NAs, we
      * can do a straightforward count of the boolean True values
      * in the raw mask data array.
@@ -777,8 +803,9 @@ array_boolean_subscript(PyArrayObject *self,
          */
         op_flags[1] = NPY_ITER_READONLY | NPY_ITER_IGNORE_MASKNA;
 
-        iter = NpyIter_MultiNew(2, op, flags, order, NPY_NO_CASTING,
-                                op_flags, NULL);
+        iter = NpyIter_AdvancedNew(2, op, flags, order, NPY_NO_CASTING,
+                                op_flags, NULL,
+                                PyArray_NDIM(self), op_axes, NULL, 0);
         if (iter == NULL) {
             Py_DECREF(ret);
             return NULL;
@@ -921,6 +948,8 @@ array_ass_boolean_subscript(PyArrayObject *self,
     int needs_api = 0;
     npy_intp bmask_size;
     char constant_valid_mask = 1;
+    int bmask_axes[NPY_MAXDIMS];
+    int *op_axes[2] = {NULL, NULL};
 
     if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
         PyErr_SetString(PyExc_TypeError,
@@ -936,6 +965,30 @@ array_ass_boolean_subscript(PyArrayObject *self,
         return -1;
     }
 
+    /*
+     * If the boolean mask has one dimension, broadcast to
+     * the left instead of to the right. Other broadcasting
+     * is disallowed to minimize inconsistency with NumPy in
+     * general.
+     */
+    if (PyArray_NDIM(bmask) != PyArray_NDIM(self)) {
+        int i;
+
+        if (PyArray_NDIM(bmask) != 1) {
+            PyErr_SetString(PyExc_ValueError,
+                    "The boolean mask indexing array "
+                    "is neither one-dimensional nor "
+                    "matches the operand's number of "
+                    "dimensions");
+            return -1;
+        }
+        op_axes[1] = bmask_axes;
+        bmask_axes[0] = 0;
+        for (i = 1; i < PyArray_NDIM(self); ++i) {
+            bmask_axes[i] = -1;
+        }
+    }
+
     /* See the Boolean Indexing section of the missing data NEP */
     if (PyArray_ContainsNA(bmask)) {
         PyErr_SetString(PyExc_ValueError,
@@ -1033,8 +1086,9 @@ array_ass_boolean_subscript(PyArrayObject *self,
          */
         op_flags[1] = NPY_ITER_READONLY | NPY_ITER_IGNORE_MASKNA;
 
-        iter = NpyIter_MultiNew(2, op, flags, order, NPY_NO_CASTING,
-                                op_flags, NULL);
+        iter = NpyIter_AdvancedNew(2, op, flags, order, NPY_NO_CASTING,
+                                op_flags, NULL,
+                                PyArray_NDIM(self), op_axes, NULL, 0);
         if (iter == NULL) {
             return -1;
         }
@@ -2238,8 +2292,8 @@ PyArray_MapIterNew(PyObject *indexobj, int oned, int fancy)
     int i, n, started, nonindex;
 
     if (fancy == SOBJ_BADARRAY) {
-        PyErr_SetString(PyExc_IndexError,                       \
-                        "arrays used as indices must be of "    \
+        PyErr_SetString(PyExc_IndexError,
+                        "arrays used as indices must be of "
                         "integer (or boolean) type");
         return NULL;
     }
@@ -2291,6 +2345,23 @@ PyArray_MapIterNew(PyObject *indexobj, int oned, int fancy)
      */
 
     /* convert all inputs to iterators */
+    if (PyArray_Check(indexobj) &&
+                    (PyArray_TYPE((PyArrayObject *)indexobj) == NPY_BOOL)) {
+        mit->numiter = _nonzero_indices(indexobj, mit->iters);
+        if (mit->numiter < 0) {
+            goto fail;
+        }
+        mit->nd = 1;
+        mit->dimensions[0] = mit->iters[0]->dims_m1[0]+1;
+        Py_DECREF(mit->indexobj);
+        mit->indexobj = PyTuple_New(mit->numiter);
+        if (mit->indexobj == NULL) {
+            goto fail;
+        }
+        for (i = 0; i < mit->numiter; i++) {
+            PyTuple_SET_ITEM(mit->indexobj, i, PyInt_FromLong(0));
+        }
+    }
     if (PyArray_Check(indexobj) || !PyTuple_Check(indexobj)) {
         mit->numiter = 1;
         indtype = PyArray_DescrFromType(NPY_INTP);
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index 3002ca2a9..f20e200e4 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -3252,11 +3252,11 @@ npyiter_fill_maskna_axisdata(NpyIter *iter, int **op_axes)
             else {
                 int i;
 
-                if (op_axes == NULL || op_axes[iop] == NULL) {
+                if (op_axes == NULL || op_axes[iop_maskna] == NULL) {
                     i = PyArray_NDIM(op_cur) - idim - 1;
                 }
                 else {
-                    i = op_axes[iop][ndim-idim-1];
+                    i = op_axes[iop_maskna][ndim-idim-1];
                 }
 
                 strides[iop] = PyArray_MASKNA_STRIDES(op_cur)[i];
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index f9c079208..e2af79afd 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -190,8 +190,6 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
     npy_intp newmasknastrides[NPY_MAXDIMS];
     int flags, build_maskna_strides = 0;
 
-    printf("in newshape\n"); fflush(stdout);
-
     if (order == NPY_ANYORDER) {
         order = PyArray_ISFORTRAN(self);
     }
@@ -206,7 +204,6 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
             i++;
         }
         if (same) {
-            printf("returning view\n"); fflush(stdout);
             return PyArray_View(self, NULL, NULL);
         }
     }
@@ -221,14 +218,12 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
      */
     i = _check_ones(self, ndim, dimensions, newstrides, newmasknastrides);
     if (i == 0) {
-        printf("setting strides to newstrides\n"); fflush(stdout);
         strides = newstrides;
     }
     flags = PyArray_FLAGS(self) & ~(NPY_ARRAY_OWNMASKNA |
                                     NPY_ARRAY_MASKNA);
 
     if (strides == NULL) {
-        printf("strides are null\n"); fflush(stdout);
         /*
          * we are really re-shaping not just adding ones to the shape somewhere
          * fix any -1 dimensions and check new-dimensions against old size
@@ -251,12 +246,10 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
             success = _attempt_nocopy_reshape(self, ndim, dimensions,
                                           newstrides, newmasknastrides, order);
             if (success) {
-                printf("nocopy reshape succeeded\n"); fflush(stdout);
                 /* no need to copy the array after all */
                 strides = newstrides;
             }
             else {
-                printf("nocopy reshape failed\n"); fflush(stdout);
                 PyObject *newcopy;
                 newcopy = PyArray_NewCopy(self, order);
                 if (newcopy == NULL) {
@@ -343,15 +336,12 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims,
         if (build_maskna_strides) {
             npy_intp stride = 1;
             if (order == NPY_FORTRANORDER) {
-                printf("building fortran strides\n"); fflush(stdout);
                 for (i = 0; i < ndim; ++i) {
                     fa->maskna_strides[i] = stride;
-                    printf("stride %d\n", (int)stride);
                     stride *= fa->dimensions[i];
                 }
             }
             else {
-                printf("building C strides\n"); fflush(stdout);
                 for (i = ndim; i >= 0; --i) {
                     fa->maskna_strides[i] = stride;
                     stride *= fa->dimensions[i];
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 1dd0ef730..f7d775ca8 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2650,7 +2650,6 @@ allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out,
     if (out == NULL) {
         PyArrayObject *result;
 
-        printf("allocating result\n"); fflush(stdout);
         Py_INCREF(otype_dtype);
         result = allocate_reduce_result(arr, axis_flags, otype_dtype);
 
@@ -2665,7 +2664,6 @@ allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out,
         return result;
     }
     else {
-        printf("conforming result\n"); fflush(stdout);
         return conform_reduce_result(PyArray_NDIM(arr), axis_flags, out);
     }
 }
@@ -2926,7 +2924,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
 
     /* Prepare the NA mask if there is one */
     if (use_maskna) {
-        printf("doing masked %s.reduce\n", ufunc_name); fflush(stdout);
+        //printf("doing masked %s.reduce\n", ufunc_name); fflush(stdout);
         /*
          * Do the reduction on the NA mask before the data. This way
          * we can avoid modifying the outputs which end up masked, obeying
author	Mark Wiebe <mwiebe@enthought.com>	2011-08-03 09:53:51 -0500
committer	Charles Harris <charlesr.harris@gmail.com>	2011-08-27 07:26:51 -0600
commit	b471b5aace551d294f2ffe4f7be569fd6f148f50 (patch)
tree	de42d13fac9e056246326b49a570399e955901cc
parent	ae79b71a89d4c25c028202410ef4b538081c0dce (diff)
download	numpy-b471b5aace551d294f2ffe4f7be569fd6f148f50.tar.gz