Merge branch 'pull-108'

* pull-108:
  STY: Put some spaces around '&'.
  DOC: core: Document the mask-based nditer flags and new inline mask functions
  ENH: core: Performance optimization in CopyInto, when both casting and broadcasting
  ENH: umath: Use masked iteration to allow buffered 'where=' output operands
  ENH: nditer: Add tests for writemasked iteration, also some small fixes
  STY: Rename test_iterator.py to test_nditer.py for consistency
  ENH: core: Add a typedef, macro, and inline functions for dealing with masks
  ENH: nditer: Finish implementation of masked iteration
author: Charles Harris <charlesr.harris@gmail.com> 2011-07-11 09:25:03 -0600
committer: Charles Harris <charlesr.harris@gmail.com> 2011-07-11 09:25:03 -0600
commit: 92ede9cc86fe112d0e9335a1fe94dc66cc5ad6ca (patch)
tree: 0e9db1a575077c4184cd9e4c04f4a21fe335fd2f /numpy
parent: b5cdaee35bab2a06604f204ba18e00bf465879a7 (diff)
parent: fb0f2524a84757fd92ce028c474b88cf9ced40df (diff)
download: numpy-92ede9cc86fe112d0e9335a1fe94dc66cc5ad6ca.tar.gz
12 files changed, 567 insertions, 50 deletions
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index 66aa15820..0f0673825 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1440,6 +1440,40 @@ struct NpyAuxData_tag {
 #define NPY_AUXDATA_CLONE(auxdata) \
     ((auxdata)->clone(auxdata))
 
+/*********************************************************************
+ * NumPy functions for dealing with masks, such as in masked iteration
+ *********************************************************************/
+
+typedef npy_uint8 npy_mask;  /* one mask element; an alias of npy_uint8 */
+#define NPY_MASK NPY_UINT8   /* expands to NPY_UINT8; compared against a mask dtype's type_num */
+
+/*
+ * Reports whether a mask value marks its element as exposed.
+ *
+ * Only bit 0 is examined: set means exposed, clear means hidden.
+ * A 'where=' boolean mask can therefore be used directly, because
+ * NumPy booleans are 1 byte and contain either the value 0 or 1.
+ *
+ * Returns 1 when exposed, 0 when hidden.
+ */
+static NPY_INLINE npy_bool
+NpyMask_IsExposed(npy_mask mask)
+{
+    return (npy_bool)((mask & 0x01) == 0x01);
+}
+
+/*
+ * Extracts the payload stored in bits 1 through 7 of a mask value.
+ * The payload is shifted down by one, so the exposed/hidden flag in
+ * bit 0 is discarded from the result.
+ */
+static NPY_INLINE npy_uint8
+NpyMask_GetPayload(npy_mask mask)
+{
+    const npy_uint8 raw = (npy_uint8)mask;
+
+    return raw >> 1;
+}
+
+/*
+ * Builds a mask value from its two parts: bit 0 is set when 'exposed'
+ * is nonzero, and 'payload' is stored in the bits above it.
+ *
+ * The shifted payload is truncated to npy_mask, so only the low
+ * 7 bits of 'payload' survive.
+ */
+static NPY_INLINE npy_mask
+NpyMask_Create(npy_bool exposed, npy_uint8 payload)
+{
+    const npy_mask exposed_bit = (npy_mask)(exposed != 0);
+    const npy_mask payload_bits = (npy_mask)(payload << 1);
+
+    return exposed_bit | payload_bits;
+}
+
 /*
  * This is the form of the struct that's returned pointed by the
  * PyCObject attribute of an array __array_struct__. See
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 9388c41a7..6100fe7ee 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -2799,7 +2799,9 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
     else {
         PyArrayObject *op[2];
         npy_uint32 op_flags[2];
+        PyArray_Descr *op_dtypes_values[2], **op_dtypes = NULL;
         NpyIter *iter;
+        npy_intp src_size;
 
         NpyIter_IterNextFunc *iternext;
         char **dataptr;
@@ -2811,7 +2813,7 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
         op[0] = dst;
         op[1] = src;
         /*
-         * TODO: In NumPy 2.0, renable NPY_ITER_NO_BROADCAST. This
+         * TODO: In NumPy 2.0, reenable NPY_ITER_NO_BROADCAST. This
          *       was removed during NumPy 1.6 testing for compatibility
          *       with NumPy 1.5, as per Travis's -10 veto power.
          */
@@ -2819,14 +2821,30 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
         op_flags[0] = NPY_ITER_WRITEONLY;
         op_flags[1] = NPY_ITER_READONLY;
 
+        /*
+         * If 'src' is being broadcast to 'dst', and it is smaller
+         * than the default NumPy buffer size, allow the iterator to
+         * make a copy of 'src' with the 'dst' dtype if necessary.
+         *
+         * This is a performance optimization, to allow fewer casts
+         * followed by more plain copies.
+         */
+        src_size = PyArray_SIZE(src);
+        if (src_size <= NPY_BUFSIZE && src_size < PyArray_SIZE(dst)) {
+            op_flags[1] |= NPY_ITER_COPY;
+            op_dtypes = op_dtypes_values;
+            op_dtypes_values[0] = NULL;
+            op_dtypes_values[1] = PyArray_DESCR(dst);
+        }
+
         iter = NpyIter_MultiNew(2, op,
                             NPY_ITER_EXTERNAL_LOOP|
                             NPY_ITER_REFS_OK|
                             NPY_ITER_ZEROSIZE_OK,
                             NPY_KEEPORDER,
-                            NPY_NO_CASTING,
+                            NPY_UNSAFE_CASTING,
                             op_flags,
-                            NULL);
+                            op_dtypes);
         if (iter == NULL) {
             return -1;
         }
@@ -2852,7 +2870,7 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
         if (PyArray_GetDTypeTransferFunction(
                         PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst),
                         stride[1], stride[0],
-                        PyArray_DESCR(src), PyArray_DESCR(dst),
+                        NpyIter_GetDescrArray(iter)[1], PyArray_DESCR(dst),
                         0,
                         &stransfer, &transferdata,
                         &needs_api) != NPY_SUCCEED) {
@@ -2983,7 +3001,9 @@ PyArray_MaskedCopyInto(PyArrayObject *dst, PyArrayObject *src,
     else {
         PyArrayObject *op[3];
         npy_uint32 op_flags[3];
+        PyArray_Descr *op_dtypes_values[3], **op_dtypes = NULL;
         NpyIter *iter;
+        npy_intp src_size;
 
         NpyIter_IterNextFunc *iternext;
         char **dataptr;
@@ -3005,14 +3025,31 @@ PyArray_MaskedCopyInto(PyArrayObject *dst, PyArrayObject *src,
         op_flags[1] = NPY_ITER_READONLY;
         op_flags[2] = NPY_ITER_READONLY;
 
+        /*
+         * If 'src' is being broadcast to 'dst', and it is smaller
+         * than the default NumPy buffer size, allow the iterator to
+         * make a copy of 'src' with the 'dst' dtype if necessary.
+         *
+         * This is a performance optimization, to allow fewer casts
+         * followed by more plain copies.
+         */
+        src_size = PyArray_SIZE(src);
+        if (src_size <= NPY_BUFSIZE && src_size < PyArray_SIZE(dst)) {
+            op_flags[1] |= NPY_ITER_COPY;
+            op_dtypes = op_dtypes_values;
+            op_dtypes_values[0] = NULL;
+            op_dtypes_values[1] = PyArray_DESCR(dst);
+            op_dtypes_values[2] = NULL;
+        }
+
         iter = NpyIter_MultiNew(3, op,
                             NPY_ITER_EXTERNAL_LOOP|
                             NPY_ITER_REFS_OK|
                             NPY_ITER_ZEROSIZE_OK,
                             NPY_KEEPORDER,
-                            NPY_NO_CASTING,
+                            NPY_UNSAFE_CASTING,
                             op_flags,
-                            NULL);
+                            op_dtypes);
         if (iter == NULL) {
             return -1;
         }
@@ -3038,7 +3075,7 @@ PyArray_MaskedCopyInto(PyArrayObject *dst, PyArrayObject *src,
         if (PyArray_GetMaskedDTypeTransferFunction(
                         PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst),
                         stride[1], stride[0], stride[2],
-                        PyArray_DESCR(src),
+                        NpyIter_GetDescrArray(iter)[1],
                         PyArray_DESCR(dst),
                         PyArray_DESCR(mask),
                         0,
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 47182b82a..ce688efd5 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -3059,7 +3059,7 @@ NpyAuxData *_masked_wrapper_transfer_data_clone(NpyAuxData *data)
 void _strided_masked_wrapper_decsrcref_transfer_function(
                                     char *dst, npy_intp dst_stride,
                                     char *src, npy_intp src_stride,
-                                    npy_uint8 *mask, npy_intp mask_stride,
+                                    npy_mask *mask, npy_intp mask_stride,
                                     npy_intp N, npy_intp src_itemsize,
                                     NpyAuxData *transferdata)
 {
@@ -3077,7 +3077,7 @@ void _strided_masked_wrapper_decsrcref_transfer_function(
     while (N > 0) {
         /* Skip masked values, still calling decsrcref for move_references */
         subloopsize = 0;
-        while (subloopsize < N && ((*mask)&0x01) == 0) {
+        while (subloopsize < N && !NpyMask_IsExposed(*mask)) {
             ++subloopsize;
             mask += mask_stride;
         }
@@ -3088,7 +3088,7 @@ void _strided_masked_wrapper_decsrcref_transfer_function(
         N -= subloopsize;
         /* Process unmasked values */
         subloopsize = 0;
-        while (subloopsize < N && ((*mask)&0x01) != 0) {
+        while (subloopsize < N && NpyMask_IsExposed(*mask)) {
             ++subloopsize;
             mask += mask_stride;
         }
@@ -3103,7 +3103,7 @@ void _strided_masked_wrapper_decsrcref_transfer_function(
 void _strided_masked_wrapper_transfer_function(
                                     char *dst, npy_intp dst_stride,
                                     char *src, npy_intp src_stride,
-                                    npy_uint8 *mask, npy_intp mask_stride,
+                                    npy_mask *mask, npy_intp mask_stride,
                                     npy_intp N, npy_intp src_itemsize,
                                     NpyAuxData *transferdata)
 {
@@ -3120,7 +3120,7 @@ void _strided_masked_wrapper_transfer_function(
     while (N > 0) {
         /* Skip masked values */
         subloopsize = 0;
-        while (subloopsize < N && ((*mask)&0x01) == 0) {
+        while (subloopsize < N && !NpyMask_IsExposed(*mask)) {
             ++subloopsize;
             mask += mask_stride;
         }
@@ -3129,7 +3129,7 @@ void _strided_masked_wrapper_transfer_function(
         N -= subloopsize;
         /* Process unmasked values */
         subloopsize = 0;
-        while (subloopsize < N && ((*mask)&0x01) != 0) {
+        while (subloopsize < N && NpyMask_IsExposed(*mask)) {
             ++subloopsize;
             mask += mask_stride;
         }
@@ -3764,8 +3764,8 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned,
 
     /* TODO: Add struct-based mask_dtype support later */
     if (mask_dtype->type_num != NPY_BOOL &&
-                            mask_dtype->type_num != NPY_UINT8) {
-        PyErr_SetString(PyExc_RuntimeError,
+                            mask_dtype->type_num != NPY_MASK) {
+        PyErr_SetString(PyExc_TypeError,
                 "Only bool and uint8 masks are supported at the moment, "
                 "structs of bool/uint8 is planned for the future");
         return NPY_FAIL;
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index ab1918e0e..f33e39861 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -1156,3 +1156,139 @@ PyArray_TransferStridedToNDim(npy_intp ndim,
         }
     }
 }
+
+/*
+ * Masked strided-to-N-dimensional transfer.
+ *
+ * Hands up to 'count' elements from the linearly strided 'src' to
+ * 'stransfer', one run along dimension 0 of 'dst' at a time, together
+ * with the matching span of 'mask'. Which elements are actually
+ * written is decided by 'stransfer' from the mask values.
+ *
+ * 'dst' is expected to point at the element whose coordinates are
+ * 'coords'; the code rewinds by coord0*dst_stride0 to get back to the
+ * start of a row. 'dst_strides', 'coords' and 'shape' are read one
+ * dimension at a time, stepping by 'dst_strides_inc', 'coords_inc'
+ * and 'shape_inc' respectively. 'src' and 'mask' advance by
+ * 'src_stride' and 'mask_stride' per element handed over.
+ *
+ * 'src_itemsize' and 'data' are passed through to 'stransfer'
+ * unchanged.
+ *
+ * Returns 0 if all 'count' elements were handed to 'stransfer',
+ * otherwise the number of elements left over once the end of 'dst'
+ * was reached.
+ */
+NPY_NO_EXPORT npy_intp
+PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
+                char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc,
+                char *src, npy_intp src_stride,
+                npy_mask *mask, npy_intp mask_stride,
+                npy_intp *coords, npy_intp coords_inc,
+                npy_intp *shape, npy_intp shape_inc,
+                npy_intp count, npy_intp src_itemsize,
+                PyArray_MaskedStridedTransferFn *stransfer,
+                NpyAuxData *data)
+{
+    npy_intp i, M, N, coord0, shape0, dst_stride0, coord1, shape1, dst_stride1;
+
+    /* Finish off dimension 0 */
+    coord0 = coords[0];
+    shape0 = shape[0];
+    dst_stride0 = dst_strides[0];
+    /* N is the number of elements left in the current dimension-0 run */
+    N = shape0 - coord0;
+    if (N >= count) {
+        stransfer(dst, dst_stride0,
+                    src, src_stride,
+                    mask, mask_stride,
+                    count, src_itemsize, data);
+        return 0;
+    }
+    stransfer(dst, dst_stride0,
+                src, src_stride,
+                mask, mask_stride,
+                N, src_itemsize, data);
+    count -= N;
+
+    /* If it's 1-dimensional, there's no more to copy */
+    if (ndim == 1) {
+        return count;
+    }
+
+    /*
+     * Adjust the src, mask and dst pointers: dst is rewound to the
+     * start of its dimension-0 run and moved one step along dimension 1.
+     */
+    coord1 = (coords + coords_inc)[0];
+    shape1 = (shape + shape_inc)[0];
+    dst_stride1 = (dst_strides + dst_strides_inc)[0];
+    dst = dst - coord0*dst_stride0 + dst_stride1;
+    src += N*src_stride;
+    mask += N*mask_stride;
+
+    /* Finish off dimension 1: M full dimension-0 runs remain in it */
+    M = (shape1 - coord1 - 1);
+    for (i = 0; i < M; ++i) {
+        if (shape0 >= count) {
+            stransfer(dst, dst_stride0,
+                        src, src_stride,
+                        mask, mask_stride,
+                        count, src_itemsize, data);
+            return 0;
+        }
+        stransfer(dst, dst_stride0,
+                    src, src_stride,
+                    mask, mask_stride,
+                    shape0, src_itemsize, data);
+        count -= shape0;
+        dst += dst_stride1;
+        src += shape0*src_stride;
+        mask += shape0*mask_stride;
+    }
+
+    /* If it's 2-dimensional, there's no more to copy */
+    if (ndim == 2) {
+        return count;
+    }
+
+    /* General-case loop for everything else */
+    else {
+        /* Iteration structure for dimensions 2 and up */
+        struct {
+            npy_intp coord, shape, dst_stride;
+        } it[NPY_MAXDIMS];
+
+        /* Copy the coordinates and shape */
+        coords += 2*coords_inc;
+        shape += 2*shape_inc;
+        dst_strides += 2*dst_strides_inc;
+        for (i = 0; i < ndim-2; ++i) {
+            it[i].coord = coords[0];
+            it[i].shape = shape[0];
+            it[i].dst_stride = dst_strides[0];
+            coords += coords_inc;
+            shape += shape_inc;
+            dst_strides += dst_strides_inc;
+        }
+
+        for (;;) {
+            /* Adjust the dst pointer from the dimension 0 and 1 loop */
+            dst = dst - shape1*dst_stride1;
+
+            /* Increment to the next coordinate */
+            for (i = 0; i < ndim-2; ++i) {
+                dst += it[i].dst_stride;
+                if (++it[i].coord >= it[i].shape) {
+                    /* This dimension rolled over; carry into the next */
+                    it[i].coord = 0;
+                    dst -= it[i].dst_stride*it[i].shape;
+                }
+                else {
+                    break;
+                }
+            }
+            /* If the last dimension rolled over, we're done */
+            if (i == ndim-2) {
+                return count;
+            }
+
+            /* A loop for dimensions 0 and 1 */
+            for (i = 0; i < shape1; ++i) {
+                if (shape0 >= count) {
+                    stransfer(dst, dst_stride0,
+                                src, src_stride,
+                                mask, mask_stride,
+                                count, src_itemsize, data);
+                    return 0;
+                }
+                stransfer(dst, dst_stride0,
+                            src, src_stride,
+                            mask, mask_stride,
+                            shape0, src_itemsize, data);
+                count -= shape0;
+                dst += dst_stride1;
+                src += shape0*src_stride;
+                mask += shape0*mask_stride;
+            }
+        }
+    }
+}
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index 1328ebc38..4eda968db 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -1745,6 +1745,7 @@ npyiter_copy_from_buffers(NpyIter *iter)
     npy_uint32 itflags = NIT_ITFLAGS(iter);
     int ndim = NIT_NDIM(iter);
     int iop, nop = NIT_NOP(iter);
+    int maskop = NIT_MASKOP(iter);
 
     char *op_itflags = NIT_OPITFLAGS(iter);
     NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
@@ -1862,14 +1863,42 @@ npyiter_copy_from_buffers(NpyIter *iter)
                                     "operand %d (%d items)\n",
                                     (int)iop, (int)op_transfersize);
 
-                PyArray_TransferStridedToNDim(ndim_transfer,
-                        ad_ptrs[iop], dst_strides, axisdata_incr,
-                        buffer, src_stride,
-                        dst_coords, axisdata_incr,
-                        dst_shape, axisdata_incr,
-                        op_transfersize, dtypes[iop]->elsize,
-                        stransfer,
-                        transferdata);
+                if (op_itflags[iop] & NPY_OP_ITFLAG_WRITEMASKED) {
+                    npy_uint8 *maskptr;
+
+                    /*
+                     * The mask pointer may be in the buffer or in
+                     * the array, detect which one.
+                     */
+                    delta = (ptrs[maskop] - buffers[maskop]);
+                    if (0 <= delta &&
+                                delta <= buffersize*dtypes[maskop]->elsize) {
+                        maskptr = buffers[maskop];
+                    }
+                    else {
+                        maskptr = ad_ptrs[maskop];
+                    }
+
+                    PyArray_TransferMaskedStridedToNDim(ndim_transfer,
+                            ad_ptrs[iop], dst_strides, axisdata_incr,
+                            buffer, src_stride,
+                            maskptr, strides[maskop],
+                            dst_coords, axisdata_incr,
+                            dst_shape, axisdata_incr,
+                            op_transfersize, dtypes[iop]->elsize,
+                            (PyArray_MaskedStridedTransferFn *)stransfer,
+                            transferdata);
+                }
+                else {
+                    PyArray_TransferStridedToNDim(ndim_transfer,
+                            ad_ptrs[iop], dst_strides, axisdata_incr,
+                            buffer, src_stride,
+                            dst_coords, axisdata_incr,
+                            dst_shape, axisdata_incr,
+                            op_transfersize, dtypes[iop]->elsize,
+                            stransfer,
+                            transferdata);
+                }
             }
         }
         /* If there's no copy back, we may have to decrement refs.  In
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index e99a0fb0a..b9f79f1d3 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -189,6 +189,7 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
     NIT_ITFLAGS(iter) = itflags;
     NIT_NDIM(iter) = ndim;
     NIT_NOP(iter) = nop;
+    NIT_MASKOP(iter) = -1;
     NIT_ITERINDEX(iter) = 0;
     memset(NIT_BASEOFFSETS(iter), 0, (nop+1)*NPY_SIZEOF_INTP);
 
@@ -902,7 +903,7 @@ npyiter_check_per_op_flags(npy_uint32 op_flags, char *op_itflags)
 
     /* Check the flag for a write masked operands */
     if (op_flags & NPY_ITER_WRITEMASKED) {
-        if (!(*op_itflags) & NPY_OP_ITFLAG_WRITE) {
+        if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
             PyErr_SetString(PyExc_ValueError,
                 "The iterator flag WRITEMASKED may only "
                 "be used with READWRITE or WRITEONLY");
@@ -1125,6 +1126,7 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
 {
     int iop, i;
     npy_int8 maskop = -1;
+    int any_writemasked_ops = 0;
 
     for (iop = 0; iop < nop; ++iop) {
         op[iop] = op_in[iop];
@@ -1157,6 +1159,10 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
             *out_maskop = iop;
         }
 
+        if (op_flags[iop] & NPY_ITER_WRITEMASKED) {
+            any_writemasked_ops = 1;
+        }
+
         /*
          * Prepare the operand.  This produces an op_dtype[iop] reference
          * on success.
@@ -1196,6 +1202,21 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
         }
     }
 
+    if (any_writemasked_ops && maskop < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                "An iterator operand was flagged as WRITEMASKED, "
+                "but no ARRAYMASK operand was given to supply "
+                "the mask");
+        return 0;
+    }
+    else if (!any_writemasked_ops && maskop >= 0) {
+        PyErr_SetString(PyExc_ValueError,
+                "An iterator operand was flagged as the ARRAYMASK, "
+                "but no WRITEMASKED operands were given to use "
+                "the mask");
+        return 0;
+    }
+
     return 1;
 }
 
@@ -1348,6 +1369,57 @@ npyiter_check_casting(int nop, PyArrayObject **op,
 }
 
 /*
+ * Checks that the mask broadcasts to the WRITEMASKED REDUCE
+ * operand 'iop', but 'iop' never broadcasts to the mask.
+ * If 'iop' broadcasts to the mask, the result would be more
+ * than one mask value per reduction element, something which
+ * is invalid.
+ *
+ * This check should only be called after all the operands
+ * have been filled in.
+ *
+ * Returns 1 on success, 0 on error.
+ */
+static int
+check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
+{
+    /*
+     * NOTE(review): the NIT_* and NAD_* accessor macros may refer to
+     * some of these locals by name, so itflags, ndim, nop,
+     * sizeof_axisdata and axisdata keep their original names.
+     */
+    npy_uint32 itflags = NIT_ITFLAGS(iter);
+    int ndim = NIT_NDIM(iter);
+    int nop = NIT_NOP(iter);
+    int maskop = NIT_MASKOP(iter);
+    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
+    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+    int axis;
+
+    /* Walk every iteration axis, comparing the two operands' strides */
+    for (axis = 0; axis < ndim; ++axis) {
+        const npy_intp op_stride = NAD_STRIDES(axisdata)[iop];
+        const npy_intp mask_stride = NAD_STRIDES(axisdata)[maskop];
+
+        /*
+         * A zero stride for 'iop' where the mask has a nonzero one
+         * means a single 'iop' element is paired with several mask
+         * values along this axis, which is rejected.
+         */
+        if (op_stride == 0 && mask_stride != 0) {
+            PyErr_SetString(PyExc_ValueError,
+                    "Iterator reduction operand is WRITEMASKED, "
+                    "but also broadcasts to multiple mask values. "
+                    "There can be only one mask value per WRITEMASKED "
+                    "element.");
+            return 0;
+        }
+
+        NIT_ADVANCE_AXISDATA(axisdata, 1);
+    }
+
+    return 1;
+}
+
+/*
  * Fills in the AXISDATA for the 'nop' operands, broadcasting
  * the dimensionas as necessary.  Also fills
  * in the ITERSIZE data member.
@@ -1367,6 +1439,7 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
     npy_uint32 itflags = NIT_ITFLAGS(iter);
     int idim, ndim = NIT_NDIM(iter);
     int iop, nop = NIT_NOP(iter);
+    int maskop = NIT_MASKOP(iter);
 
     int ondim;
     NpyIter_AxisData *axisdata;
@@ -1487,7 +1560,7 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
                         }
                     }
                     else if (idim >= ondim ||
-                                        PyArray_DIM(op_cur, ondim-idim-1) == 1) {
+                                    PyArray_DIM(op_cur, ondim-idim-1) == 1) {
                         strides[iop] = 0;
                         if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
                             goto operand_different_than_broadcast;
@@ -1496,17 +1569,37 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
                         if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
                             if (!(flags & NPY_ITER_REDUCE_OK)) {
                                 PyErr_SetString(PyExc_ValueError,
-                                        "output operand requires a reduction, but "
-                                        "reduction is not enabled");
+                                        "output operand requires a "
+                                        "reduction, but reduction is "
+                                        "not enabled");
                                 return 0;
                             }
                             if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
                                 PyErr_SetString(PyExc_ValueError,
-                                        "output operand requires a reduction, but "
-                                        "is flagged as write-only, not "
-                                        "read-write");
+                                        "output operand requires a "
+                                        "reduction, but is flagged as "
+                                        "write-only, not read-write");
                                 return 0;
                             }
+                            /*
+                             * The ARRAYMASK can't be a reduction, because
+                             * it would be possible to write back to the
+                             * array once when the ARRAYMASK says 'True',
+                             * then have the reduction on the ARRAYMASK
+                             * later flip to 'False', indicating that the
+                             * write back should never have been done,
+                             * and violating the strict masking semantics
+                             */
+                            if (iop == maskop) {
+                                PyErr_SetString(PyExc_ValueError,
+                                        "output operand requires a "
+                                        "reduction, but is flagged as "
+                                        "the ARRAYMASK operand which "
+                                        "is not permitted to be the "
+                                        "result of a reduction");
+                                return 0;
+                            }
+
                             NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
                             op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
                         }
@@ -2642,6 +2735,8 @@ npyiter_allocate_arrays(NpyIter *iter,
     int idim, ndim = NIT_NDIM(iter);
     int iop, nop = NIT_NOP(iter);
 
+    int check_writemasked_reductions = 0;
+
     NpyIter_BufferData *bufferdata = NULL;
     PyArrayObject **op = NIT_OPERANDS(iter);
 
@@ -2649,8 +2744,18 @@ npyiter_allocate_arrays(NpyIter *iter,
         bufferdata = NIT_BUFFERDATA(iter);
     }
 
-
     for (iop = 0; iop < nop; ++iop) {
+        /*
+         * Check whether there are any WRITEMASKED REDUCE operands
+         * which should be validated after all the strides are filled
+         * in.
+         */
+        if ((op_itflags[iop] &
+                (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
+                        (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
+            check_writemasked_reductions = 1;
+        }
+
         /* NULL means an output the iterator should allocate */
         if (op[iop] == NULL) {
             PyArrayObject *out;
@@ -2820,7 +2925,8 @@ npyiter_allocate_arrays(NpyIter *iter,
          * the inner stride of this operand works for the whole
          * array, we can set NPY_OP_ITFLAG_BUFNEVER.
          */
-        if ((itflags & NPY_ITFLAG_BUFFER) && !(op_itflags[iop] & NPY_OP_ITFLAG_CAST)) {
+        if ((itflags & NPY_ITFLAG_BUFFER) &&
+                                !(op_itflags[iop] & NPY_OP_ITFLAG_CAST)) {
             NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
             if (ndim == 1) {
                 op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
@@ -2872,6 +2978,31 @@ npyiter_allocate_arrays(NpyIter *iter,
         }
     }
 
+    if (check_writemasked_reductions) {
+        for (iop = 0; iop < nop; ++iop) {
+            /*
+             * Check whether there are any WRITEMASKED REDUCE operands
+             * which should be validated now that all the strides are filled
+             * in.
+             */
+            if ((op_itflags[iop] &
+                    (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
+                        (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
+                /*
+                 * If the ARRAYMASK has 'bigger' dimensions
+                 * than this REDUCE WRITEMASKED operand,
+                 * the result would be more than one mask
+                 * value per reduction element, something which
+                 * is invalid. This function provides validation
+                 * for that.
+                 */
+                if (!check_mask_for_writemasked_reduction(iter, iop)) {
+                    return 0;
+                }
+            }
+        }
+    }
+
     return 1;
 }
 
@@ -2958,7 +3089,38 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
             }
             if (flags & NPY_OP_ITFLAG_WRITE) {
                 int move_references = 1;
-                if (PyArray_GetDTypeTransferFunction(
+
+                /*
+                 * If the operand is WRITEMASKED, use a masked transfer fn.
+                 */
+                if (flags & NPY_OP_ITFLAG_WRITEMASKED) {
+                    int maskop = NIT_MASKOP(iter);
+                    PyArray_Descr *mask_dtype = PyArray_DESCR(op[maskop]);
+
+                    /*
+                     * If the mask's stride is contiguous, use it. Otherwise
+                     * pass NPY_MAX_INTP instead, because the mask may or
+                     * may not be buffered, so its stride could be
+                     * inconsistent.
+                     */
+                    if (PyArray_GetMaskedDTypeTransferFunction(
+                                (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
+                                op_dtype[iop]->elsize,
+                                op_stride,
+                                (strides[maskop] == mask_dtype->elsize) ?
+                                                mask_dtype->elsize :
+                                                NPY_MAX_INTP,
+                                op_dtype[iop],
+                                PyArray_DESCR(op[iop]),
+                                mask_dtype,
+                                move_references,
+                                (PyArray_MaskedStridedTransferFn **)&stransfer,
+                                &transferdata,
+                                &needs_api) != NPY_SUCCEED) {
+                        goto fail;
+                    }
+                }
+                else {
+                    if (PyArray_GetDTypeTransferFunction(
                                         (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
                                         op_dtype[iop]->elsize,
                                         op_stride,
@@ -2968,7 +3130,8 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
                                         &stransfer,
                                         &transferdata,
                                         &needs_api) != NPY_SUCCEED) {
-                    goto fail;
+                        goto fail;
+                    }
                 }
                 writetransferfn[iop] = stransfer;
                 writetransferdata[iop] = transferdata;
diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h
index b4cd79f9a..088b15040 100644
--- a/numpy/core/src/private/lowlevel_strided_loops.h
+++ b/numpy/core/src/private/lowlevel_strided_loops.h
@@ -32,11 +32,11 @@ typedef void (PyArray_StridedTransferFn)(char *dst, npy_intp dst_stride,
  * which values are transferred.
  *
  * In particular, the 'i'-th element is transfered if and only if
- * (((mask[i*mask_stride])&0x01) == 0x01).
+ * NpyMask_IsExposed(mask[i*mask_stride]).
  */
 typedef void (PyArray_MaskedStridedTransferFn)(char *dst, npy_intp dst_stride,
                                     char *src, npy_intp src_stride,
-                                    npy_uint8 *mask, npy_intp mask_stride,
+                                    npy_mask *mask, npy_intp mask_stride,
                                     npy_intp N, npy_intp src_itemsize,
                                     NpyAuxData *transferdata);
 
@@ -191,7 +191,7 @@ PyArray_GetDTypeTransferFunction(int aligned,
  * This is identical to PyArray_GetDTypeTransferFunction, but
  * returns a transfer function which also takes a mask as a parameter.
  * Bit zero of the mask is used to determine which values to copy,
- * data is transfered exactly when ((mask[i])&0x01) == 0x01.
+ * and data is transferred exactly when NpyMask_IsExposed(mask[i*mask_stride]).
  *
  * If move_references is true, values which are not copied to the
  * destination will still have their source reference decremented.
@@ -291,6 +291,18 @@ PyArray_TransferStridedToNDim(npy_intp ndim,
                 PyArray_StridedTransferFn *stransfer,
                 NpyAuxData *transferdata);
 
+NPY_NO_EXPORT npy_intp
+PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
+                char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc,
+                char *src, npy_intp src_stride,
+                npy_mask *mask, npy_intp mask_stride,
+                npy_intp *coords, npy_intp coords_inc,
+                npy_intp *shape, npy_intp shape_inc,
+                npy_intp count, npy_intp src_itemsize,
+                PyArray_MaskedStridedTransferFn *stransfer,
+                NpyAuxData *data);
+
+
 /*
  *            TRIVIAL ITERATION
  *
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index a89c0f235..2ffca63d2 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -1480,9 +1480,10 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
                       NPY_ITER_ALIGNED|
                       NPY_ITER_ALLOCATE|
                       NPY_ITER_NO_BROADCAST|
-                      NPY_ITER_NO_SUBTYPE;
+                      NPY_ITER_NO_SUBTYPE|
+                      NPY_ITER_WRITEMASKED;
     }
-    op_flags[nop] = NPY_ITER_READONLY;
+    op_flags[nop] = NPY_ITER_READONLY|NPY_ITER_ARRAYMASK;
 
     NPY_UF_DBG_PRINT("Making iterator\n");
 
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 8fb17a441..b08a7f165 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -1407,7 +1407,8 @@ unmasked_ufunc_loop_as_masked(
     do {
         /* Skip masked values */
         subloopsize = 0;
-        while (subloopsize < loopsize && ((*(npy_uint8 *)mask)&0x01) == 0) {
+        while (subloopsize < loopsize &&
+                        !NpyMask_IsExposed(*(npy_mask *)mask)) {
             ++subloopsize;
             mask += mask_stride;
         }
@@ -1420,7 +1421,8 @@ unmasked_ufunc_loop_as_masked(
          * mess with the 'args' pointer values)
          */
         subloopsize = 0;
-        while (subloopsize < loopsize && ((*(npy_uint8 *)mask)&0x01) != 0) {
+        while (subloopsize < loopsize &&
+                        NpyMask_IsExposed(*(npy_mask *)mask)) {
             ++subloopsize;
             mask += mask_stride;
         }
diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py
index 7ebcb932b..d2d8241f2 100644
--- a/numpy/core/tests/test_api.py
+++ b/numpy/core/tests/test_api.py
@@ -110,7 +110,7 @@ def test_copyto():
     assert_raises(TypeError, np.copyto, a, 3.5, where=[True,False,True])
 
     # Lists of integer 0's and 1's is ok too
-    np.copyto(a, 4, where=[[0,1,1], [1,0,0]])
+    np.copyto(a, 4.0, casting='unsafe', where=[[0,1,1], [1,0,0]])
     assert_equal(a, [[3,4,4], [4,1,3]])
 
     # Overlapping copy with mask should work
diff --git a/numpy/core/tests/test_iterator.py b/numpy/core/tests/test_nditer.py
index f39d9584f..cf0a44c63 100644
--- a/numpy/core/tests/test_iterator.py
+++ b/numpy/core/tests/test_nditer.py
@@ -457,7 +457,7 @@ def test_iter_no_inner_dim_coalescing():
     i = nditer(a, ['external_loop'], [['readonly']])
     assert_equal(i.ndim, 1)
     assert_equal(i[0].shape, (12,))
-    
+
     # Even with lots of 1-sized dimensions, should still coalesce
     a = arange(24).reshape(1,1,2,1,1,3,1,1,4,1,1)
     i = nditer(a, ['external_loop'], [['readonly']])
@@ -658,7 +658,7 @@ def test_iter_broadcasting_errors():
         # The message should contain the itershape parameter
         assert_(msg.find('(4,3)') >= 0,
                 'Message "%s" doesn\'t contain itershape parameter (4,3)' % msg)
-    
+
     try:
         i = nditer([np.zeros((2,1,1)), np.zeros((2,))],
                     [],
@@ -1579,7 +1579,7 @@ def test_iter_buffered_cast_simple():
                    buffersize=3)
     for v in i:
         v[...] *= 2
-    
+
     assert_equal(a, 2*np.arange(10, dtype='f4'))
 
 def test_iter_buffered_cast_byteswapped():
@@ -1593,7 +1593,7 @@ def test_iter_buffered_cast_byteswapped():
                    buffersize=3)
     for v in i:
         v[...] *= 2
-    
+
     assert_equal(a, 2*np.arange(10, dtype='f4'))
 
     try:
@@ -1607,7 +1607,7 @@ def test_iter_buffered_cast_byteswapped():
                        buffersize=3)
         for v in i:
             v[...] *= 2
-        
+
         assert_equal(a, 2*np.arange(10, dtype='f8'))
     finally:
         warnings.simplefilter("default", np.ComplexWarning)
@@ -1966,7 +1966,7 @@ def test_iter_buffering_badwriteback():
     i = nditer([a,b],['buffered','external_loop'],
                         [['readonly'],['writeonly']],
                         order='C')
-    
+
     # If a has just one element, it's fine too (constant 0 stride, a reduction)
     a = np.arange(1).reshape(1,1,1)
     i = nditer([a,b],['buffered','external_loop','reduce_ok'],
@@ -2192,7 +2192,7 @@ def test_iter_nested_iters_dtype_buffered():
     assert_equal(a, [[1,2,3],[4,5,6]])
 
 def test_iter_reduction_error():
-    
+
     a = np.arange(6)
     assert_raises(ValueError, nditer, [a,None], [],
                     [['readonly'], ['readwrite','allocate']],
@@ -2295,5 +2295,109 @@ def test_iter_buffering_reduction():
     it.reset()
     assert_equal(it[0], [1,2,1,2])
 
+def test_iter_writemasked_badinput():
+    a = np.zeros((2,3))
+    b = np.zeros((3,))
+    m = np.array([[True,True,False],[False,True,False]])
+    m2 = np.array([True,True,False])
+    m3 = np.array([0,1,1], dtype='u1')
+    mbad1 = np.array([0,1,1], dtype='i1')
+    mbad2 = np.array([0,1,1], dtype='f4')
+
+    # Need an 'arraymask' if any operand is 'writemasked'
+    assert_raises(ValueError, nditer, [a,m], [],
+                    [['readwrite','writemasked'],['readonly']])
+
+    # A 'writemasked' operand must not be readonly
+    assert_raises(ValueError, nditer, [a,m], [],
+                    [['readonly','writemasked'],['readonly','arraymask']])
+
+    # 'writemasked' and 'arraymask' may not be used together
+    assert_raises(ValueError, nditer, [a,m], [],
+                    [['readonly'],['readwrite','arraymask','writemasked']])
+
+    # 'arraymask' may only be specified once
+    assert_raises(ValueError, nditer, [a,m, m2], [],
+                    [['readwrite','writemasked'],
+                     ['readonly','arraymask'],
+                     ['readonly','arraymask']])
+
+    # An 'arraymask' with nothing 'writemasked' also doesn't make sense
+    assert_raises(ValueError, nditer, [a,m], [],
+                    [['readwrite'],['readonly','arraymask']])
+
+    # A writemasked reduction requires a similarly smaller mask
+    assert_raises(ValueError, nditer, [a,b,m], ['reduce_ok'],
+                    [['readonly'],
+                     ['readwrite','writemasked'],
+                     ['readonly','arraymask']])
+    # But this should work with a smaller/equal mask to the reduction operand
+    np.nditer([a,b,m2], ['reduce_ok'],
+                    [['readonly'],
+                     ['readwrite','writemasked'],
+                     ['readonly','arraymask']])
+    # The arraymask itself cannot be a reduction
+    assert_raises(ValueError, nditer, [a,b,m2], ['reduce_ok'],
+                    [['readonly'],
+                     ['readwrite','writemasked'],
+                     ['readwrite','arraymask']])
+
+    # A uint8 mask is ok too
+    np.nditer([a,m3], ['buffered'],
+                    [['readwrite','writemasked'],
+                     ['readonly','arraymask']],
+                    op_dtypes=['f4',None],
+                    casting='same_kind')
+    # An int8 mask isn't ok
+    assert_raises(TypeError, np.nditer, [a,mbad1], ['buffered'],
+                    [['readwrite','writemasked'],
+                     ['readonly','arraymask']],
+                    op_dtypes=['f4',None],
+                    casting='same_kind')
+    # A float32 mask isn't ok
+    assert_raises(TypeError, np.nditer, [a,mbad2], ['buffered'],
+                    [['readwrite','writemasked'],
+                     ['readonly','arraymask']],
+                    op_dtypes=['f4',None],
+                    casting='same_kind')
+
+def test_iter_writemasked():
+    a = np.zeros((3,), dtype='f8')
+    msk = np.array([True,True,False])
+
+    # When buffering is unused, 'writemasked' effectively does nothing.
+    # It's up to the user of the iterator to obey the requested semantics.
+    it = np.nditer([a,msk], [],
+                [['readwrite','writemasked'],
+                 ['readonly','arraymask']])
+    for x, m in it:
+        x[...] = 1
+    # Because we violated the semantics, all the values became 1
+    assert_equal(a, [1,1,1])
+
+    # Even if buffering is enabled, we still may be accessing the array
+    # directly.
+    it = np.nditer([a,msk], ['buffered'],
+                [['readwrite','writemasked'],
+                 ['readonly','arraymask']])
+    for x, m in it:
+        x[...] = 2.5
+    # Because we violated the semantics, all the values became 2.5
+    assert_equal(a, [2.5,2.5,2.5])
+
+    # If buffering will definitely happen, for instance because of
+    # a cast, only the items selected by the mask will be copied back from
+    # the buffer.
+    it = np.nditer([a,msk], ['buffered'],
+                [['readwrite','writemasked'],
+                 ['readonly','arraymask']],
+                op_dtypes=['i8',None],
+                casting='unsafe')
+    for x, m in it:
+        x[...] = 3
+    # Even though we violated the semantics, only the selected values
+    # were copied back
+    assert_equal(a, [3,3,2.5])
+
 if __name__ == "__main__":
     run_module_suite()
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index a7af19486..974a6d6f8 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -499,7 +499,6 @@ class TestUfunc(TestCase):
         np.subtract(a, 2, out=a, where=[True,False])
         assert_equal(a, [[0, 27], [14, 5]])
 
-    @dec.knownfailureif(True)
     def test_where_param_buffer_output(self):
         # This test is temporarily skipped because it requires
         # adding masking features to the nditer to work properly
author	Charles Harris <charlesr.harris@gmail.com>	2011-07-11 09:25:03 -0600
committer	Charles Harris <charlesr.harris@gmail.com>	2011-07-11 09:25:03 -0600
commit	92ede9cc86fe112d0e9335a1fe94dc66cc5ad6ca (patch)
tree	0e9db1a575077c4184cd9e4c04f4a21fe335fd2f /numpy
parent	b5cdaee35bab2a06604f204ba18e00bf465879a7 (diff)
parent	fb0f2524a84757fd92ce028c474b88cf9ced40df (diff)
download	numpy-92ede9cc86fe112d0e9335a1fe94dc66cc5ad6ca.tar.gz