summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorCharles Harris <charlesr.harris@gmail.com>2011-07-11 09:25:03 -0600
committerCharles Harris <charlesr.harris@gmail.com>2011-07-11 09:25:03 -0600
commit92ede9cc86fe112d0e9335a1fe94dc66cc5ad6ca (patch)
tree0e9db1a575077c4184cd9e4c04f4a21fe335fd2f /numpy
parentb5cdaee35bab2a06604f204ba18e00bf465879a7 (diff)
parentfb0f2524a84757fd92ce028c474b88cf9ced40df (diff)
downloadnumpy-92ede9cc86fe112d0e9335a1fe94dc66cc5ad6ca.tar.gz
Merge branch 'pull-108'
* pull-108: STY: Put some spaces around '&'. DOC: core: Document the mask-based nditer flags and new inline mask functions ENH: core: Performance optimization in CopyInto, when both casting and broadcasting ENH: umath: Use masked iteration to allow buffered 'where=' output operands ENH: nditer: Add tests for writemasked iteration, also some small fixes STY: Rename test_iterator.py to test_nditer.py for consistency ENH: core: Add a typedef, macro, and inline functions for dealing with masks ENH: nditer: Finish implementation of masked iteration
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/include/numpy/ndarraytypes.h34
-rw-r--r--numpy/core/src/multiarray/ctors.c51
-rw-r--r--numpy/core/src/multiarray/dtype_transfer.c16
-rw-r--r--numpy/core/src/multiarray/lowlevel_strided_loops.c.src136
-rw-r--r--numpy/core/src/multiarray/nditer_api.c45
-rw-r--r--numpy/core/src/multiarray/nditer_constr.c185
-rw-r--r--numpy/core/src/private/lowlevel_strided_loops.h18
-rw-r--r--numpy/core/src/umath/ufunc_object.c5
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.c6
-rw-r--r--numpy/core/tests/test_api.py2
-rw-r--r--numpy/core/tests/test_nditer.py (renamed from numpy/core/tests/test_iterator.py)118
-rw-r--r--numpy/core/tests/test_ufunc.py1
12 files changed, 567 insertions, 50 deletions
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index 66aa15820..0f0673825 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1440,6 +1440,40 @@ struct NpyAuxData_tag {
#define NPY_AUXDATA_CLONE(auxdata) \
((auxdata)->clone(auxdata))
+/*********************************************************************
+ * NumPy functions for dealing with masks, such as in masked iteration
+ *********************************************************************/
+
+typedef npy_uint8 npy_mask;
+#define NPY_MASK NPY_UINT8
+
+/*
+ * Bit 0 of the mask indicates whether a value is exposed
+ * or hidden. This is compatible with a 'where=' boolean
+ * mask, because NumPy booleans are 1 byte, and contain
+ * either the value 0 or 1.
+ */
+static NPY_INLINE npy_bool
+NpyMask_IsExposed(npy_mask mask)
+{
+ return (mask & 0x01) != 0;
+}
+
+/*
+ * Bits 1 through 7 of the mask contain the payload.
+ */
+static NPY_INLINE npy_uint8
+NpyMask_GetPayload(npy_mask mask)
+{
+ return ((npy_uint8)mask) >> 1;
+}
+
+static NPY_INLINE npy_mask
+NpyMask_Create(npy_bool exposed, npy_uint8 payload)
+{
+ return (npy_mask)(exposed != 0) | (npy_mask)(payload << 1);
+}
+
/*
* This is the form of the struct that's returned pointed by the
* PyCObject attribute of an array __array_struct__. See
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 9388c41a7..6100fe7ee 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -2799,7 +2799,9 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
else {
PyArrayObject *op[2];
npy_uint32 op_flags[2];
+ PyArray_Descr *op_dtypes_values[2], **op_dtypes = NULL;
NpyIter *iter;
+ npy_intp src_size;
NpyIter_IterNextFunc *iternext;
char **dataptr;
@@ -2811,7 +2813,7 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
op[0] = dst;
op[1] = src;
/*
- * TODO: In NumPy 2.0, renable NPY_ITER_NO_BROADCAST. This
+ * TODO: In NumPy 2.0, reenable NPY_ITER_NO_BROADCAST. This
* was removed during NumPy 1.6 testing for compatibility
* with NumPy 1.5, as per Travis's -10 veto power.
*/
@@ -2819,14 +2821,30 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
op_flags[0] = NPY_ITER_WRITEONLY;
op_flags[1] = NPY_ITER_READONLY;
+ /*
+ * If 'src' is being broadcast to 'dst', and it is smaller
+ * than the default NumPy buffer size, allow the iterator to
+ * make a copy of 'src' with the 'dst' dtype if necessary.
+ *
+ * This is a performance optimization, to allow fewer casts followed
+ * by more plain copies.
+ */
+ src_size = PyArray_SIZE(src);
+ if (src_size <= NPY_BUFSIZE && src_size < PyArray_SIZE(dst)) {
+ op_flags[1] |= NPY_ITER_COPY;
+ op_dtypes = op_dtypes_values;
+ op_dtypes_values[0] = NULL;
+ op_dtypes_values[1] = PyArray_DESCR(dst);
+ }
+
iter = NpyIter_MultiNew(2, op,
NPY_ITER_EXTERNAL_LOOP|
NPY_ITER_REFS_OK|
NPY_ITER_ZEROSIZE_OK,
NPY_KEEPORDER,
- NPY_NO_CASTING,
+ NPY_UNSAFE_CASTING,
op_flags,
- NULL);
+ op_dtypes);
if (iter == NULL) {
return -1;
}
@@ -2852,7 +2870,7 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
if (PyArray_GetDTypeTransferFunction(
PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst),
stride[1], stride[0],
- PyArray_DESCR(src), PyArray_DESCR(dst),
+ NpyIter_GetDescrArray(iter)[1], PyArray_DESCR(dst),
0,
&stransfer, &transferdata,
&needs_api) != NPY_SUCCEED) {
@@ -2983,7 +3001,9 @@ PyArray_MaskedCopyInto(PyArrayObject *dst, PyArrayObject *src,
else {
PyArrayObject *op[3];
npy_uint32 op_flags[3];
+ PyArray_Descr *op_dtypes_values[3], **op_dtypes = NULL;
NpyIter *iter;
+ npy_intp src_size;
NpyIter_IterNextFunc *iternext;
char **dataptr;
@@ -3005,14 +3025,31 @@ PyArray_MaskedCopyInto(PyArrayObject *dst, PyArrayObject *src,
op_flags[1] = NPY_ITER_READONLY;
op_flags[2] = NPY_ITER_READONLY;
+ /*
+ * If 'src' is being broadcast to 'dst', and it is smaller
+ * than the default NumPy buffer size, allow the iterator to
+ * make a copy of 'src' with the 'dst' dtype if necessary.
+ *
+ * This is a performance optimization, to allow fewer casts followed
+ * by more plain copies.
+ */
+ src_size = PyArray_SIZE(src);
+ if (src_size <= NPY_BUFSIZE && src_size < PyArray_SIZE(dst)) {
+ op_flags[1] |= NPY_ITER_COPY;
+ op_dtypes = op_dtypes_values;
+ op_dtypes_values[0] = NULL;
+ op_dtypes_values[1] = PyArray_DESCR(dst);
+ op_dtypes_values[2] = NULL;
+ }
+
iter = NpyIter_MultiNew(3, op,
NPY_ITER_EXTERNAL_LOOP|
NPY_ITER_REFS_OK|
NPY_ITER_ZEROSIZE_OK,
NPY_KEEPORDER,
- NPY_NO_CASTING,
+ NPY_UNSAFE_CASTING,
op_flags,
- NULL);
+ op_dtypes);
if (iter == NULL) {
return -1;
}
@@ -3038,7 +3075,7 @@ PyArray_MaskedCopyInto(PyArrayObject *dst, PyArrayObject *src,
if (PyArray_GetMaskedDTypeTransferFunction(
PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst),
stride[1], stride[0], stride[2],
- PyArray_DESCR(src),
+ NpyIter_GetDescrArray(iter)[1],
PyArray_DESCR(dst),
PyArray_DESCR(mask),
0,
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 47182b82a..ce688efd5 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -3059,7 +3059,7 @@ NpyAuxData *_masked_wrapper_transfer_data_clone(NpyAuxData *data)
void _strided_masked_wrapper_decsrcref_transfer_function(
char *dst, npy_intp dst_stride,
char *src, npy_intp src_stride,
- npy_uint8 *mask, npy_intp mask_stride,
+ npy_mask *mask, npy_intp mask_stride,
npy_intp N, npy_intp src_itemsize,
NpyAuxData *transferdata)
{
@@ -3077,7 +3077,7 @@ void _strided_masked_wrapper_decsrcref_transfer_function(
while (N > 0) {
/* Skip masked values, still calling decsrcref for move_references */
subloopsize = 0;
- while (subloopsize < N && ((*mask)&0x01) == 0) {
+ while (subloopsize < N && !NpyMask_IsExposed(*mask)) {
++subloopsize;
mask += mask_stride;
}
@@ -3088,7 +3088,7 @@ void _strided_masked_wrapper_decsrcref_transfer_function(
N -= subloopsize;
/* Process unmasked values */
subloopsize = 0;
- while (subloopsize < N && ((*mask)&0x01) != 0) {
+ while (subloopsize < N && NpyMask_IsExposed(*mask)) {
++subloopsize;
mask += mask_stride;
}
@@ -3103,7 +3103,7 @@ void _strided_masked_wrapper_decsrcref_transfer_function(
void _strided_masked_wrapper_transfer_function(
char *dst, npy_intp dst_stride,
char *src, npy_intp src_stride,
- npy_uint8 *mask, npy_intp mask_stride,
+ npy_mask *mask, npy_intp mask_stride,
npy_intp N, npy_intp src_itemsize,
NpyAuxData *transferdata)
{
@@ -3120,7 +3120,7 @@ void _strided_masked_wrapper_transfer_function(
while (N > 0) {
/* Skip masked values */
subloopsize = 0;
- while (subloopsize < N && ((*mask)&0x01) == 0) {
+ while (subloopsize < N && !NpyMask_IsExposed(*mask)) {
++subloopsize;
mask += mask_stride;
}
@@ -3129,7 +3129,7 @@ void _strided_masked_wrapper_transfer_function(
N -= subloopsize;
/* Process unmasked values */
subloopsize = 0;
- while (subloopsize < N && ((*mask)&0x01) != 0) {
+ while (subloopsize < N && NpyMask_IsExposed(*mask)) {
++subloopsize;
mask += mask_stride;
}
@@ -3764,8 +3764,8 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned,
/* TODO: Add struct-based mask_dtype support later */
if (mask_dtype->type_num != NPY_BOOL &&
- mask_dtype->type_num != NPY_UINT8) {
- PyErr_SetString(PyExc_RuntimeError,
+ mask_dtype->type_num != NPY_MASK) {
+ PyErr_SetString(PyExc_TypeError,
"Only bool and uint8 masks are supported at the moment, "
"structs of bool/uint8 is planned for the future");
return NPY_FAIL;
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index ab1918e0e..f33e39861 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -1156,3 +1156,139 @@ PyArray_TransferStridedToNDim(npy_intp ndim,
}
}
}
+
+NPY_NO_EXPORT npy_intp
+PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
+ char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc,
+ char *src, npy_intp src_stride,
+ npy_uint8 *mask, npy_intp mask_stride,
+ npy_intp *coords, npy_intp coords_inc,
+ npy_intp *shape, npy_intp shape_inc,
+ npy_intp count, npy_intp src_itemsize,
+ PyArray_MaskedStridedTransferFn *stransfer,
+ NpyAuxData *data)
+{
+ npy_intp i, M, N, coord0, shape0, dst_stride0, coord1, shape1, dst_stride1;
+
+ /* Finish off dimension 0 */
+ coord0 = coords[0];
+ shape0 = shape[0];
+ dst_stride0 = dst_strides[0];
+ N = shape0 - coord0;
+ if (N >= count) {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ count, src_itemsize, data);
+ return 0;
+ }
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ N, src_itemsize, data);
+ count -= N;
+
+ /* If it's 1-dimensional, there's no more to copy */
+ if (ndim == 1) {
+ return count;
+ }
+
+ /* Adjust the src and dst pointers */
+ coord1 = (coords + coords_inc)[0];
+ shape1 = (shape + shape_inc)[0];
+ dst_stride1 = (dst_strides + dst_strides_inc)[0];
+ dst = dst - coord0*dst_stride0 + dst_stride1;
+ src += N*src_stride;
+ mask += N*mask_stride;
+
+ /* Finish off dimension 1 */
+ M = (shape1 - coord1 - 1);
+ N = shape0*M;
+ for (i = 0; i < M; ++i) {
+ if (shape0 >= count) {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ count, src_itemsize, data);
+ return 0;
+ }
+ else {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ shape0, src_itemsize, data);
+ }
+ count -= shape0;
+ dst += dst_stride1;
+ src += shape0*src_stride;
+ mask += shape0*mask_stride;
+ }
+
+ /* If it's 2-dimensional, there's no more to copy */
+ if (ndim == 2) {
+ return count;
+ }
+
+ /* General-case loop for everything else */
+ else {
+ /* Iteration structure for dimensions 2 and up */
+ struct {
+ npy_intp coord, shape, dst_stride;
+ } it[NPY_MAXDIMS];
+
+ /* Copy the coordinates and shape */
+ coords += 2*coords_inc;
+ shape += 2*shape_inc;
+ dst_strides += 2*dst_strides_inc;
+ for (i = 0; i < ndim-2; ++i) {
+ it[i].coord = coords[0];
+ it[i].shape = shape[0];
+ it[i].dst_stride = dst_strides[0];
+ coords += coords_inc;
+ shape += shape_inc;
+ dst_strides += dst_strides_inc;
+ }
+
+ for (;;) {
+ /* Adjust the dst pointer from the dimension 0 and 1 loop */
+ dst = dst - shape1*dst_stride1;
+
+ /* Increment to the next coordinate */
+ for (i = 0; i < ndim-2; ++i) {
+ dst += it[i].dst_stride;
+ if (++it[i].coord >= it[i].shape) {
+ it[i].coord = 0;
+ dst -= it[i].dst_stride*it[i].shape;
+ }
+ else {
+ break;
+ }
+ }
+ /* If the last dimension rolled over, we're done */
+ if (i == ndim-2) {
+ return count;
+ }
+
+ /* A loop for dimensions 0 and 1 */
+ for (i = 0; i < shape1; ++i) {
+ if (shape0 >= count) {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ count, src_itemsize, data);
+ return 0;
+ }
+ else {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ shape0, src_itemsize, data);
+ }
+ count -= shape0;
+ dst += dst_stride1;
+ src += shape0*src_stride;
+ mask += shape0*mask_stride;
+ }
+ }
+ }
+}
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index 1328ebc38..4eda968db 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -1745,6 +1745,7 @@ npyiter_copy_from_buffers(NpyIter *iter)
npy_uint32 itflags = NIT_ITFLAGS(iter);
int ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
+ int maskop = NIT_MASKOP(iter);
char *op_itflags = NIT_OPITFLAGS(iter);
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
@@ -1862,14 +1863,42 @@ npyiter_copy_from_buffers(NpyIter *iter)
"operand %d (%d items)\n",
(int)iop, (int)op_transfersize);
- PyArray_TransferStridedToNDim(ndim_transfer,
- ad_ptrs[iop], dst_strides, axisdata_incr,
- buffer, src_stride,
- dst_coords, axisdata_incr,
- dst_shape, axisdata_incr,
- op_transfersize, dtypes[iop]->elsize,
- stransfer,
- transferdata);
+ if (op_itflags[iop] & NPY_OP_ITFLAG_WRITEMASKED) {
+ npy_uint8 *maskptr;
+
+ /*
+ * The mask pointer may be in the buffer or in
+ * the array, detect which one.
+ */
+ delta = (ptrs[maskop] - buffers[maskop]);
+ if (0 <= delta &&
+ delta <= buffersize*dtypes[maskop]->elsize) {
+ maskptr = buffers[maskop];
+ }
+ else {
+ maskptr = ad_ptrs[maskop];
+ }
+
+ PyArray_TransferMaskedStridedToNDim(ndim_transfer,
+ ad_ptrs[iop], dst_strides, axisdata_incr,
+ buffer, src_stride,
+ maskptr, strides[maskop],
+ dst_coords, axisdata_incr,
+ dst_shape, axisdata_incr,
+ op_transfersize, dtypes[iop]->elsize,
+ (PyArray_MaskedStridedTransferFn *)stransfer,
+ transferdata);
+ }
+ else {
+ PyArray_TransferStridedToNDim(ndim_transfer,
+ ad_ptrs[iop], dst_strides, axisdata_incr,
+ buffer, src_stride,
+ dst_coords, axisdata_incr,
+ dst_shape, axisdata_incr,
+ op_transfersize, dtypes[iop]->elsize,
+ stransfer,
+ transferdata);
+ }
}
}
/* If there's no copy back, we may have to decrement refs. In
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index e99a0fb0a..b9f79f1d3 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -189,6 +189,7 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
NIT_ITFLAGS(iter) = itflags;
NIT_NDIM(iter) = ndim;
NIT_NOP(iter) = nop;
+ NIT_MASKOP(iter) = -1;
NIT_ITERINDEX(iter) = 0;
memset(NIT_BASEOFFSETS(iter), 0, (nop+1)*NPY_SIZEOF_INTP);
@@ -902,7 +903,7 @@ npyiter_check_per_op_flags(npy_uint32 op_flags, char *op_itflags)
/* Check the flag for a write masked operands */
if (op_flags & NPY_ITER_WRITEMASKED) {
- if (!(*op_itflags) & NPY_OP_ITFLAG_WRITE) {
+ if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
PyErr_SetString(PyExc_ValueError,
"The iterator flag WRITEMASKED may only "
"be used with READWRITE or WRITEONLY");
@@ -1125,6 +1126,7 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
{
int iop, i;
npy_int8 maskop = -1;
+ int any_writemasked_ops = 0;
for (iop = 0; iop < nop; ++iop) {
op[iop] = op_in[iop];
@@ -1157,6 +1159,10 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
*out_maskop = iop;
}
+ if (op_flags[iop] & NPY_ITER_WRITEMASKED) {
+ any_writemasked_ops = 1;
+ }
+
/*
* Prepare the operand. This produces an op_dtype[iop] reference
* on success.
@@ -1196,6 +1202,21 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
}
}
+ if (any_writemasked_ops && maskop < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "An iterator operand was flagged as WRITEMASKED, "
+ "but no ARRAYMASK operand was given to supply "
+ "the mask");
+ return 0;
+ }
+ else if (!any_writemasked_ops && maskop >= 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "An iterator operand was flagged as the ARRAYMASK, "
+ "but no WRITEMASKED operands were given to use "
+ "the mask");
+ return 0;
+ }
+
return 1;
}
@@ -1348,6 +1369,57 @@ npyiter_check_casting(int nop, PyArrayObject **op,
}
/*
+ * Checks that the mask broadcasts to the WRITEMASK REDUCE
+ * operand 'iop', but 'iop' never broadcasts to the mask.
+ * If 'iop' broadcasts to the mask, the result would be more
+ * than one mask value per reduction element, something which
+ * is invalid.
+ *
+ * This check should only be called after all the operands
+ * have been filled in.
+ *
+ * Returns 1 on success, 0 on error.
+ */
+static int
+check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+ int maskop = NIT_MASKOP(iter);
+
+ NpyIter_AxisData *axisdata;
+ npy_intp sizeof_axisdata;
+
+ axisdata = NIT_AXISDATA(iter);
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ for(idim = 0; idim < ndim; ++idim) {
+ npy_intp maskstride, istride;
+
+ istride = NAD_STRIDES(axisdata)[iop];
+ maskstride = NAD_STRIDES(axisdata)[maskop];
+
+ /*
+ * If 'iop' is being broadcast to 'maskop', we have
+ * the invalid situation described above.
+ */
+ if (maskstride != 0 && istride == 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "Iterator reduction operand is WRITEMASKED, "
+ "but also broadcasts to multiple mask values. "
+ "There can be only one mask value per WRITEMASKED "
+ "element.");
+ return 0;
+ }
+
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+
+ return 1;
+}
+
+/*
* Fills in the AXISDATA for the 'nop' operands, broadcasting
* the dimensions as necessary. Also fills
* in the ITERSIZE data member.
@@ -1367,6 +1439,7 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
+ int maskop = NIT_MASKOP(iter);
int ondim;
NpyIter_AxisData *axisdata;
@@ -1487,7 +1560,7 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
}
}
else if (idim >= ondim ||
- PyArray_DIM(op_cur, ondim-idim-1) == 1) {
+ PyArray_DIM(op_cur, ondim-idim-1) == 1) {
strides[iop] = 0;
if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
goto operand_different_than_broadcast;
@@ -1496,17 +1569,37 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
if (!(flags & NPY_ITER_REDUCE_OK)) {
PyErr_SetString(PyExc_ValueError,
- "output operand requires a reduction, but "
- "reduction is not enabled");
+ "output operand requires a "
+ "reduction, but reduction is "
+ "not enabled");
return 0;
}
if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
PyErr_SetString(PyExc_ValueError,
- "output operand requires a reduction, but "
- "is flagged as write-only, not "
- "read-write");
+ "output operand requires a "
+ "reduction, but is flagged as "
+ "write-only, not read-write");
return 0;
}
+ /*
+ * The ARRAYMASK can't be a reduction, because
+ * it would be possible to write back to the
+ * array once when the ARRAYMASK says 'True',
+ * then have the reduction on the ARRAYMASK
+ * later flip to 'False', indicating that the
+ * write back should never have been done,
+ * and violating the strict masking semantics
+ */
+ if (iop == maskop) {
+ PyErr_SetString(PyExc_ValueError,
+ "output operand requires a "
+ "reduction, but is flagged as "
+ "the ARRAYMASK operand which "
+ "is not permitted to be the "
+ "result of a reduction");
+ return 0;
+ }
+
NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
}
@@ -2642,6 +2735,8 @@ npyiter_allocate_arrays(NpyIter *iter,
int idim, ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
+ int check_writemasked_reductions = 0;
+
NpyIter_BufferData *bufferdata = NULL;
PyArrayObject **op = NIT_OPERANDS(iter);
@@ -2649,8 +2744,18 @@ npyiter_allocate_arrays(NpyIter *iter,
bufferdata = NIT_BUFFERDATA(iter);
}
-
for (iop = 0; iop < nop; ++iop) {
+ /*
+ * Check whether there are any WRITEMASKED REDUCE operands
+ * which should be validated after all the strides are filled
+ * in.
+ */
+ if ((op_itflags[iop] &
+ (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
+ (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
+ check_writemasked_reductions = 1;
+ }
+
/* NULL means an output the iterator should allocate */
if (op[iop] == NULL) {
PyArrayObject *out;
@@ -2820,7 +2925,8 @@ npyiter_allocate_arrays(NpyIter *iter,
* the inner stride of this operand works for the whole
* array, we can set NPY_OP_ITFLAG_BUFNEVER.
*/
- if ((itflags & NPY_ITFLAG_BUFFER) && !(op_itflags[iop] & NPY_OP_ITFLAG_CAST)) {
+ if ((itflags & NPY_ITFLAG_BUFFER) &&
+ !(op_itflags[iop] & NPY_OP_ITFLAG_CAST)) {
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
if (ndim == 1) {
op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
@@ -2872,6 +2978,31 @@ npyiter_allocate_arrays(NpyIter *iter,
}
}
+ if (check_writemasked_reductions) {
+ for (iop = 0; iop < nop; ++iop) {
+ /*
+ * Check whether there are any WRITEMASKED REDUCE operands
+ * which should be validated now that all the strides are filled
+ * in.
+ */
+ if ((op_itflags[iop] &
+ (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
+ (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
+ /*
+ * If the ARRAYMASK has 'bigger' dimensions
+ * than this REDUCE WRITEMASKED operand,
+ * the result would be more than one mask
+ * value per reduction element, something which
+ * is invalid. This function provides validation
+ * for that.
+ */
+ if (!check_mask_for_writemasked_reduction(iter, iop)) {
+ return 0;
+ }
+ }
+ }
+ }
+
return 1;
}
@@ -2958,7 +3089,38 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
}
if (flags & NPY_OP_ITFLAG_WRITE) {
int move_references = 1;
- if (PyArray_GetDTypeTransferFunction(
+
+ /*
+ * If the operand is WRITEMASKED, use a masked transfer fn.
+ */
+ if (flags & NPY_OP_ITFLAG_WRITEMASKED) {
+ int maskop = NIT_MASKOP(iter);
+ PyArray_Descr *mask_dtype = PyArray_DESCR(op[maskop]);
+
+ /*
+ * If the mask's stride is contiguous, use it, otherwise
+ * the mask may or may not be buffered, so the stride
+ * could be inconsistent.
+ */
+ if (PyArray_GetMaskedDTypeTransferFunction(
+ (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
+ op_dtype[iop]->elsize,
+ op_stride,
+ (strides[maskop] == mask_dtype->elsize) ?
+ mask_dtype->elsize :
+ NPY_MAX_INTP,
+ op_dtype[iop],
+ PyArray_DESCR(op[iop]),
+ mask_dtype,
+ move_references,
+ (PyArray_MaskedStridedTransferFn **)&stransfer,
+ &transferdata,
+ &needs_api) != NPY_SUCCEED) {
+ goto fail;
+ }
+ }
+ else {
+ if (PyArray_GetDTypeTransferFunction(
(flags & NPY_OP_ITFLAG_ALIGNED) != 0,
op_dtype[iop]->elsize,
op_stride,
@@ -2968,7 +3130,8 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
&stransfer,
&transferdata,
&needs_api) != NPY_SUCCEED) {
- goto fail;
+ goto fail;
+ }
}
writetransferfn[iop] = stransfer;
writetransferdata[iop] = transferdata;
diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h
index b4cd79f9a..088b15040 100644
--- a/numpy/core/src/private/lowlevel_strided_loops.h
+++ b/numpy/core/src/private/lowlevel_strided_loops.h
@@ -32,11 +32,11 @@ typedef void (PyArray_StridedTransferFn)(char *dst, npy_intp dst_stride,
* which values are transferred.
*
* In particular, the 'i'-th element is transferred if and only if
- * (((mask[i*mask_stride])&0x01) == 0x01).
+ * NpyMask_IsExposed(mask[i*mask_stride]).
*/
typedef void (PyArray_MaskedStridedTransferFn)(char *dst, npy_intp dst_stride,
char *src, npy_intp src_stride,
- npy_uint8 *mask, npy_intp mask_stride,
+ npy_mask *mask, npy_intp mask_stride,
npy_intp N, npy_intp src_itemsize,
NpyAuxData *transferdata);
@@ -191,7 +191,7 @@ PyArray_GetDTypeTransferFunction(int aligned,
* This is identical to PyArray_GetDTypeTransferFunction, but
* returns a transfer function which also takes a mask as a parameter.
* Bit zero of the mask is used to determine which values to copy,
- * data is transfered exactly when ((mask[i])&0x01) == 0x01.
+ * and data is transferred exactly when NpyMask_IsExposed(mask[i*mask_stride]).
*
* If move_references is true, values which are not copied to the
* destination will still have their source reference decremented.
@@ -291,6 +291,18 @@ PyArray_TransferStridedToNDim(npy_intp ndim,
PyArray_StridedTransferFn *stransfer,
NpyAuxData *transferdata);
+NPY_NO_EXPORT npy_intp
+PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
+ char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc,
+ char *src, npy_intp src_stride,
+ npy_mask *mask, npy_intp mask_stride,
+ npy_intp *coords, npy_intp coords_inc,
+ npy_intp *shape, npy_intp shape_inc,
+ npy_intp count, npy_intp src_itemsize,
+ PyArray_MaskedStridedTransferFn *stransfer,
+ NpyAuxData *data);
+
+
/*
* TRIVIAL ITERATION
*
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index a89c0f235..2ffca63d2 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -1480,9 +1480,10 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
NPY_ITER_ALIGNED|
NPY_ITER_ALLOCATE|
NPY_ITER_NO_BROADCAST|
- NPY_ITER_NO_SUBTYPE;
+ NPY_ITER_NO_SUBTYPE|
+ NPY_ITER_WRITEMASKED;
}
- op_flags[nop] = NPY_ITER_READONLY;
+ op_flags[nop] = NPY_ITER_READONLY|NPY_ITER_ARRAYMASK;
NPY_UF_DBG_PRINT("Making iterator\n");
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 8fb17a441..b08a7f165 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -1407,7 +1407,8 @@ unmasked_ufunc_loop_as_masked(
do {
/* Skip masked values */
subloopsize = 0;
- while (subloopsize < loopsize && ((*(npy_uint8 *)mask)&0x01) == 0) {
+ while (subloopsize < loopsize &&
+ !NpyMask_IsExposed(*(npy_mask *)mask)) {
++subloopsize;
mask += mask_stride;
}
@@ -1420,7 +1421,8 @@ unmasked_ufunc_loop_as_masked(
* mess with the 'args' pointer values)
*/
subloopsize = 0;
- while (subloopsize < loopsize && ((*(npy_uint8 *)mask)&0x01) != 0) {
+ while (subloopsize < loopsize &&
+ NpyMask_IsExposed(*(npy_mask *)mask)) {
++subloopsize;
mask += mask_stride;
}
diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py
index 7ebcb932b..d2d8241f2 100644
--- a/numpy/core/tests/test_api.py
+++ b/numpy/core/tests/test_api.py
@@ -110,7 +110,7 @@ def test_copyto():
assert_raises(TypeError, np.copyto, a, 3.5, where=[True,False,True])
# Lists of integer 0's and 1's is ok too
- np.copyto(a, 4, where=[[0,1,1], [1,0,0]])
+ np.copyto(a, 4.0, casting='unsafe', where=[[0,1,1], [1,0,0]])
assert_equal(a, [[3,4,4], [4,1,3]])
# Overlapping copy with mask should work
diff --git a/numpy/core/tests/test_iterator.py b/numpy/core/tests/test_nditer.py
index f39d9584f..cf0a44c63 100644
--- a/numpy/core/tests/test_iterator.py
+++ b/numpy/core/tests/test_nditer.py
@@ -457,7 +457,7 @@ def test_iter_no_inner_dim_coalescing():
i = nditer(a, ['external_loop'], [['readonly']])
assert_equal(i.ndim, 1)
assert_equal(i[0].shape, (12,))
-
+
# Even with lots of 1-sized dimensions, should still coalesce
a = arange(24).reshape(1,1,2,1,1,3,1,1,4,1,1)
i = nditer(a, ['external_loop'], [['readonly']])
@@ -658,7 +658,7 @@ def test_iter_broadcasting_errors():
# The message should contain the itershape parameter
assert_(msg.find('(4,3)') >= 0,
'Message "%s" doesn\'t contain itershape parameter (4,3)' % msg)
-
+
try:
i = nditer([np.zeros((2,1,1)), np.zeros((2,))],
[],
@@ -1579,7 +1579,7 @@ def test_iter_buffered_cast_simple():
buffersize=3)
for v in i:
v[...] *= 2
-
+
assert_equal(a, 2*np.arange(10, dtype='f4'))
def test_iter_buffered_cast_byteswapped():
@@ -1593,7 +1593,7 @@ def test_iter_buffered_cast_byteswapped():
buffersize=3)
for v in i:
v[...] *= 2
-
+
assert_equal(a, 2*np.arange(10, dtype='f4'))
try:
@@ -1607,7 +1607,7 @@ def test_iter_buffered_cast_byteswapped():
buffersize=3)
for v in i:
v[...] *= 2
-
+
assert_equal(a, 2*np.arange(10, dtype='f8'))
finally:
warnings.simplefilter("default", np.ComplexWarning)
@@ -1966,7 +1966,7 @@ def test_iter_buffering_badwriteback():
i = nditer([a,b],['buffered','external_loop'],
[['readonly'],['writeonly']],
order='C')
-
+
# If a has just one element, it's fine too (constant 0 stride, a reduction)
a = np.arange(1).reshape(1,1,1)
i = nditer([a,b],['buffered','external_loop','reduce_ok'],
@@ -2192,7 +2192,7 @@ def test_iter_nested_iters_dtype_buffered():
assert_equal(a, [[1,2,3],[4,5,6]])
def test_iter_reduction_error():
-
+
a = np.arange(6)
assert_raises(ValueError, nditer, [a,None], [],
[['readonly'], ['readwrite','allocate']],
@@ -2295,5 +2295,109 @@ def test_iter_buffering_reduction():
it.reset()
assert_equal(it[0], [1,2,1,2])
+def test_iter_writemasked_badinput():
+ a = np.zeros((2,3))
+ b = np.zeros((3,))
+ m = np.array([[True,True,False],[False,True,False]])
+ m2 = np.array([True,True,False])
+ m3 = np.array([0,1,1], dtype='u1')
+ mbad1 = np.array([0,1,1], dtype='i1')
+ mbad2 = np.array([0,1,1], dtype='f4')
+
+ # Need an 'arraymask' if any operand is 'writemasked'
+ assert_raises(ValueError, nditer, [a,m], [],
+ [['readwrite','writemasked'],['readonly']])
+
+ # A 'writemasked' operand must not be readonly
+ assert_raises(ValueError, nditer, [a,m], [],
+ [['readonly','writemasked'],['readonly','arraymask']])
+
+ # 'writemasked' and 'arraymask' may not be used together
+ assert_raises(ValueError, nditer, [a,m], [],
+ [['readonly'],['readwrite','arraymask','writemasked']])
+
+ # 'arraymask' may only be specified once
+ assert_raises(ValueError, nditer, [a,m, m2], [],
+ [['readwrite','writemasked'],
+ ['readonly','arraymask'],
+ ['readonly','arraymask']])
+
+ # An 'arraymask' with nothing 'writemasked' also doesn't make sense
+ assert_raises(ValueError, nditer, [a,m], [],
+ [['readwrite'],['readonly','arraymask']])
+
+ # A writemasked reduction requires a similarly smaller mask
+ assert_raises(ValueError, nditer, [a,b,m], ['reduce_ok'],
+ [['readonly'],
+ ['readwrite','writemasked'],
+ ['readonly','arraymask']])
+ # But this should work with a smaller/equal mask to the reduction operand
+ np.nditer([a,b,m2], ['reduce_ok'],
+ [['readonly'],
+ ['readwrite','writemasked'],
+ ['readonly','arraymask']])
+ # The arraymask itself cannot be a reduction
+ assert_raises(ValueError, nditer, [a,b,m2], ['reduce_ok'],
+ [['readonly'],
+ ['readwrite','writemasked'],
+ ['readwrite','arraymask']])
+
+ # A uint8 mask is ok too
+ np.nditer([a,m3], ['buffered'],
+ [['readwrite','writemasked'],
+ ['readonly','arraymask']],
+ op_dtypes=['f4',None],
+ casting='same_kind')
+ # An int8 mask isn't ok
+ assert_raises(TypeError, np.nditer, [a,mbad1], ['buffered'],
+ [['readwrite','writemasked'],
+ ['readonly','arraymask']],
+ op_dtypes=['f4',None],
+ casting='same_kind')
+ # A float32 mask isn't ok
+ assert_raises(TypeError, np.nditer, [a,mbad2], ['buffered'],
+ [['readwrite','writemasked'],
+ ['readonly','arraymask']],
+ op_dtypes=['f4',None],
+ casting='same_kind')
+
+def test_iter_writemasked():
+ a = np.zeros((3,), dtype='f8')
+ msk = np.array([True,True,False])
+
+ # When buffering is unused, 'writemasked' effectively does nothing.
+ # It's up to the user of the iterator to obey the requested semantics.
+ it = np.nditer([a,msk], [],
+ [['readwrite','writemasked'],
+ ['readonly','arraymask']])
+ for x, m in it:
+ x[...] = 1
+ # Because we violated the semantics, all the values became 1
+ assert_equal(a, [1,1,1])
+
+ # Even if buffering is enabled, we still may be accessing the array
+ # directly.
+ it = np.nditer([a,msk], ['buffered'],
+ [['readwrite','writemasked'],
+ ['readonly','arraymask']])
+ for x, m in it:
+ x[...] = 2.5
+ # Because we violated the semantics, all the values became 2.5
+ assert_equal(a, [2.5,2.5,2.5])
+
+ # If buffering will definitely happen, for instance because of
+ # a cast, only the items selected by the mask will be copied back from
+ # the buffer.
+ it = np.nditer([a,msk], ['buffered'],
+ [['readwrite','writemasked'],
+ ['readonly','arraymask']],
+ op_dtypes=['i8',None],
+ casting='unsafe')
+ for x, m in it:
+ x[...] = 3
+ # Even though we violated the semantics, only the selected values
+ # were copied back
+ assert_equal(a, [3,3,2.5])
+
if __name__ == "__main__":
run_module_suite()
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index a7af19486..974a6d6f8 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -499,7 +499,6 @@ class TestUfunc(TestCase):
np.subtract(a, 2, out=a, where=[True,False])
assert_equal(a, [[0, 27], [14, 5]])
- @dec.knownfailureif(True)
def test_where_param_buffer_output(self):
# This test is temporarily skipped because it requires
# adding masking features to the nditer to work properly