author    Mark Wiebe <mwwiebe@gmail.com>  2011-07-09 23:48:55 -0500
committer Charles Harris <charlesr.harris@gmail.com>  2011-07-11 09:15:31 -0600
commit    ad6d8f2fdae535d394920aac95276d701f27867b (patch)
tree      5e9ad4297af9f901d448aae38ee777e4fa01bb15
parent    b5cdaee35bab2a06604f204ba18e00bf465879a7 (diff)
download  numpy-ad6d8f2fdae535d394920aac95276d701f27867b.tar.gz
ENH: nditer: Finish implementation of masked iteration
This still needs tests; so far I've only validated that it doesn't break anything already covered by the existing tests.
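For context, a rough sketch (not part of this patch) of how the WRITEMASKED/ARRAYMASK operand flags it finishes wiring up are meant to be used at the C level: a writable operand is paired with a mask operand at iterator construction time. NpyIter_AdvancedNew, NpyIter_Deallocate, and the flag constants are the real C-API names; the function itself is hypothetical and elides error handling and the inner loop.

    #include <Python.h>
    #include <numpy/arrayobject.h>

    /* Sketch: buffered write-backs to 'data' only touch elements
     * selected by 'mask' once the iterator is set up this way. */
    static int
    masked_iter_sketch(PyArrayObject *data, PyArrayObject *mask)
    {
        PyArrayObject *ops[2] = {data, mask};
        npy_uint32 op_flags[2] = {
            NPY_ITER_READWRITE | NPY_ITER_WRITEMASKED, /* writes gated by mask */
            NPY_ITER_READONLY | NPY_ITER_ARRAYMASK     /* supplies the mask */
        };
        NpyIter *iter = NpyIter_AdvancedNew(2, ops, NPY_ITER_BUFFERED,
                NPY_KEEPORDER, NPY_SAFE_CASTING, op_flags,
                NULL, 0, NULL, NULL, 0);

        if (iter == NULL) {
            return -1;
        }
        /* ... NpyIter_GetIterNext() loop elided ... */
        NpyIter_Deallocate(iter);
        return 0;
    }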
-rw-r--r--  numpy/core/src/multiarray/lowlevel_strided_loops.c.src  136
-rw-r--r--  numpy/core/src/multiarray/nditer_api.c                    30
-rw-r--r--  numpy/core/src/multiarray/nditer_constr.c                183
-rw-r--r--  numpy/core/src/private/lowlevel_strided_loops.h           12
4 files changed, 343 insertions, 18 deletions
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index ab1918e0e..f33e39861 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -1156,3 +1156,139 @@ PyArray_TransferStridedToNDim(npy_intp ndim,
}
}
}
+
+NPY_NO_EXPORT npy_intp
+PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
+ char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc,
+ char *src, npy_intp src_stride,
+ npy_uint8 *mask, npy_intp mask_stride,
+ npy_intp *coords, npy_intp coords_inc,
+ npy_intp *shape, npy_intp shape_inc,
+ npy_intp count, npy_intp src_itemsize,
+ PyArray_MaskedStridedTransferFn *stransfer,
+ NpyAuxData *data)
+{
+ npy_intp i, M, N, coord0, shape0, dst_stride0, coord1, shape1, dst_stride1;
+
+ /* Finish off dimension 0 */
+ coord0 = coords[0];
+ shape0 = shape[0];
+ dst_stride0 = dst_strides[0];
+ N = shape0 - coord0;
+ if (N >= count) {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ count, src_itemsize, data);
+ return 0;
+ }
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ N, src_itemsize, data);
+ count -= N;
+
+ /* If it's 1-dimensional, there's no more to copy */
+ if (ndim == 1) {
+ return count;
+ }
+
+ /* Adjust the src and dst pointers */
+ coord1 = (coords + coords_inc)[0];
+ shape1 = (shape + shape_inc)[0];
+ dst_stride1 = (dst_strides + dst_strides_inc)[0];
+ dst = dst - coord0*dst_stride0 + dst_stride1;
+ src += N*src_stride;
+ mask += N*mask_stride;
+
+ /* Finish off dimension 1 */
+ M = (shape1 - coord1 - 1);
+ N = shape0*M;
+ for (i = 0; i < M; ++i) {
+ if (shape0 >= count) {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ count, src_itemsize, data);
+ return 0;
+ }
+ else {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ shape0, src_itemsize, data);
+ }
+ count -= shape0;
+ dst += dst_stride1;
+ src += shape0*src_stride;
+ mask += shape0*mask_stride;
+ }
+
+ /* If it's 2-dimensional, there's no more to copy */
+ if (ndim == 2) {
+ return count;
+ }
+
+ /* General-case loop for everything else */
+ else {
+ /* Iteration structure for dimensions 2 and up */
+ struct {
+ npy_intp coord, shape, dst_stride;
+ } it[NPY_MAXDIMS];
+
+ /* Copy the coordinates and shape */
+ coords += 2*coords_inc;
+ shape += 2*shape_inc;
+ dst_strides += 2*dst_strides_inc;
+ for (i = 0; i < ndim-2; ++i) {
+ it[i].coord = coords[0];
+ it[i].shape = shape[0];
+ it[i].dst_stride = dst_strides[0];
+ coords += coords_inc;
+ shape += shape_inc;
+ dst_strides += dst_strides_inc;
+ }
+
+ for (;;) {
+ /* Adjust the dst pointer from the dimension 0 and 1 loop */
+ dst = dst - shape1*dst_stride1;
+
+ /* Increment to the next coordinate */
+ for (i = 0; i < ndim-2; ++i) {
+ dst += it[i].dst_stride;
+ if (++it[i].coord >= it[i].shape) {
+ it[i].coord = 0;
+ dst -= it[i].dst_stride*it[i].shape;
+ }
+ else {
+ break;
+ }
+ }
+ /* If the last dimension rolled over, we're done */
+ if (i == ndim-2) {
+ return count;
+ }
+
+ /* A loop for dimensions 0 and 1 */
+ for (i = 0; i < shape1; ++i) {
+ if (shape0 >= count) {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ count, src_itemsize, data);
+ return 0;
+ }
+ else {
+ stransfer(dst, dst_stride0,
+ src, src_stride,
+ mask, mask_stride,
+ shape0, src_itemsize, data);
+ }
+ count -= shape0;
+ dst += dst_stride1;
+ src += shape0*src_stride;
+ mask += shape0*mask_stride;
+ }
+ }
+ }
+}
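The routine above only does the N-dimensional bookkeeping; the per-element work happens in the stransfer callback. Below is a minimal sketch (not part of this patch) of the contract implied by the call sites, assuming a nonzero mask byte means "copy this element"; the real specialized loops produced by the dtype transfer machinery also handle casting, alignment, and reference counting. The numpy-internal types (npy_intp, npy_uint8, NpyAuxData, NPY_UNUSED) are taken to be in scope, as they are in this file.

    #include <string.h>  /* memcpy */

    static void
    masked_copy_sketch(char *dst, npy_intp dst_stride,
                       char *src, npy_intp src_stride,
                       npy_uint8 *mask, npy_intp mask_stride,
                       npy_intp N, npy_intp src_itemsize,
                       NpyAuxData *NPY_UNUSED(data))
    {
        npy_intp i;

        for (i = 0; i < N; ++i) {
            /* copy one element only where the mask selects it */
            if (*mask) {
                memcpy(dst, src, src_itemsize);
            }
            dst += dst_stride;
            src += src_stride;
            mask += mask_stride;
        }
    }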
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index 1328ebc38..3d95ee859 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -1745,6 +1745,7 @@ npyiter_copy_from_buffers(NpyIter *iter)
npy_uint32 itflags = NIT_ITFLAGS(iter);
int ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
+ int maskop = NIT_MASKOP(iter);
char *op_itflags = NIT_OPITFLAGS(iter);
NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
@@ -1862,14 +1863,27 @@ npyiter_copy_from_buffers(NpyIter *iter)
"operand %d (%d items)\n",
(int)iop, (int)op_transfersize);
- PyArray_TransferStridedToNDim(ndim_transfer,
- ad_ptrs[iop], dst_strides, axisdata_incr,
- buffer, src_stride,
- dst_coords, axisdata_incr,
- dst_shape, axisdata_incr,
- op_transfersize, dtypes[iop]->elsize,
- stransfer,
- transferdata);
+ if (op_itflags[iop]&NPY_OP_ITFLAG_WRITEMASKED) {
+ PyArray_TransferMaskedStridedToNDim(ndim_transfer,
+ ad_ptrs[iop], dst_strides, axisdata_incr,
+ buffer, src_stride,
+ (npy_uint8 *)buffers[maskop], strides[maskop],
+ dst_coords, axisdata_incr,
+ dst_shape, axisdata_incr,
+ op_transfersize, dtypes[iop]->elsize,
+ (PyArray_MaskedStridedTransferFn *)stransfer,
+ transferdata);
+ }
+ else {
+ PyArray_TransferStridedToNDim(ndim_transfer,
+ ad_ptrs[iop], dst_strides, axisdata_incr,
+ buffer, src_stride,
+ dst_coords, axisdata_incr,
+ dst_shape, axisdata_incr,
+ op_transfersize, dtypes[iop]->elsize,
+ stransfer,
+ transferdata);
+ }
}
}
/* If there's no copy back, we may have to decrement refs. In
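A note on the masked branch above: buffers[maskop] and strides[maskop] are the mask operand's buffer-side pointer and stride, so the mask consulted during write-back is the same, possibly buffered, mask data the inner loop saw; the else branch is the pre-existing unmasked path, unchanged.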
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index e99a0fb0a..1c8e3e9b6 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -189,6 +189,7 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
NIT_ITFLAGS(iter) = itflags;
NIT_NDIM(iter) = ndim;
NIT_NOP(iter) = nop;
+ NIT_MASKOP(iter) = -1;
NIT_ITERINDEX(iter) = 0;
memset(NIT_BASEOFFSETS(iter), 0, (nop+1)*NPY_SIZEOF_INTP);
@@ -1125,6 +1126,7 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
{
int iop, i;
npy_int8 maskop = -1;
+ int any_writemasked_ops = 0;
for (iop = 0; iop < nop; ++iop) {
op[iop] = op_in[iop];
@@ -1157,6 +1159,10 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
*out_maskop = iop;
}
+ if (op_flags[iop] & NPY_ITER_WRITEMASKED) {
+ any_writemasked_ops = 1;
+ }
+
/*
* Prepare the operand. This produces an op_dtype[iop] reference
* on success.
@@ -1196,6 +1202,21 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
}
}
+ if (any_writemasked_ops && maskop < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "An iterator operand was flagged as WRITEMASKED, "
+ "but no ARRAYMASK operand was given to supply "
+ "the mask");
+ return 0;
+ }
+ else if (!any_writemasked_ops && maskop >= 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "An iterator operand was flagged as the ARRAYMASK, "
+ "but no WRITEMASKED operands were given to use "
+ "the mask");
+ return 0;
+ }
+
return 1;
}
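The two new checks make the flags come in matched pairs, so a construction like the following fragment now fails up front instead of silently iterating unmasked (illustrative only; 'data' stands in for some PyArrayObject pointer):

    /* WRITEMASKED with no ARRAYMASK operand anywhere: with this patch
     * NpyIter_AdvancedNew returns NULL with a ValueError set. */
    npy_uint32 one_flag = NPY_ITER_READWRITE | NPY_ITER_WRITEMASKED;
    NpyIter *it = NpyIter_AdvancedNew(1, &data, 0, NPY_KEEPORDER,
                                      NPY_SAFE_CASTING, &one_flag,
                                      NULL, 0, NULL, NULL, 0);
    /* it == NULL here; the symmetric case (ARRAYMASK present but no
     * WRITEMASKED operands) is rejected the same way. */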
@@ -1348,6 +1369,57 @@ npyiter_check_casting(int nop, PyArrayObject **op,
}
/*
+ * Checks that the mask broadcasts to the WRITEMASKED REDUCE
+ * operand 'iop', but 'iop' never broadcasts to the mask.
+ * If 'iop' broadcasts to the mask, the result would be more
+ * than one mask value per reduction element, something which
+ * is invalid.
+ *
+ * This check should only be called after all the operands
+ * have been filled in.
+ *
+ * Returns 1 on success, 0 on error.
+ */
+static int
+check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+ int maskop = NIT_MASKOP(iter);
+
+ NpyIter_AxisData *axisdata;
+ npy_intp sizeof_axisdata;
+
+ axisdata = NIT_AXISDATA(iter);
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ for(idim = 0; idim < ndim; ++idim) {
+ npy_intp maskstride, istride;
+
+ istride = NAD_STRIDES(axisdata)[iop];
+ maskstride = NAD_STRIDES(axisdata)[maskop];
+
+ /*
+ * If 'iop' is being broadcast to 'maskop', we have
+ * the invalid situation described above.
+ */
+ if (maskstride != 0 && istride == 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "Iterator reduction operand is WRITEMASKED, "
+ "but also broadcasts to multiple mask values. "
+ "There can be only one mask value per WRITEMASKED "
+ "element.");
+ return 0;
+ }
+
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+
+ return 1;
+}
+
+/*
* Fills in the AXISDATA for the 'nop' operands, broadcasting
 * the dimensions as necessary. Also fills
* in the ITERSIZE data member.
@@ -1367,6 +1439,7 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
npy_uint32 itflags = NIT_ITFLAGS(iter);
int idim, ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
+ int maskop = NIT_MASKOP(iter);
int ondim;
NpyIter_AxisData *axisdata;
@@ -1487,7 +1560,7 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
}
}
else if (idim >= ondim ||
- PyArray_DIM(op_cur, ondim-idim-1) == 1) {
+ PyArray_DIM(op_cur, ondim-idim-1) == 1) {
strides[iop] = 0;
if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
goto operand_different_than_broadcast;
@@ -1496,17 +1569,37 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
if (!(flags & NPY_ITER_REDUCE_OK)) {
PyErr_SetString(PyExc_ValueError,
- "output operand requires a reduction, but "
- "reduction is not enabled");
+ "output operand requires a "
+ "reduction, but reduction is "
+ "not enabled");
return 0;
}
if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
PyErr_SetString(PyExc_ValueError,
- "output operand requires a reduction, but "
- "is flagged as write-only, not "
- "read-write");
+ "output operand requires a "
+ "reduction, but is flagged as "
+ "write-only, not read-write");
return 0;
}
+ /*
+ * The ARRAYMASK can't be a reduction, because
+ * it would be possible to write back to the
+ * array once when the ARRAYMASK says 'True',
+ * then have the reduction on the ARRAYMASK
+ * later flip to 'False', indicating that the
+ * write back should never have been done,
+ * and violating the strict masking semantics
+ */
+ if (iop == maskop) {
+ PyErr_SetString(PyExc_ValueError,
+ "output operand requires a "
+ "reduction, but is flagged as "
+ "the ARRAYMASK operand which "
+ "is not permitted to be the "
+ "result of a reduction");
+ return 0;
+ }
+
NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
}
@@ -2642,6 +2735,8 @@ npyiter_allocate_arrays(NpyIter *iter,
int idim, ndim = NIT_NDIM(iter);
int iop, nop = NIT_NOP(iter);
+ int check_writemasked_reductions = 0;
+
NpyIter_BufferData *bufferdata = NULL;
PyArrayObject **op = NIT_OPERANDS(iter);
@@ -2649,8 +2744,18 @@ npyiter_allocate_arrays(NpyIter *iter,
bufferdata = NIT_BUFFERDATA(iter);
}
-
for (iop = 0; iop < nop; ++iop) {
+ /*
+ * Check whether there are any WRITEMASKED REDUCE operands
+ * which should be validated after all the strides are filled
+ * in.
+ */
+ if ((op_itflags[iop] &
+ (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
+ (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
+ check_writemasked_reductions = 1;
+ }
+
/* NULL means an output the iterator should allocate */
if (op[iop] == NULL) {
PyArrayObject *out;
@@ -2820,7 +2925,8 @@ npyiter_allocate_arrays(NpyIter *iter,
* the inner stride of this operand works for the whole
* array, we can set NPY_OP_ITFLAG_BUFNEVER.
*/
- if ((itflags & NPY_ITFLAG_BUFFER) && !(op_itflags[iop] & NPY_OP_ITFLAG_CAST)) {
+ if ((itflags & NPY_ITFLAG_BUFFER) &&
+ !(op_itflags[iop] & NPY_OP_ITFLAG_CAST)) {
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
if (ndim == 1) {
op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
@@ -2872,6 +2978,31 @@ npyiter_allocate_arrays(NpyIter *iter,
}
}
+ if (check_writemasked_reductions) {
+ for (iop = 0; iop < nop; ++iop) {
+ /*
+ * Check whether there are any WRITEMASKED REDUCE operands
+ * which should be validated now that all the strides are filled
+ * in.
+ */
+ if ((op_itflags[iop] &
+ (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
+ (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
+ /*
+ * If the ARRAYMASK has 'bigger' dimensions
+ * than this REDUCE WRITEMASKED operand,
+ * the result would be more than one mask
+ * value per reduction element, something which
+ * is invalid. This function provides validation
+ * for that.
+ */
+ if (!check_mask_for_writemasked_reduction(iter, iop)) {
+ return 0;
+ }
+ }
+ }
+ }
+
return 1;
}
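To make the stride condition in check_mask_for_writemasked_reduction concrete, this is the kind of per-axis layout it rejects (the numbers are illustrative):

    /*
     * Along axis 1 the WRITEMASKED reduce operand is broadcast
     * (stride 0: one output element for the whole axis) while the
     * ARRAYMASK still varies (stride != 0), so several mask values
     * would govern a single reduction element:
     *
     *     axis:               0     1
     *     operand strides:    8     0   <- reduction axis
     *     mask strides:      24     8   <- mask varies here: rejected
     *
     * The converse (mask stride 0 where the operand varies) is fine:
     * a single mask value then gates a whole run of elements.
     */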
@@ -2958,7 +3089,38 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
}
if (flags & NPY_OP_ITFLAG_WRITE) {
int move_references = 1;
- if (PyArray_GetDTypeTransferFunction(
+
+ /*
+ * If the operand is WRITEMASKED, use a masked transfer fn.
+ */
+ if (flags & NPY_OP_ITFLAG_WRITEMASKED) {
+ int maskop = NIT_MASKOP(iter);
+ PyArray_Descr *mask_dtype = PyArray_DESCR(op[maskop]);
+
+ /*
+ * If the mask's stride is contiguous, use it, otherwise
+ * the mask may or may not be buffered, so the stride
+ * could be inconsistent.
+ */
+ if (PyArray_GetMaskedDTypeTransferFunction(
+ (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
+ op_dtype[iop]->elsize,
+ op_stride,
+ (strides[maskop] == mask_dtype->elsize) ?
+ mask_dtype->elsize :
+ NPY_MAX_INTP,
+ op_dtype[iop],
+ PyArray_DESCR(op[iop]),
+ mask_dtype,
+ move_references,
+ (PyArray_MaskedStridedTransferFn **)&stransfer,
+ &transferdata,
+ &needs_api) != NPY_SUCCEED) {
+ goto fail;
+ }
+ }
+ else {
+ if (PyArray_GetDTypeTransferFunction(
(flags & NPY_OP_ITFLAG_ALIGNED) != 0,
op_dtype[iop]->elsize,
op_stride,
@@ -2968,7 +3130,8 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
&stransfer,
&transferdata,
&needs_api) != NPY_SUCCEED) {
- goto fail;
+ goto fail;
+ }
}
writetransferfn[iop] = stransfer;
writetransferdata[iop] = transferdata;
diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h
index b4cd79f9a..b1c80ea2f 100644
--- a/numpy/core/src/private/lowlevel_strided_loops.h
+++ b/numpy/core/src/private/lowlevel_strided_loops.h
@@ -291,6 +291,18 @@ PyArray_TransferStridedToNDim(npy_intp ndim,
PyArray_StridedTransferFn *stransfer,
NpyAuxData *transferdata);
+NPY_NO_EXPORT npy_intp
+PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
+ char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc,
+ char *src, npy_intp src_stride,
+ npy_uint8 *mask, npy_intp mask_stride,
+ npy_intp *coords, npy_intp coords_inc,
+ npy_intp *shape, npy_intp shape_inc,
+ npy_intp count, npy_intp src_itemsize,
+ PyArray_MaskedStridedTransferFn *stransfer,
+ NpyAuxData *data);
+
+
/*
* TRIVIAL ITERATION
*