diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-04 11:49:24 -0800 |
---|---|---|
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-09 01:55:01 -0800 |
commit | b3efccc0e7de4c73f3bd009714f3586360baeecb (patch) | |
tree | 78546a97b2ae8bc44445b2ecd0a63713fc28b7fc | |
parent | fe791db7b96ca4f19d490f43cf3dc5157d7e325f (diff) | |
download | numpy-b3efccc0e7de4c73f3bd009714f3586360baeecb.tar.gz |
ENH: iter: Change nested iteration pattern and add nested_iters Python function
Nested iteration now uses a function NpyIter_ResetBasePointers, and the
NPY_ITER_OFFSETS flag which was the previous way to do this has been
removed.
-rw-r--r-- | numpy/core/numeric.py | 3 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 4 | ||||
-rw-r--r-- | numpy/core/src/multiarray/new_iterator.c.src | 281 | ||||
-rw-r--r-- | numpy/core/src/multiarray/new_iterator.h | 12 | ||||
-rw-r--r-- | numpy/core/src/multiarray/new_iterator_pywrap.c | 394 | ||||
-rw-r--r-- | numpy/core/tests/test_new_iterator.py | 175 |
6 files changed, 715 insertions, 154 deletions
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index 915dc10fd..62fbd4186 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -1,4 +1,4 @@ -__all__ = ['newaxis', 'ndarray', 'flatiter', 'newiter', 'ufunc', +__all__ = ['newaxis', 'ndarray', 'flatiter', 'newiter', 'nested_iters', 'ufunc', 'arange', 'array', 'zeros', 'empty', 'broadcast', 'dtype', 'fromstring', 'fromfile', 'frombuffer', 'int_asbuffer', 'where', 'argwhere', @@ -55,6 +55,7 @@ BUFSIZE = multiarray.BUFSIZE ndarray = multiarray.ndarray flatiter = multiarray.flatiter newiter = multiarray.newiter +nested_iters = multiarray.nested_iters broadcast = multiarray.broadcast dtype = multiarray.dtype ufunc = type(sin) diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index c6caf7188..04178f571 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -42,6 +42,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "number.h" #include "scalartypes.h" #include "numpymemoryview.h" +#include "new_iterator_pywrap.h" /*NUMPY_API * Get Priority from object @@ -2731,6 +2732,9 @@ static struct PyMethodDef array_module_methods[] = { {"array", (PyCFunction)_array_fromobject, METH_VARARGS|METH_KEYWORDS, NULL}, + {"nested_iters", + (PyCFunction)NpyIter_NestedIters, + METH_VARARGS|METH_KEYWORDS, NULL}, {"arange", (PyCFunction)array_arange, METH_VARARGS|METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index 9a59379b0..9dc3bcf40 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -25,16 +25,14 @@ #define NPY_ITFLAG_HASINDEX 0x004 /* The iterator is tracking coordinates */ #define NPY_ITFLAG_HASCOORDS 0x008 -/* The iterator returns offsets instead of pointers */ -#define NPY_ITFLAG_HASOFFSETS 0x010 /* The iteration order was forced on construction */ -#define NPY_ITFLAG_FORCEDORDER 0x020 +#define NPY_ITFLAG_FORCEDORDER 0x010 /* The inner loop is handled outside the iterator */ -#define NPY_ITFLAG_NOINNER 0x040 +#define NPY_ITFLAG_NOINNER 0x020 /* The iterator is buffered */ -#define NPY_ITFLAG_BUFFER 0x080 +#define NPY_ITFLAG_BUFFER 0x040 /* The iterator is buffered */ -#define NPY_ITFLAG_GROWINNER 0x100 +#define NPY_ITFLAG_GROWINNER 0x080 /* Internal iterator per-operand iterator flags */ @@ -78,6 +76,8 @@ ((NPY_SIZEOF_INTP)*(niter)) #define NIT_RESETDATAPTR_SIZEOF(itflags, ndim, niter) \ ((NPY_SIZEOF_INTP)*(niter+1)) +#define NIT_BASEOFFSETS_SIZEOF(itflags, ndim, niter) \ + ((NPY_SIZEOF_INTP)*(niter+1)) #define NIT_OBJECTS_SIZEOF(itflags, ndim, niter) \ ((NPY_SIZEOF_INTP)*(niter)) #define NIT_OPITFLAGS_SIZEOF(itflags, ndim, niter) \ @@ -97,9 +97,12 @@ #define NIT_RESETDATAPTR_OFFSET(itflags, ndim, niter) \ (NIT_DTYPES_OFFSET(itflags, ndim, niter) + \ NIT_DTYPES_SIZEOF(itflags, ndim, niter)) -#define NIT_OBJECTS_OFFSET(itflags, ndim, niter) \ +#define NIT_BASEOFFSETS_OFFSET(itflags, ndim, niter) \ (NIT_RESETDATAPTR_OFFSET(itflags, ndim, niter) + \ NIT_RESETDATAPTR_SIZEOF(itflags, ndim, niter)) +#define NIT_OBJECTS_OFFSET(itflags, ndim, niter) \ + (NIT_BASEOFFSETS_OFFSET(itflags, ndim, niter) + \ + NIT_BASEOFFSETS_SIZEOF(itflags, ndim, niter)) #define NIT_OPITFLAGS_OFFSET(itflags, ndim, niter) \ (NIT_OBJECTS_OFFSET(itflags, ndim, niter) + \ NIT_OBJECTS_SIZEOF(itflags, ndim, niter)) @@ -125,6 +128,8 @@ (char*)(iter) + NIT_DTYPES_OFFSET(itflags, ndim, niter))) #define NIT_RESETDATAPTR(iter) ((char **)( \ (char*)(iter) + NIT_RESETDATAPTR_OFFSET(itflags, ndim, niter))) +#define NIT_BASEOFFSETS(iter) ((npy_intp *)( \ + (char*)(iter) + NIT_BASEOFFSETS_OFFSET(itflags, ndim, niter))) #define NIT_OBJECTS(iter) ((PyArrayObject **)( \ (char*)(iter) + NIT_OBJECTS_OFFSET(itflags, ndim, niter))) #define NIT_OPITFLAGS(iter) ( \ @@ -304,16 +309,21 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, for (iiter = 0; iiter < niter; ++iiter) { npy_intp *axes = op_axes[iiter]; if (axes != NULL) { - memset(axes_dupcheck, 0, oa_ndim); + memset(axes_dupcheck, 0, NPY_MAXDIMS); for (idim = 0; idim < oa_ndim; ++idim) { npy_intp i = axes[idim]; if (i >= 0) { - if (i >= NPY_MAXDIMS || - axes_dupcheck[i] == 1) { + if (i >= NPY_MAXDIMS) { + PyErr_Format(PyExc_ValueError, + "The 'op_axes' provided to the iterator " + "constructor contained invalid " + "values %d", (int)i); + return NULL; + } else if(axes_dupcheck[i] == 1) { PyErr_Format(PyExc_ValueError, "The 'op_axes' provided to the iterator " "constructor contained duplicate " - "or invalid values"); + "value %d", (int)i); return NULL; } else { @@ -330,19 +340,6 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } - /* If offsets were requested, make sure copying is disabled */ - if (itflags&NPY_ITFLAG_HASOFFSETS) { - for (iiter = 0; iiter < niter; ++iiter) { - if (op_flags[iiter]&(NPY_ITER_COPY| - NPY_ITER_UPDATEIFCOPY)) { - PyErr_SetString(PyExc_ValueError, - "If the iterator flag NPY_ITER_OFFSETS is used, " - "copying and buffering must not be enabled"); - return NULL; - } - } - } - /* * If buffering is enabled, and no buffersize was given, use a default * chosen to be big enough to get some amortization benefits, but @@ -445,48 +442,44 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, for (iiter = 0; iiter < niter; ++iiter) { NIT_DTYPES(iter)[iiter] = op_dtype[iiter]; NIT_OBJECTS(iter)[iiter] = op[iiter]; + NIT_BASEOFFSETS(iter)[iiter] = 0; - if (itflags&NPY_ITFLAG_HASOFFSETS) { - /* If offsets were requested, set the data pointers to zero */ - op_dataptr[iiter] = 0; - } - else { - /* Get the data pointer for this operand */ - switch (op_type[iiter]) { - case NPY_ITER_OP_ARRAY: - /* - * Array casting/copying is handled later, once the - * iteration order is finalized. Here, we - * optimistically assume the array will be used - * as is. - */ - op_dataptr[iiter] = PyArray_DATA(op[iiter]); - break; + /* Get the data pointer for this operand */ + switch (op_type[iiter]) { + case NPY_ITER_OP_ARRAY: + /* + * Array casting/copying is handled later, once the + * iteration order is finalized. Here, we + * optimistically assume the array will be used + * as is. + */ + op_dataptr[iiter] = PyArray_DATA(op[iiter]); + break; - case NPY_ITER_OP_NULL: - op_dataptr[iiter] = NULL; - /* Now that ndim is fixed, outputs get the full ndim */ - if (allocate_output_scalars) { - op_ndim[iiter] = 0; - } - else { - op_ndim[iiter] = ndim; - } - /* Flag this so later we can avoid flipping axes */ - any_allocate_if_null = 1; - /* - * If the data type wasn't provided, will need to - * calculate it later. - */ - if (op_dtype[iiter] == NULL) { - any_missing_dtypes = 1; - } - break; - } + case NPY_ITER_OP_NULL: + op_dataptr[iiter] = NULL; + /* Now that ndim is fixed, outputs get the full ndim */ + if (allocate_output_scalars) { + op_ndim[iiter] = 0; + } + else { + op_ndim[iiter] = ndim; + } + /* Flag this so later we can avoid flipping axes */ + any_allocate_if_null = 1; + /* + * If the data type wasn't provided, will need to + * calculate it later. + */ + if (op_dtype[iiter] == NULL) { + any_missing_dtypes = 1; + } + break; } } /* Set resetindex to zero as well (it's just after the resetdataptr) */ op_dataptr[niter] = 0; + NIT_BASEOFFSETS(iter)[niter] = 0; /* * Initialize buffer data (must set the buffers and transferdata @@ -1012,6 +1005,50 @@ void NpyIter_Reset(NpyIter *iter) } } +/* Resets the iterator to its initial state, with new base data pointers */ +void NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + npy_intp idim, ndim = NIT_NDIM(iter); + npy_intp iiter, niter = NIT_NITER(iter); + + char **resetdataptr; + char *axisdata; + npy_intp *baseoffsets; + npy_intp sizeof_axisdata; + npy_intp istrides, nstrides; + + resetdataptr = NIT_RESETDATAPTR(iter); + baseoffsets = NIT_BASEOFFSETS(iter); + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_SIZEOF_AXISDATA(itflags, ndim, niter); + nstrides = NAD_NSTRIDES(); + + if (itflags&NPY_ITFLAG_BUFFER) { + /* Copy any data from the buffers back to the arrays */ + npyiter_copy_from_buffers(iter); + } + + /* The new data pointers for resetting */ + for (iiter = 0; iiter < niter; ++iiter) { + resetdataptr[iiter] = baseptrs[iiter] + baseoffsets[iiter]; + } + + for (idim = 0; idim < ndim; ++idim, axisdata += sizeof_axisdata) { + char **ptrs; + NAD_COORD(axisdata) = 0; + ptrs = NAD_PTRS(axisdata); + for (istrides = 0; istrides < nstrides; ++istrides) { + ptrs[istrides] = resetdataptr[istrides]; + } + } + + if (itflags&NPY_ITFLAG_BUFFER) { + /* Prepare the next buffers and set pos/size */ + npyiter_copy_to_buffers(iter); + } +} + /* * Sets the iterator to the specified coordinates, which must have the * correct number of entries for 'ndim'. It is only valid @@ -1609,11 +1646,6 @@ int NpyIter_HasIndex(NpyIter *iter) return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASINDEX) != 0; } -int NpyIter_HasOffsets(NpyIter *iter) -{ - return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASOFFSETS) != 0; -} - npy_intp NpyIter_GetNDim(NpyIter *iter) { return NIT_NDIM(iter); @@ -1717,13 +1749,6 @@ PyArrayObject *NpyIter_GetIterView(NpyIter *iter, npy_intp i) return NULL; } - /* Need data pointers to make the views */ - if (itflags&NPY_ITFLAG_HASOFFSETS) { - PyErr_SetString(PyExc_IndexError, - "cannot provide an iterator view when tracking offsets"); - return NULL; - } - /* Don't provide views if buffering is enabled */ if (itflags&NPY_ITFLAG_BUFFER) { PyErr_SetString(PyExc_ValueError, @@ -1875,17 +1900,6 @@ pyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags) */ (*itflags) |= NPY_ITFLAG_HASCOORDS; } - /* Check if offsets were requested instead of pointers */ - if (flags&NPY_ITER_OFFSETS) { - /* Buffering is incompatible with offsets */ - if (flags&(NPY_ITER_BUFFERED|NPY_ITER_BUFFERED_GROWINNER)) { - PyErr_SetString(PyExc_ValueError, - "Iterator flag BUFFERED cannot be used " - "with the flag OFFSETS"); - return 0; - } - (*itflags) |= NPY_ITFLAG_HASOFFSETS; - } /* Check if the caller wants to handle inner iteration */ if (flags&NPY_ITER_NO_INNER_ITERATION) { if ((*itflags)&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASCOORDS)) { @@ -2161,6 +2175,25 @@ pyiter_prepare_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype, return 1; } +static const char * +npyiter_casting_to_string(NPY_CASTING casting) +{ + switch (casting) { + case NPY_NO_CASTING: + return "'no'"; + case NPY_EQUIV_CASTING: + return "'equiv'"; + case NPY_SAFE_CASTING: + return "'safe'"; + case NPY_SAME_KIND_CASTING: + return "'same_kind'"; + case NPY_UNSAFE_CASTING: + return "'unsafe'"; + default: + return "<unknown>"; + } +} + static int npyiter_check_casting(npy_intp niter, PyArrayObject **op, PyArray_Descr **op_dtype, @@ -2181,7 +2214,8 @@ npyiter_check_casting(npy_intp niter, PyArrayObject **op, PyErr_Format(PyExc_TypeError, "Iterator operand %d dtype could not be cast " "to the requested dtype, according to " - "the casting enabled", (int)iiter); + "the casting rule given, %s", (int)iiter, + npyiter_casting_to_string(casting)); return 0; } /* Check write (temp -> op) casting */ @@ -2192,7 +2226,8 @@ npyiter_check_casting(npy_intp niter, PyArrayObject **op, PyErr_Format(PyExc_TypeError, "Iterator requested dtype could not be cast " "to the operand %d dtype, according to " - "the casting enabled", (int)iiter); + "the casting rule given, %s", (int)iiter, + npyiter_casting_to_string(casting)); return 0; } @@ -2498,6 +2533,7 @@ npyiter_replace_axisdata(NpyIter *iter, npy_intp iiter, char *axisdata0, *axisdata; npy_intp sizeof_axisdata; npy_intp *perm; + npy_intp baseoffset = 0; perm = NIT_PERM(iter); axisdata0 = NIT_AXISDATA(iter); @@ -2536,7 +2572,7 @@ npyiter_replace_axisdata(NpyIter *iter, npy_intp iiter, if (p < 0) { /* If the perm entry is negative, flip the axis */ NAD_STRIDES(axisdata)[iiter] = -stride; - op_dataptr += stride*(shape-1); + baseoffset += stride*(shape-1); } else { NAD_STRIDES(axisdata)[iiter] = stride; @@ -2545,8 +2581,11 @@ npyiter_replace_axisdata(NpyIter *iter, npy_intp iiter, } } - /* Now the base data pointer is calculated, set it everywhere its needed */ + op_dataptr += baseoffset; + + /* Now the base data pointer is calculated, set it everywhere it's needed */ NIT_RESETDATAPTR(iter)[iiter] = op_dataptr; + NIT_BASEOFFSETS(iter)[iiter] = baseoffset; axisdata = axisdata0; for (idim = 0; idim < ndim; ++idim, axisdata += sizeof_axisdata) { NAD_PTRS(axisdata)[iiter] = op_dataptr; @@ -2685,12 +2724,12 @@ npyiter_flip_negative_strides(NpyIter *iter) npy_intp istrides, nstrides = NAD_NSTRIDES(); char *axisdata, *axisdata0; - npy_intp *ptrs0; + npy_intp *baseoffsets; npy_intp sizeof_axisdata = NIT_SIZEOF_AXISDATA(itflags, ndim, niter); int any_flipped = 0; axisdata0 = axisdata = NIT_AXISDATA(iter); - ptrs0 = (npy_intp*)NAD_PTRS(axisdata0); + baseoffsets = NIT_BASEOFFSETS(iter); for (idim = 0; idim < ndim; ++idim, axisdata += sizeof_axisdata) { npy_intp *strides = NAD_STRIDES(axisdata); int any_negative = 0; @@ -2718,7 +2757,7 @@ npyiter_flip_negative_strides(NpyIter *iter) npy_intp stride = strides[istrides]; /* Adjust the base pointers to start at the end */ - ptrs0[istrides] += shapem1 * stride; + baseoffsets[istrides] += shapem1 * stride; /* Flip the stride */ strides[istrides] = -stride; } @@ -2734,16 +2773,16 @@ npyiter_flip_negative_strides(NpyIter *iter) * in the first AXISDATA, and need to be copied to all the rest */ if (any_flipped) { - npy_intp *resetdataptr = (npy_intp*)NIT_RESETDATAPTR(iter); + char **resetdataptr = NIT_RESETDATAPTR(iter); for (istrides = 0; istrides < nstrides; ++istrides) { - resetdataptr[istrides] = ptrs0[istrides]; + resetdataptr[istrides] += baseoffsets[istrides]; } axisdata = axisdata0; for (idim = 0; idim < ndim; ++idim, axisdata += sizeof_axisdata) { - npy_intp *ptrs = (npy_intp*)NAD_PTRS(axisdata); + char **ptrs = NAD_PTRS(axisdata); for (istrides = 0; istrides < nstrides; ++istrides) { - ptrs[istrides] = ptrs0[istrides]; + ptrs[istrides] = resetdataptr[istrides]; } } /* @@ -3085,36 +3124,63 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, if (i >= 0) { strides[i] = stride; - stride *= NAD_SHAPE(axisdata); if (p < 0) { reversestride[i] = 1; anyreverse = 1; } if (shape == NULL) { new_shape[i] = NAD_SHAPE(axisdata); + stride *= new_shape[i]; + } + else { + stride *= shape[i]; } } } /* * If custom axes were specified, some dimensions may not have been used. - * Throw an error in this case. + * Throw an error if no shape was specified (i.e. an allocated output), + * otherwise fill in the rest. */ for (idim = 0; idim < op_ndim; ++idim) { if (strides[idim] == 0) { - if (shape) { - PyErr_SetString(PyExc_ValueError, - "Iterator input requiring a temporary copy " - "had custom axes specified which don't cover " - "all the input axes"); - } - else { + npy_intp factor, new_strides[NPY_MAXDIMS], + itemsize; + if (shape == NULL) { PyErr_SetString(PyExc_ValueError, "Iterator automatically allocated output " "had custom axes specified which don't cover " "all the input axes"); + return NULL; } - return NULL; + + /* Fill in the missing strides in C order */ + factor = 1; + itemsize = op_dtype->elsize; + for (idim = op_ndim-1; idim >= 0; --idim) { + if (strides[idim] == 0) { + new_strides[idim] = factor * itemsize; + factor *= shape[idim]; + } + } + + /* + * Copy the missing strides, and multiply the existing strides + * by the calculated factor. This way, the missing strides + * are tighter together in memory, which is good for nested + * loops. + */ + for (idim = 0; idim < op_ndim; ++idim) { + if (strides[idim] == 0) { + strides[idim] = new_strides[idim]; + } + else { + strides[idim] *= factor; + } + } + + break; } } @@ -3719,8 +3785,6 @@ NpyIter_DebugPrint(NpyIter *iter) printf("HASINDEX "); if (itflags&NPY_ITFLAG_HASCOORDS) printf("HASCOORDS "); - if (itflags&NPY_ITFLAG_HASOFFSETS) - printf("HASOFFSETS "); if (itflags&NPY_ITFLAG_FORCEDORDER) printf("FORCEDORDER "); if (itflags&NPY_ITFLAG_NOINNER) @@ -3763,6 +3827,11 @@ NpyIter_DebugPrint(NpyIter *iter) printf("%p ", NIT_RESETDATAPTR(iter)[iiter]); } printf("\n"); + printf("BaseOffsets: "); + for (iiter = 0; iiter < niter; ++iiter) { + printf("%i ", (int)NIT_BASEOFFSETS(iter)[iiter]); + } + printf("\n"); if (itflags&NPY_ITFLAG_HASINDEX) { printf("InitIndex: %d\n", (int)(npy_intp)NIT_RESETDATAPTR(iter)[niter]); diff --git a/numpy/core/src/multiarray/new_iterator.h b/numpy/core/src/multiarray/new_iterator.h index 90949471a..e89fb2569 100644 --- a/numpy/core/src/multiarray/new_iterator.h +++ b/numpy/core/src/multiarray/new_iterator.h @@ -49,8 +49,10 @@ int NpyIter_RemoveInnerLoop(NpyIter *iter); /* Deallocate an iterator */ int NpyIter_Deallocate(NpyIter* iter); -/* Resets the iterator back to its initial state */ +/* Resets the iterator to its initial state */ void NpyIter_Reset(NpyIter *iter); +/* Resets the iterator to its initial state, with new base data pointers */ +void NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs); /* Sets the iterator to point at the coordinates in 'coords' */ int NpyIter_GotoCoords(NpyIter *iter, npy_intp *coords); /* Sets the iterator to point at the given index */ @@ -62,8 +64,6 @@ int NpyIter_HasInnerLoop(NpyIter *iter); int NpyIter_HasCoords(NpyIter *iter); /* Whether the iterator is tracking an index */ int NpyIter_HasIndex(NpyIter *iter); -/* Whether the iterator gives back offsets instead of pointers */ -int NpyIter_HasOffsets(NpyIter *iter); /* Compute a specialized iteration function for an iterator */ NpyIter_IterNext_Fn NpyIter_GetIterNext(NpyIter *iter); @@ -116,12 +116,10 @@ NPY_NO_EXPORT void NpyIter_DebugPrint(NpyIter *iter); #define NPY_ITER_NO_INNER_ITERATION 0x00000008 /* Convert all the operands to a common data type */ #define NPY_ITER_COMMON_DTYPE 0x00000010 -/* Produce offsets instead of pointers into the data */ -#define NPY_ITER_OFFSETS 0x00000020 /* Enables buffering */ -#define NPY_ITER_BUFFERED 0x00000040 +#define NPY_ITER_BUFFERED 0x00000020 /* Enables buffering, and grows the inner loop when possible */ -#define NPY_ITER_BUFFERED_GROWINNER 0x00000080 +#define NPY_ITER_BUFFERED_GROWINNER 0x00000040 /*** Per-operand flags that may be passed to the iterator constructors ***/ diff --git a/numpy/core/src/multiarray/new_iterator_pywrap.c b/numpy/core/src/multiarray/new_iterator_pywrap.c index 089d62dae..fbc9f1b65 100644 --- a/numpy/core/src/multiarray/new_iterator_pywrap.c +++ b/numpy/core/src/multiarray/new_iterator_pywrap.c @@ -19,6 +19,8 @@ struct NewNpyArrayIterObject_tag { NpyIter *iter; /* Flag indicating iteration started/stopped */ char started, finished; + /* Child to update for nested iteration */ + NewNpyArrayIterObject *nested_child; /* Cached values from the iterator */ NpyIter_IterNext_Fn iternext; NpyIter_GetCoords_Fn getcoords; @@ -70,6 +72,7 @@ npyiter_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) self = (NewNpyArrayIterObject *)subtype->tp_alloc(subtype, 0); if (self != NULL) { self->iter = NULL; + self->nested_child = NULL; } return (PyObject *)self; @@ -86,6 +89,11 @@ NpyIter_GlobalFlagsConverter(PyObject *flags_in, npy_uint32 *flags) Py_ssize_t length = 0; npy_uint32 flag = 0; + if (flags_in == NULL || flags_in == Py_None) { + *flags = 0; + return NPY_SUCCEED; + } + if (!PyTuple_Check(flags_in) && !PyList_Check(flags_in)) { PyErr_SetString(PyExc_ValueError, "Iterator global flags must be a list or tuple of strings"); @@ -142,11 +150,6 @@ NpyIter_GlobalFlagsConverter(PyObject *flags_in, npy_uint32 *flags) flag = NPY_ITER_NO_INNER_ITERATION; } break; - case 'o': - if (strcmp(str, "offsets") == 0) { - flag = NPY_ITER_OFFSETS; - } - break; } if (flag == 0) { PyErr_Format(PyExc_ValueError, @@ -669,7 +672,7 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) "order", "casting", "op_axes", "buffersize", NULL}; - PyObject *op_in, *flags_in = NULL, *op_flags_in = NULL, + PyObject *op_in = NULL, *flags_in = NULL, *op_flags_in = NULL, *op_dtypes_in = NULL, *op_axes_in = NULL; npy_intp iiter, niter = 0; @@ -702,6 +705,12 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) return -1; } + /* flags */ + if (NpyIter_GlobalFlagsConverter(flags_in, &flags) != NPY_SUCCEED) { + return -1; + } + + /* op and op_flags */ if (npyiter_convert_ops(op_in, op_flags_in, op, op_flags, &niter) != NPY_SUCCEED) { return -1; @@ -710,12 +719,6 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) /* Set the dtypes to all NULL to start as well */ memset(op_request_dtypes, 0, sizeof(op_request_dtypes[0])*niter); - /* flags */ - if (flags_in != NULL && - NpyIter_GlobalFlagsConverter(flags_in, &flags) != NPY_SUCCEED) { - goto fail; - } - /* op_request_dtypes */ if (op_dtypes_in != NULL && op_dtypes_in != Py_None && npyiter_convert_dtypes(op_dtypes_in, @@ -737,7 +740,7 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) } self->iter = NpyIter_MultiNew(niter, op, flags, order, casting, op_flags, - (PyArray_Descr**)op_request_dtypes, + op_request_dtypes, oa_ndim, oa_ndim > 0 ? op_axes : NULL, buffersize); @@ -767,16 +770,337 @@ fail: return -1; } +NPY_NO_EXPORT PyObject * +NpyIter_NestedIters(PyObject *NPY_UNUSED(self), + PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"op", "axes", "flags", "op_flags", + "op_dtypes", "order", + "casting", "buffersize", + NULL}; + + PyObject *op_in = NULL, *axes_in = NULL, *flags_in = NULL, + *op_flags_in = NULL, *op_dtypes_in = NULL; + + npy_intp iiter, niter = 0, inest, nnest = 0; + PyArrayObject *op[NPY_MAXARGS]; + npy_uint32 flags = 0, flags_inner = 0; + NPY_ORDER order = NPY_KEEPORDER; + NPY_CASTING casting = NPY_SAFE_CASTING; + npy_uint32 op_flags[NPY_MAXARGS], op_flags_inner[NPY_MAXARGS]; + PyArray_Descr *op_request_dtypes[NPY_MAXARGS], + *op_request_dtypes_inner[NPY_MAXARGS]; + npy_intp op_axes_data[NPY_MAXDIMS]; + npy_intp *nested_op_axes[NPY_MAXDIMS]; + npy_intp nested_naxes[NPY_MAXDIMS], iaxes, naxes; + npy_intp negones[NPY_MAXDIMS]; + char used_axes[NPY_MAXDIMS]; + int buffersize = 0; + + PyObject *ret = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OOOO&O&i", kwlist, + &op_in, + &axes_in, + &flags_in, + &op_flags_in, + &op_dtypes_in, + npyiter_order_converter, &order, + PyArray_CastingConverter, &casting, + &buffersize)) { + return NULL; + } + + /* axes */ + if (!PyTuple_Check(axes_in) && !PyList_Check(axes_in)) { + PyErr_SetString(PyExc_ValueError, + "axes must be a tuple of axis arrays"); + return NULL; + } + nnest = PySequence_Size(axes_in); + if (nnest < 2) { + PyErr_SetString(PyExc_ValueError, + "axes must have at least 2 entries for nested iteration"); + return NULL; + } + naxes = 0; + memset(used_axes, 0, NPY_MAXDIMS); + for (inest = 0; inest < nnest; ++inest) { + PyObject *item = PySequence_GetItem(axes_in, inest); + npy_intp i; + if (item == NULL) { + return NULL; + } + if (!PyTuple_Check(item) && !PyList_Check(item)) { + PyErr_SetString(PyExc_ValueError, + "Each item in axes must be a an integer tuple"); + Py_DECREF(item); + return NULL; + } + nested_naxes[inest] = PySequence_Size(item); + if (naxes + nested_naxes[inest] > NPY_MAXDIMS) { + PyErr_SetString(PyExc_ValueError, + "Too many axes given"); + Py_DECREF(item); + return NULL; + } + for (i = 0; i < nested_naxes[inest]; ++i) { + PyObject *v = PySequence_GetItem(item, i); + npy_intp axis; + if (v == NULL) { + Py_DECREF(item); + return NULL; + } + axis = PyInt_AsLong(v); + Py_DECREF(v); + if (axis < 0 || axis >= NPY_MAXDIMS) { + PyErr_SetString(PyExc_ValueError, + "An axis is out of bounds"); + Py_DECREF(item); + return NULL; + } + /* + * This check is very important, without it out of bounds + * data accesses are possible. + */ + if (used_axes[axis] != 0) { + PyErr_SetString(PyExc_ValueError, + "An axis is used more than once"); + Py_DECREF(item); + return NULL; + } + used_axes[axis] = 1; + op_axes_data[naxes+i] = axis; + } + nested_op_axes[inest] = &op_axes_data[naxes]; + naxes += nested_naxes[inest]; + Py_DECREF(item); + } + + /* flags */ + if (NpyIter_GlobalFlagsConverter(flags_in, &flags) != NPY_SUCCEED) { + return NULL; + } + + /* op and op_flags */ + if (npyiter_convert_ops(op_in, op_flags_in, op, op_flags, &niter) + != NPY_SUCCEED) { + return NULL; + } + + /* Set the dtypes to all NULL to start as well */ + memset(op_request_dtypes, 0, sizeof(op_request_dtypes[0])*niter); + memset(op_request_dtypes_inner, 0, + sizeof(op_request_dtypes_inner[0])*niter); + + /* op_request_dtypes */ + if (op_dtypes_in != NULL && op_dtypes_in != Py_None && + npyiter_convert_dtypes(op_dtypes_in, + op_request_dtypes, niter) != NPY_SUCCEED) { + goto fail; + } + + ret = PyTuple_New(nnest); + if (ret == NULL) { + goto fail; + } + + /* For broadcasting allocated arrays */ + for (iaxes = 0; iaxes < naxes; ++iaxes) { + negones[iaxes] = -1; + } + + /* + * Clear any unnecessary ALLOCATE flags, so we can use them + * to indicate exactly the allocated outputs. Also, separate + * the inner loop flags. + */ + for (iiter = 0; iiter < niter; ++iiter) { + if ((op_flags[iiter]&NPY_ITER_ALLOCATE) && op[iiter] != NULL) { + op_flags[iiter] &= ~NPY_ITER_ALLOCATE; + } + + /* + * Clear any flags allowing copies or output allocation for + * the inner loop. + */ + op_flags_inner[iiter] = op_flags[iiter] & ~(NPY_ITER_COPY| + NPY_ITER_UPDATEIFCOPY| + NPY_ITER_ALLOCATE); + /* + * If buffering is enabled and copying is not, + * clear the nbo_aligned flag and strip the data type + * for the outer loops. + */ + if ((flags&(NPY_ITER_BUFFERED|NPY_ITER_BUFFERED_GROWINNER)) && + !(op_flags[iiter]&(NPY_ITER_COPY| + NPY_ITER_UPDATEIFCOPY| + NPY_ITER_ALLOCATE))) { + op_flags[iiter] &= ~NPY_ITER_NBO_ALIGNED; + op_request_dtypes_inner[iiter] = op_request_dtypes[iiter]; + op_request_dtypes[iiter] = NULL; + } + } + + /* Only the inner loop gets the buffering and no inner flags */ + flags_inner = flags&~NPY_ITER_COMMON_DTYPE; + flags &= ~(NPY_ITER_NO_INNER_ITERATION| + NPY_ITER_BUFFERED| + NPY_ITER_BUFFERED_GROWINNER); + + for (inest = 0; inest < nnest; ++inest) { + NewNpyArrayIterObject *iter; + npy_intp *op_axes_niter[NPY_MAXARGS]; + + /* + * All the operands' op_axes are the same, except for + * allocated outputs. + */ + for (iiter = 0; iiter < niter; ++iiter) { + if (op_flags[iiter]&NPY_ITER_ALLOCATE) { + if (inest == 0) { + op_axes_niter[iiter] = NULL; + } + else { + op_axes_niter[iiter] = negones; + } + } + else { + op_axes_niter[iiter] = nested_op_axes[inest]; + } + } + + /* + printf("\n"); + for (iiter = 0; iiter < niter; ++iiter) { + npy_intp i; + + for (i = 0; i < nested_naxes[inest]; ++i) { + printf("%d ", (int)op_axes_niter[iiter][i]); + } + printf("\n"); + } + */ + + /* Allocate the iterator */ + iter = (NewNpyArrayIterObject *)npyiter_new(&NpyIter_Type, NULL, NULL); + if (iter == NULL) { + Py_DECREF(ret); + goto fail; + } + + if (inest < nnest-1) { + iter->iter = NpyIter_MultiNew(niter, op, flags, order, + casting, op_flags, op_request_dtypes, + nested_naxes[inest], op_axes_niter, + 0); + } + else { + iter->iter = NpyIter_MultiNew(niter, op, flags_inner, order, + casting, op_flags_inner, + op_request_dtypes_inner, + nested_naxes[inest], op_axes_niter, + buffersize); + } + + if (iter->iter == NULL) { + Py_DECREF(ret); + goto fail; + } + + /* Cache some values for the member functions to use */ + npyiter_cache_values(iter); + + iter->started = 0; + iter->finished = 0; + + /* + * If there are any allocated outputs or any copies were made, + * adjust op so that the other iterators use the same ones. + */ + if (inest == 0) { + PyArrayObject **objects = NpyIter_GetObjectArray(iter->iter); + for (iiter = 0; iiter < niter; ++iiter) { + if (op[iiter] != objects[iiter]) { + Py_XDECREF(op[iiter]); + op[iiter] = objects[iiter]; + Py_INCREF(op[iiter]); + } + + /* + * Clear any flags allowing copies for + * the rest of the iterators + */ + op_flags[iiter] &= ~(NPY_ITER_COPY| + NPY_ITER_UPDATEIFCOPY); + } + /* Clear the common dtype flag for the rest of the iterators */ + flags &= ~NPY_ITER_COMMON_DTYPE; + } + + PyTuple_SET_ITEM(ret, inest, (PyObject *)iter); + } + + /* Release our references to the ops and dtypes */ + for (iiter = 0; iiter < niter; ++iiter) { + Py_XDECREF(op[iiter]); + Py_XDECREF(op_request_dtypes[iiter]); + Py_XDECREF(op_request_dtypes_inner[iiter]); + } + + /* Set up the nested child references */ + for (inest = 0; inest < nnest-1; ++inest) { + NewNpyArrayIterObject *iter; + iter = (NewNpyArrayIterObject *)PyTuple_GET_ITEM(ret, inest); + /* + * Indicates which iterator to reset with new base pointers + * each iteration step. + */ + iter->nested_child = + (NewNpyArrayIterObject *)PyTuple_GET_ITEM(ret, inest+1); + Py_INCREF(iter->nested_child); + /* + * Need to do a nested reset so all the iterators point + * at the right data + */ + NpyIter_ResetBasePointers(iter->nested_child->iter, iter->dataptrs); + } + + return ret; + +fail: + for (iiter = 0; iiter < niter; ++iiter) { + Py_XDECREF(op[iiter]); + Py_XDECREF(op_request_dtypes[iiter]); + Py_XDECREF(op_request_dtypes_inner[iiter]); + } + return NULL; +} + static void npyiter_dealloc(NewNpyArrayIterObject *self) { if (self->iter) { NpyIter_Deallocate(self->iter); self->iter = NULL; + Py_XDECREF(self->nested_child); + self->nested_child = NULL; } self->ob_type->tp_free((PyObject*)self); } +static void +npyiter_resetbasepointers(NewNpyArrayIterObject *self) +{ + while (self->nested_child) { + NpyIter_ResetBasePointers(self->nested_child->iter, + self->dataptrs); + self = self->nested_child; + self->started = 0; + self->finished = 0; + } +} + static PyObject * npyiter_reset(NewNpyArrayIterObject *self) { @@ -790,6 +1114,9 @@ npyiter_reset(NewNpyArrayIterObject *self) self->started = 0; self->finished = 0; + /* If there is nesting, the nested iterators should be reset */ + npyiter_resetbasepointers(self); + Py_RETURN_NONE; } @@ -797,6 +1124,9 @@ static PyObject * npyiter_iternext(NewNpyArrayIterObject *self) { if (self->iter != NULL && !self->finished && self->iternext(self->iter)) { + /* If there is nesting, the nested iterators should be reset */ + npyiter_resetbasepointers(self); + Py_RETURN_TRUE; } else { @@ -877,29 +1207,17 @@ static PyObject *npyiter_value_get(NewNpyArrayIterObject *self) dtypes = self->dtypes; dataptrs = self->dataptrs; - if (NpyIter_HasOffsets(self->iter)) { - npy_intp *offsets = (npy_intp *)dataptrs; - /* Return an array with all the offsets */ + /* Return an array or tuple of arrays with the values */ + if (niter == 1) { + ret = npyiter_seq_item(self, 0); + } + else { ret = PyTuple_New(niter); if (ret == NULL) { return NULL; } for (iiter = 0; iiter < niter; ++iiter) { - PyTuple_SET_ITEM(ret, iiter, PyInt_FromLong(offsets[iiter])); - } - } else { - /* Return an array or tuple of arrays with the values */ - if (niter == 1) { - ret = npyiter_seq_item(self, 0); - } - else { - ret = PyTuple_New(niter); - if (ret == NULL) { - return NULL; - } - for (iiter = 0; iiter < niter; ++iiter) { - PyTuple_SET_ITEM(ret, iiter, npyiter_seq_item(self, iiter)); - } + PyTuple_SET_ITEM(ret, iiter, npyiter_seq_item(self, iiter)); } } @@ -970,6 +1288,8 @@ static PyObject *npyiter_itviews_get(NewNpyArrayIterObject *self) static PyObject * npyiter_next(NewNpyArrayIterObject *self) { + NewNpyArrayIterObject * tmp; + if (self->iter == NULL || self->finished) { return NULL; } @@ -983,6 +1303,9 @@ npyiter_next(NewNpyArrayIterObject *self) self->finished = 1; return NULL; } + + /* If there is nesting, the nested iterators should be reset */ + npyiter_resetbasepointers(self); } self->started = 1; @@ -1083,7 +1406,10 @@ static int npyiter_coords_set(NewNpyArrayIterObject *self, PyObject *value) } self->started = 0; self->finished = 0; - + + /* If there is nesting, the nested iterators should be reset */ + npyiter_resetbasepointers(self); + return 0; } else { @@ -1137,6 +1463,10 @@ static int npyiter_index_set(NewNpyArrayIterObject *self, PyObject *value) } self->started = 0; self->finished = 0; + + /* If there is nesting, the nested iterators should be reset */ + npyiter_resetbasepointers(self); + return 0; } else { diff --git a/numpy/core/tests/test_new_iterator.py b/numpy/core/tests/test_new_iterator.py index e021d68bb..6aec368dd 100644 --- a/numpy/core/tests/test_new_iterator.py +++ b/numpy/core/tests/test_new_iterator.py @@ -948,14 +948,6 @@ def test_iter_op_axes_errors(): assert_raises(ValueError, newiter, [a,a], [], [['readonly']]*2, op_axes=[[0,1],[1,0]]) -def test_iter_offsets(): - # Check that offsets get returned when requested - - a = arange(6, dtype='f4').reshape(2,3) - b = arange(6, dtype='i2').reshape(3,2).T - i = newiter([a,b], ['offsets'], [['readonly']]*2) - assert_equal([x for x in i], [(0,0),(4,4),(8,8),(12,2),(16,6),(20,10)]) - def test_iter_allocate_output_simple(): # Check that the iterator will properly allocate outputs @@ -1306,5 +1298,172 @@ def test_iter_no_broadcast(): assert_raises(ValueError, np.newiter, [a,b,c], [], [['readonly'],['readonly'],['readonly','no_broadcast']]) +def test_iter_nested_iters_basic(): + # Test nested iteration basic usage + a = arange(12).reshape(2,3,2) + + i, j = np.nested_iters(a, [[0],[1,2]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,1,2,3,4,5],[6,7,8,9,10,11]]) + + i, j = np.nested_iters(a, [[0,1],[2]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,1],[2,3],[4,5],[6,7],[8,9],[10,11]]) + + i, j = np.nested_iters(a, [[0,2],[1]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,2,4],[1,3,5],[6,8,10],[7,9,11]]) + +def test_iter_nested_iters_reorder(): + # Test nested iteration basic usage + a = arange(12).reshape(2,3,2) + + # In 'K' order (default), it gets reordered + i, j = np.nested_iters(a, [[0],[2,1]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,1,2,3,4,5],[6,7,8,9,10,11]]) + + i, j = np.nested_iters(a, [[1,0],[2]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,1],[2,3],[4,5],[6,7],[8,9],[10,11]]) + + i, j = np.nested_iters(a, [[2,0],[1]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,2,4],[1,3,5],[6,8,10],[7,9,11]]) + + # In 'C' order, it doesn't + i, j = np.nested_iters(a, [[0],[2,1]], order='C') + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,2,4,1,3,5],[6,8,10,7,9,11]]) + + i, j = np.nested_iters(a, [[1,0],[2]], order='C') + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,1],[6,7],[2,3],[8,9],[4,5],[10,11]]) + + i, j = np.nested_iters(a, [[2,0],[1]], order='C') + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,2,4],[6,8,10],[1,3,5],[7,9,11]]) + +def test_iter_nested_iters_flip_axes(): + # Test nested iteration with negative axes + a = arange(12).reshape(2,3,2)[::-1,::-1,::-1] + + # In 'K' order (default), the axes all get flipped + i, j = np.nested_iters(a, [[0],[1,2]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,1,2,3,4,5],[6,7,8,9,10,11]]) + + i, j = np.nested_iters(a, [[0,1],[2]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,1],[2,3],[4,5],[6,7],[8,9],[10,11]]) + + i, j = np.nested_iters(a, [[0,2],[1]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,2,4],[1,3,5],[6,8,10],[7,9,11]]) + + # In 'C' order, flipping axes is disabled + i, j = np.nested_iters(a, [[0],[1,2]], order='C') + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[11,10,9,8,7,6],[5,4,3,2,1,0]]) + + i, j = np.nested_iters(a, [[0,1],[2]], order='C') + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[11,10],[9,8],[7,6],[5,4],[3,2],[1,0]]) + + i, j = np.nested_iters(a, [[0,2],[1]], order='C') + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[11,9,7],[10,8,6],[5,3,1],[4,2,0]]) + +def test_iter_nested_iters_broadcast(): + # Test nested iteration with broadcasting + a = arange(2).reshape(2,1) + b = arange(3).reshape(1,3) + + i, j = np.nested_iters([a,b], [[0],[1]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[[0,0],[0,1],[0,2]],[[1,0],[1,1],[1,2]]]) + + i, j = np.nested_iters([a,b], [[1],[0]]) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[[0,0],[1,0]],[[0,1],[1,1]],[[0,2],[1,2]]]) + +def test_iter_nested_iters_dtype_copy(): + # Test nested iteration with a copy to change dtype + + # copy + a = arange(6, dtype='i4').reshape(2,3) + i, j = np.nested_iters(a, [[0],[1]], + op_flags=['readonly','copy'], + op_dtypes='f8') + assert_equal(j[0].dtype, np.dtype('f8')) + vals = [] + for x in i: + vals.append([y for y in j]) + assert_equal(vals, [[0,1,2],[3,4,5]]) + vals = None + + # updateifcopy + a = arange(6, dtype='f4').reshape(2,3) + i, j = np.nested_iters(a, [[0],[1]], + op_flags=['readwrite','updateifcopy'], + casting='same_kind', + op_dtypes='f8') + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[()] += 1 + assert_equal(a, [[0,1,2],[3,4,5]]) + i, j, x, y = (None,)*4 # force the updateifcopy + assert_equal(a, [[1,2,3],[4,5,6]]) + +def test_iter_nested_iters_dtype_buffered(): + # Test nested iteration with buffering to change dtype + + a = arange(6, dtype='f4').reshape(2,3) + i, j = np.nested_iters(a, [[0],[1]], + flags=['buffered'], + op_flags=['readwrite'], + casting='same_kind', + op_dtypes='f8') + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[()] += 1 + assert_equal(a, [[1,2,3],[4,5,6]]) + if __name__ == "__main__": run_module_suite() |