From c5c1a4808bf3c2386c5ee6c39b8156a4978092f7 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Thu, 7 Jul 2011 12:32:41 -0500 Subject: ENH: nditer: Rename nditer.c.src to indicate it has just the templated bits --- numpy/core/SConscript | 2 +- numpy/core/code_generators/genapi.py | 2 +- numpy/core/setup.py | 4 +- numpy/core/src/multiarray/nditer.c.src | 605 --------------------------- numpy/core/src/multiarray/nditer_templ.c.src | 605 +++++++++++++++++++++++++++ 5 files changed, 609 insertions(+), 609 deletions(-) delete mode 100644 numpy/core/src/multiarray/nditer.c.src create mode 100644 numpy/core/src/multiarray/nditer_templ.c.src (limited to 'numpy') diff --git a/numpy/core/SConscript b/numpy/core/SConscript index f831515b9..178c7de37 100644 --- a/numpy/core/SConscript +++ b/numpy/core/SConscript @@ -380,7 +380,7 @@ umath_loops_src = env.GenerateFromTemplate(pjoin('src', 'umath', 'loops.c.src')) arraytypes_src = env.GenerateFromTemplate( pjoin('src', 'multiarray', 'arraytypes.c.src')) nditer_src = env.GenerateFromTemplate( - pjoin('src', 'multiarray', 'nditer.c.src')) + pjoin('src', 'multiarray', 'nditer_templ.c.src')) lowlevel_strided_loops_src = env.GenerateFromTemplate( pjoin('src', 'multiarray', 'lowlevel_strided_loops.c.src')) einsum_src = env.GenerateFromTemplate(pjoin('src', 'multiarray', 'einsum.c.src')) diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index 38628d1c6..29cbb271f 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -47,10 +47,10 @@ API_FILES = [join('multiarray', 'methods.c'), join('multiarray', 'datetime_strings.c'), join('multiarray', 'datetime_busday.c'), join('multiarray', 'datetime_busdaycal.c'), - join('multiarray', 'nditer.c.src'), join('multiarray', 'nditer_api.c'), join('multiarray', 'nditer_constr.c'), join('multiarray', 'nditer_pywrap.c'), + join('multiarray', 'nditer_templ.c.src'), join('multiarray', 'einsum.c.src'), join('umath', 
'ufunc_object.c'), join('umath', 'ufunc_type_resolution.c'), diff --git a/numpy/core/setup.py b/numpy/core/setup.py index aa3175b20..d5f2edcc3 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -686,7 +686,7 @@ def configuration(parent_package='',top_path=None): subpath = join('src', 'multiarray') sources = [join(local_dir, subpath, 'scalartypes.c.src'), join(local_dir, subpath, 'arraytypes.c.src'), - join(local_dir, subpath, 'nditer.c.src'), + join(local_dir, subpath, 'nditer_templ.c.src'), join(local_dir, subpath, 'lowlevel_strided_loops.c.src'), join(local_dir, subpath, 'einsum.c.src')] @@ -773,7 +773,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'mapping.c'), join('src', 'multiarray', 'methods.c'), join('src', 'multiarray', 'multiarraymodule.c'), - join('src', 'multiarray', 'nditer.c.src'), + join('src', 'multiarray', 'nditer_templ.c.src'), join('src', 'multiarray', 'nditer_api.c'), join('src', 'multiarray', 'nditer_constr.c'), join('src', 'multiarray', 'nditer_pywrap.c'), diff --git a/numpy/core/src/multiarray/nditer.c.src b/numpy/core/src/multiarray/nditer.c.src deleted file mode 100644 index 59aae244b..000000000 --- a/numpy/core/src/multiarray/nditer.c.src +++ /dev/null @@ -1,605 +0,0 @@ -/* - * This file implements the API functions for NumPy's nditer that - * are specialized using the templating system. - * - * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com) - * The Univerity of British Columbia - * - * See LICENSE.txt for the license. 
- */ - -/* Indicate that this .c file is allowed to include the header */ -#define NPY_ITERATOR_IMPLEMENTATION_CODE -#include "nditer_impl.h" - -/* SPECIALIZED iternext functions that handle the non-buffering part */ - -/**begin repeat - * #const_itflags = 0, - * NPY_ITFLAG_HASINDEX, - * NPY_ITFLAG_EXLOOP, - * NPY_ITFLAG_RANGE, - * NPY_ITFLAG_RANGE|NPY_ITFLAG_HASINDEX# - * #tag_itflags = 0, IND, NOINN, RNG, RNGuIND# - */ -/**begin repeat1 - * #const_ndim = 1, 2, NPY_MAXDIMS# - * #tag_ndim = 1, 2, ANY# - */ -/**begin repeat2 - * #const_nop = 1, 2, NPY_MAXDIMS# - * #tag_nop = 1, 2, ANY# - */ - -/* Specialized iternext (@const_itflags@,@tag_ndim@,@tag_nop@) */ -static int -npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_iters@tag_nop@( - NpyIter *iter) -{ -#if !(@const_itflags@&NPY_ITFLAG_EXLOOP) || (@const_ndim@ > 1) - const npy_uint32 itflags = @const_itflags@; -# if @const_ndim@ >= NPY_MAXDIMS - int idim, ndim = NIT_NDIM(iter); -# endif -# if @const_nop@ < NPY_MAXDIMS - const int nop = @const_nop@; -# else - int nop = NIT_NOP(iter); -# endif - - NpyIter_AxisData *axisdata0; - npy_intp istrides, nstrides = NAD_NSTRIDES(); -#endif -#if @const_ndim@ > 1 - NpyIter_AxisData *axisdata1; - npy_intp sizeof_axisdata; -#endif -#if @const_ndim@ > 2 - NpyIter_AxisData *axisdata2; -#endif - -#if (@const_itflags@&NPY_ITFLAG_RANGE) - /* When ranged iteration is enabled, use the iterindex */ - if (++NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { - return 0; - } -#endif - -#if @const_ndim@ > 1 - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); -#endif - -# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) || (@const_ndim@ > 1) - axisdata0 = NIT_AXISDATA(iter); -# endif -# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) - /* Increment index 0 */ - NAD_INDEX(axisdata0)++; - /* Increment pointer 0 */ - for (istrides = 0; istrides < nstrides; ++istrides) { - NAD_PTRS(axisdata0)[istrides] += NAD_STRIDES(axisdata0)[istrides]; - } -# endif - -#if @const_ndim@ == 1 - -# if 
!(@const_itflags@&NPY_ITFLAG_EXLOOP) - /* Finished when the index equals the shape */ - return NAD_INDEX(axisdata0) < NAD_SHAPE(axisdata0); -# else - return 0; -# endif - -#else - -# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) - if (NAD_INDEX(axisdata0) < NAD_SHAPE(axisdata0)) { - return 1; - } -# endif - - axisdata1 = NIT_INDEX_AXISDATA(axisdata0, 1); - /* Increment index 1 */ - NAD_INDEX(axisdata1)++; - /* Increment pointer 1 */ - for (istrides = 0; istrides < nstrides; ++istrides) { - NAD_PTRS(axisdata1)[istrides] += NAD_STRIDES(axisdata1)[istrides]; - } - - if (NAD_INDEX(axisdata1) < NAD_SHAPE(axisdata1)) { - /* Reset the 1st index to 0 */ - NAD_INDEX(axisdata0) = 0; - /* Reset the 1st pointer to the value of the 2nd */ - for (istrides = 0; istrides < nstrides; ++istrides) { - NAD_PTRS(axisdata0)[istrides] = NAD_PTRS(axisdata1)[istrides]; - } - return 1; - } - -# if @const_ndim@ == 2 - return 0; -# else - - axisdata2 = NIT_INDEX_AXISDATA(axisdata1, 1); - /* Increment index 2 */ - NAD_INDEX(axisdata2)++; - /* Increment pointer 2 */ - for (istrides = 0; istrides < nstrides; ++istrides) { - NAD_PTRS(axisdata2)[istrides] += NAD_STRIDES(axisdata2)[istrides]; - } - - if (NAD_INDEX(axisdata2) < NAD_SHAPE(axisdata2)) { - /* Reset the 1st and 2nd indices to 0 */ - NAD_INDEX(axisdata0) = 0; - NAD_INDEX(axisdata1) = 0; - /* Reset the 1st and 2nd pointers to the value of the 3nd */ - for (istrides = 0; istrides < nstrides; ++istrides) { - NAD_PTRS(axisdata0)[istrides] = NAD_PTRS(axisdata2)[istrides]; - NAD_PTRS(axisdata1)[istrides] = NAD_PTRS(axisdata2)[istrides]; - } - return 1; - } - - for (idim = 3; idim < ndim; ++idim) { - NIT_ADVANCE_AXISDATA(axisdata2, 1); - /* Increment the index */ - NAD_INDEX(axisdata2)++; - /* Increment the pointer */ - for (istrides = 0; istrides < nstrides; ++istrides) { - NAD_PTRS(axisdata2)[istrides] += NAD_STRIDES(axisdata2)[istrides]; - } - - - if (NAD_INDEX(axisdata2) < NAD_SHAPE(axisdata2)) { - /* Reset the indices and pointers of all 
previous axisdatas */ - axisdata1 = axisdata2; - do { - NIT_ADVANCE_AXISDATA(axisdata1, -1); - /* Reset the index to 0 */ - NAD_INDEX(axisdata1) = 0; - /* Reset the pointer to the updated value */ - for (istrides = 0; istrides < nstrides; ++istrides) { - NAD_PTRS(axisdata1)[istrides] = - NAD_PTRS(axisdata2)[istrides]; - } - } while (axisdata1 != axisdata0); - - return 1; - } - } - - return 0; - -# endif /* ndim != 2 */ - -#endif /* ndim != 1 */ -} - -/**end repeat2**/ -/**end repeat1**/ -/**end repeat**/ - - -/**begin repeat - * #const_nop = 1, 2, 3, 4, NPY_MAXDIMS# - * #tag_nop = 1, 2, 3, 4, ANY# - */ - -/* - * Iternext function that handles the reduction buffering part. This - * is done with a double loop to avoid frequent re-buffering. - */ -static int -npyiter_buffered_reduce_iternext_iters@tag_nop@(NpyIter *iter) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - /*int ndim = NIT_NDIM(iter);*/ -#if @const_nop@ >= NPY_MAXDIMS - int nop = NIT_NOP(iter); -#else - const int nop = @const_nop@; -#endif - - int iop; - - NpyIter_AxisData *axisdata; - NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); - char **ptrs; - char *prev_dataptrs[NPY_MAXARGS]; - - ptrs = NBF_PTRS(bufferdata); - - /* - * If the iterator handles the inner loop, need to increment all - * the indices and pointers - */ - if (!(itflags&NPY_ITFLAG_EXLOOP)) { - /* Increment within the buffer */ - if (++NIT_ITERINDEX(iter) < NBF_BUFITEREND(bufferdata)) { - npy_intp *strides; - - strides = NBF_STRIDES(bufferdata); - for (iop = 0; iop < nop; ++iop) { - ptrs[iop] += strides[iop]; - } - return 1; - } - } - else { - NIT_ITERINDEX(iter) += NBF_SIZE(bufferdata); - } - - NPY_IT_DBG_PRINT1("Iterator: Finished iteration %d of outer reduce loop\n", - (int)NBF_REDUCE_POS(bufferdata)); - /* The outer increment for the reduce double loop */ - if (++NBF_REDUCE_POS(bufferdata) < NBF_REDUCE_OUTERSIZE(bufferdata)) { - npy_intp *reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata); - char **reduce_outerptrs = 
NBF_REDUCE_OUTERPTRS(bufferdata); - for (iop = 0; iop < nop; ++iop) { - char *ptr = reduce_outerptrs[iop] + reduce_outerstrides[iop]; - ptrs[iop] = ptr; - reduce_outerptrs[iop] = ptr; - } - NBF_BUFITEREND(bufferdata) = NIT_ITERINDEX(iter) + NBF_SIZE(bufferdata); - return 1; - } - - /* Save the previously used data pointers */ - axisdata = NIT_AXISDATA(iter); - memcpy(prev_dataptrs, NAD_PTRS(axisdata), NPY_SIZEOF_INTP*nop); - - /* Write back to the arrays */ - npyiter_copy_from_buffers(iter); - - /* Check if we're past the end */ - if (NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { - NBF_SIZE(bufferdata) = 0; - return 0; - } - /* Increment to the next buffer */ - else { - npyiter_goto_iterindex(iter, NIT_ITERINDEX(iter)); - } - - /* Prepare the next buffers and set iterend/size */ - npyiter_copy_to_buffers(iter, prev_dataptrs); - - return 1; -} - -/**end repeat**/ - -/* iternext function that handles the buffering part */ -static int -npyiter_buffered_iternext(NpyIter *iter) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - /*int ndim = NIT_NDIM(iter);*/ - int nop = NIT_NOP(iter); - - NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); - - /* - * If the iterator handles the inner loop, need to increment all - * the indices and pointers - */ - if (!(itflags&NPY_ITFLAG_EXLOOP)) { - /* Increment within the buffer */ - if (++NIT_ITERINDEX(iter) < NBF_BUFITEREND(bufferdata)) { - int iop; - npy_intp *strides; - char **ptrs; - - strides = NBF_STRIDES(bufferdata); - ptrs = NBF_PTRS(bufferdata); - for (iop = 0; iop < nop; ++iop) { - ptrs[iop] += strides[iop]; - } - return 1; - } - } - else { - NIT_ITERINDEX(iter) += NBF_SIZE(bufferdata); - } - - /* Write back to the arrays */ - npyiter_copy_from_buffers(iter); - - /* Check if we're past the end */ - if (NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { - NBF_SIZE(bufferdata) = 0; - return 0; - } - /* Increment to the next buffer */ - else { - npyiter_goto_iterindex(iter, NIT_ITERINDEX(iter)); - } - - /* Prepare the next buffers and 
set iterend/size */ - npyiter_copy_to_buffers(iter, NULL); - - return 1; -} - -/**end repeat2**/ -/**end repeat1**/ -/**end repeat**/ - -/* Specialization of iternext for when the iteration size is 1 */ -static int -npyiter_iternext_sizeone(NpyIter *iter) -{ - return 0; -} - -/*NUMPY_API - * Compute the specialized iteration function for an iterator - * - * If errmsg is non-NULL, it should point to a variable which will - * receive the error message, and no Python exception will be set. - * This is so that the function can be called from code not holding - * the GIL. - */ -NPY_NO_EXPORT NpyIter_IterNextFunc * -NpyIter_GetIterNext(NpyIter *iter, char **errmsg) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int ndim = NIT_NDIM(iter); - int nop = NIT_NOP(iter); - - /* - * When there is just one iteration and buffering is disabled - * the iternext function is very simple. - */ - if (itflags&NPY_ITFLAG_ONEITERATION) { - return &npyiter_iternext_sizeone; - } - - /* - * If buffering is enabled. - */ - if (itflags&NPY_ITFLAG_BUFFER) { - if (itflags&NPY_ITFLAG_REDUCE) { - switch (nop) { - case 1: - return &npyiter_buffered_reduce_iternext_iters1; - case 2: - return &npyiter_buffered_reduce_iternext_iters2; - case 3: - return &npyiter_buffered_reduce_iternext_iters3; - case 4: - return &npyiter_buffered_reduce_iternext_iters4; - default: - return &npyiter_buffered_reduce_iternext_itersANY; - } - } - else { - return &npyiter_buffered_iternext; - } - } - - /* - * Ignore all the flags that don't affect the iterator memory - * layout or the iternext function. Currently only HASINDEX, - * EXLOOP, and RANGE affect them here. 
- */ - itflags &= (NPY_ITFLAG_HASINDEX|NPY_ITFLAG_EXLOOP|NPY_ITFLAG_RANGE); - - /* Switch statements let the compiler optimize this most effectively */ - switch (itflags) { - /* - * The combinations HASINDEX|EXLOOP and RANGE|EXLOOP are excluded - * by the New functions - */ -/**begin repeat - * #const_itflags = 0, - * NPY_ITFLAG_HASINDEX, - * NPY_ITFLAG_EXLOOP, - * NPY_ITFLAG_RANGE, - * NPY_ITFLAG_RANGE|NPY_ITFLAG_HASINDEX# - * #tag_itflags = 0, IND, NOINN, RNG, RNGuIND# - */ - case @const_itflags@: - switch (ndim) { -/**begin repeat1 - * #const_ndim = 1, 2# - * #tag_ndim = 1, 2# - */ - case @const_ndim@: - switch (nop) { -/**begin repeat2 - * #const_nop = 1, 2# - * #tag_nop = 1, 2# - */ - case @const_nop@: - return &npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_iters@tag_nop@; -/**end repeat2**/ - /* Not specialized on nop */ - default: - return &npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_itersANY; - } -/**end repeat1**/ - /* Not specialized on ndim */ - default: - switch (nop) { -/**begin repeat1 - * #const_nop = 1, 2# - * #tag_nop = 1, 2# - */ - case @const_nop@: - return &npyiter_iternext_itflags@tag_itflags@_dimsANY_iters@tag_nop@; -/**end repeat1**/ - /* Not specialized on nop */ - default: - return &npyiter_iternext_itflags@tag_itflags@_dimsANY_itersANY; - } - } -/**end repeat**/ - } - /* The switch above should have caught all the possibilities. 
*/ - if (errmsg == NULL) { - PyErr_Format(PyExc_ValueError, - "GetIterNext internal iterator error - unexpected " - "itflags/ndim/nop combination (%04x/%d/%d)", - (int)itflags, (int)ndim, (int)nop); - } - else { - *errmsg = "GetIterNext internal iterator error - unexpected " - "itflags/ndim/nop combination"; - } - return NULL; -} - - -/* SPECIALIZED getindex functions */ - -/**begin repeat - * #const_itflags = 0, - * NPY_ITFLAG_HASINDEX, - * NPY_ITFLAG_IDENTPERM, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM, - * NPY_ITFLAG_NEGPERM, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM, - * NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER# - * #tag_itflags = 0, IND, IDP, INDuIDP, NEGP, INDuNEGP, - * BUF, INDuBUF, IDPuBUF, INDuIDPuBUF, NEGPuBUF, INDuNEGPuBUF# - */ -static void -npyiter_get_multi_index_itflags@tag_itflags@( - NpyIter *iter, npy_intp *out_multi_index) -{ - const npy_uint32 itflags = @const_itflags@; - int idim, ndim = NIT_NDIM(iter); - int nop = NIT_NOP(iter); - - npy_intp sizeof_axisdata; - NpyIter_AxisData *axisdata; -#if !((@const_itflags@)&NPY_ITFLAG_IDENTPERM) - npy_int8 *perm = NIT_PERM(iter); -#endif - - axisdata = NIT_AXISDATA(iter); - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); -#if ((@const_itflags@)&NPY_ITFLAG_IDENTPERM) - out_multi_index += ndim-1; - for(idim = 0; idim < ndim; ++idim, --out_multi_index, - NIT_ADVANCE_AXISDATA(axisdata, 1)) { - *out_multi_index = NAD_INDEX(axisdata); - } -#elif !((@const_itflags@)&NPY_ITFLAG_NEGPERM) - for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p = perm[idim]; - out_multi_index[ndim-p-1] = NAD_INDEX(axisdata); - } -#else - for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p = perm[idim]; - if (p < 0) { - /* If 
the perm entry is negative, reverse the index */ - out_multi_index[ndim+p] = NAD_SHAPE(axisdata) - NAD_INDEX(axisdata) - 1; - } - else { - out_multi_index[ndim-p-1] = NAD_INDEX(axisdata); - } - } -#endif /* not ident perm */ -} -/**end repeat**/ - -/*NUMPY_API - * Compute a specialized get_multi_index function for the iterator - * - * If errmsg is non-NULL, it should point to a variable which will - * receive the error message, and no Python exception will be set. - * This is so that the function can be called from code not holding - * the GIL. - */ -NPY_NO_EXPORT NpyIter_GetMultiIndexFunc * -NpyIter_GetGetMultiIndex(NpyIter *iter, char **errmsg) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int ndim = NIT_NDIM(iter); - int nop = NIT_NOP(iter); - - /* These flags must be correct */ - if ((itflags&(NPY_ITFLAG_HASMULTIINDEX|NPY_ITFLAG_DELAYBUF)) != - NPY_ITFLAG_HASMULTIINDEX) { - if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) { - if (errmsg == NULL) { - PyErr_SetString(PyExc_ValueError, - "Cannot retrieve a GetMultiIndex function for an " - "iterator that doesn't track a multi-index."); - } - else { - *errmsg = "Cannot retrieve a GetMultiIndex function for an " - "iterator that doesn't track a multi-index."; - } - return NULL; - } - else { - if (errmsg == NULL) { - PyErr_SetString(PyExc_ValueError, - "Cannot retrieve a GetMultiIndex function for an " - "iterator that used DELAY_BUFALLOC before a Reset call"); - } - else { - *errmsg = "Cannot retrieve a GetMultiIndex function for an " - "iterator that used DELAY_BUFALLOC before a " - "Reset call"; - } - return NULL; - } - } - - /* - * Only these flags affect the iterator memory layout or - * the get_multi_index behavior. IDENTPERM and NEGPERM are mutually - * exclusive, so that reduces the number of cases slightly. 
- */ - itflags &= (NPY_ITFLAG_HASINDEX | - NPY_ITFLAG_IDENTPERM | - NPY_ITFLAG_NEGPERM | - NPY_ITFLAG_BUFFER); - - switch (itflags) { -/**begin repeat - * #const_itflags = 0, - * NPY_ITFLAG_HASINDEX, - * NPY_ITFLAG_IDENTPERM, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM, - * NPY_ITFLAG_NEGPERM, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM, - * NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER, - * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER# - * #tag_itflags = 0, IND, IDP, INDuIDP, NEGP, INDuNEGP, - * BUF, INDuBUF, IDPuBUF, INDuIDPuBUF, NEGPuBUF, INDuNEGPuBUF# - */ - case @const_itflags@: - return npyiter_get_multi_index_itflags@tag_itflags@; -/**end repeat**/ - } - /* The switch above should have caught all the possibilities. */ - if (errmsg == NULL) { - PyErr_Format(PyExc_ValueError, - "GetGetMultiIndex internal iterator error - unexpected " - "itflags/ndim/nop combination (%04x/%d/%d)", - (int)itflags, (int)ndim, (int)nop); - } - else { - *errmsg = "GetGetMultiIndex internal iterator error - unexpected " - "itflags/ndim/nop combination"; - } - return NULL; - -} - -#undef NPY_ITERATOR_IMPLEMENTATION_CODE diff --git a/numpy/core/src/multiarray/nditer_templ.c.src b/numpy/core/src/multiarray/nditer_templ.c.src new file mode 100644 index 000000000..59aae244b --- /dev/null +++ b/numpy/core/src/multiarray/nditer_templ.c.src @@ -0,0 +1,605 @@ +/* + * This file implements the API functions for NumPy's nditer that + * are specialized using the templating system. + * + * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com) + * The Univerity of British Columbia + * + * See LICENSE.txt for the license. 
+ */ + +/* Indicate that this .c file is allowed to include the header */ +#define NPY_ITERATOR_IMPLEMENTATION_CODE +#include "nditer_impl.h" + +/* SPECIALIZED iternext functions that handle the non-buffering part */ + +/**begin repeat + * #const_itflags = 0, + * NPY_ITFLAG_HASINDEX, + * NPY_ITFLAG_EXLOOP, + * NPY_ITFLAG_RANGE, + * NPY_ITFLAG_RANGE|NPY_ITFLAG_HASINDEX# + * #tag_itflags = 0, IND, NOINN, RNG, RNGuIND# + */ +/**begin repeat1 + * #const_ndim = 1, 2, NPY_MAXDIMS# + * #tag_ndim = 1, 2, ANY# + */ +/**begin repeat2 + * #const_nop = 1, 2, NPY_MAXDIMS# + * #tag_nop = 1, 2, ANY# + */ + +/* Specialized iternext (@const_itflags@,@tag_ndim@,@tag_nop@) */ +static int +npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_iters@tag_nop@( + NpyIter *iter) +{ +#if !(@const_itflags@&NPY_ITFLAG_EXLOOP) || (@const_ndim@ > 1) + const npy_uint32 itflags = @const_itflags@; +# if @const_ndim@ >= NPY_MAXDIMS + int idim, ndim = NIT_NDIM(iter); +# endif +# if @const_nop@ < NPY_MAXDIMS + const int nop = @const_nop@; +# else + int nop = NIT_NOP(iter); +# endif + + NpyIter_AxisData *axisdata0; + npy_intp istrides, nstrides = NAD_NSTRIDES(); +#endif +#if @const_ndim@ > 1 + NpyIter_AxisData *axisdata1; + npy_intp sizeof_axisdata; +#endif +#if @const_ndim@ > 2 + NpyIter_AxisData *axisdata2; +#endif + +#if (@const_itflags@&NPY_ITFLAG_RANGE) + /* When ranged iteration is enabled, use the iterindex */ + if (++NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { + return 0; + } +#endif + +#if @const_ndim@ > 1 + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); +#endif + +# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) || (@const_ndim@ > 1) + axisdata0 = NIT_AXISDATA(iter); +# endif +# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) + /* Increment index 0 */ + NAD_INDEX(axisdata0)++; + /* Increment pointer 0 */ + for (istrides = 0; istrides < nstrides; ++istrides) { + NAD_PTRS(axisdata0)[istrides] += NAD_STRIDES(axisdata0)[istrides]; + } +# endif + +#if @const_ndim@ == 1 + +# if 
!(@const_itflags@&NPY_ITFLAG_EXLOOP) + /* Finished when the index equals the shape */ + return NAD_INDEX(axisdata0) < NAD_SHAPE(axisdata0); +# else + return 0; +# endif + +#else + +# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) + if (NAD_INDEX(axisdata0) < NAD_SHAPE(axisdata0)) { + return 1; + } +# endif + + axisdata1 = NIT_INDEX_AXISDATA(axisdata0, 1); + /* Increment index 1 */ + NAD_INDEX(axisdata1)++; + /* Increment pointer 1 */ + for (istrides = 0; istrides < nstrides; ++istrides) { + NAD_PTRS(axisdata1)[istrides] += NAD_STRIDES(axisdata1)[istrides]; + } + + if (NAD_INDEX(axisdata1) < NAD_SHAPE(axisdata1)) { + /* Reset the 1st index to 0 */ + NAD_INDEX(axisdata0) = 0; + /* Reset the 1st pointer to the value of the 2nd */ + for (istrides = 0; istrides < nstrides; ++istrides) { + NAD_PTRS(axisdata0)[istrides] = NAD_PTRS(axisdata1)[istrides]; + } + return 1; + } + +# if @const_ndim@ == 2 + return 0; +# else + + axisdata2 = NIT_INDEX_AXISDATA(axisdata1, 1); + /* Increment index 2 */ + NAD_INDEX(axisdata2)++; + /* Increment pointer 2 */ + for (istrides = 0; istrides < nstrides; ++istrides) { + NAD_PTRS(axisdata2)[istrides] += NAD_STRIDES(axisdata2)[istrides]; + } + + if (NAD_INDEX(axisdata2) < NAD_SHAPE(axisdata2)) { + /* Reset the 1st and 2nd indices to 0 */ + NAD_INDEX(axisdata0) = 0; + NAD_INDEX(axisdata1) = 0; + /* Reset the 1st and 2nd pointers to the value of the 3nd */ + for (istrides = 0; istrides < nstrides; ++istrides) { + NAD_PTRS(axisdata0)[istrides] = NAD_PTRS(axisdata2)[istrides]; + NAD_PTRS(axisdata1)[istrides] = NAD_PTRS(axisdata2)[istrides]; + } + return 1; + } + + for (idim = 3; idim < ndim; ++idim) { + NIT_ADVANCE_AXISDATA(axisdata2, 1); + /* Increment the index */ + NAD_INDEX(axisdata2)++; + /* Increment the pointer */ + for (istrides = 0; istrides < nstrides; ++istrides) { + NAD_PTRS(axisdata2)[istrides] += NAD_STRIDES(axisdata2)[istrides]; + } + + + if (NAD_INDEX(axisdata2) < NAD_SHAPE(axisdata2)) { + /* Reset the indices and pointers of all 
previous axisdatas */ + axisdata1 = axisdata2; + do { + NIT_ADVANCE_AXISDATA(axisdata1, -1); + /* Reset the index to 0 */ + NAD_INDEX(axisdata1) = 0; + /* Reset the pointer to the updated value */ + for (istrides = 0; istrides < nstrides; ++istrides) { + NAD_PTRS(axisdata1)[istrides] = + NAD_PTRS(axisdata2)[istrides]; + } + } while (axisdata1 != axisdata0); + + return 1; + } + } + + return 0; + +# endif /* ndim != 2 */ + +#endif /* ndim != 1 */ +} + +/**end repeat2**/ +/**end repeat1**/ +/**end repeat**/ + + +/**begin repeat + * #const_nop = 1, 2, 3, 4, NPY_MAXDIMS# + * #tag_nop = 1, 2, 3, 4, ANY# + */ + +/* + * Iternext function that handles the reduction buffering part. This + * is done with a double loop to avoid frequent re-buffering. + */ +static int +npyiter_buffered_reduce_iternext_iters@tag_nop@(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + /*int ndim = NIT_NDIM(iter);*/ +#if @const_nop@ >= NPY_MAXDIMS + int nop = NIT_NOP(iter); +#else + const int nop = @const_nop@; +#endif + + int iop; + + NpyIter_AxisData *axisdata; + NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); + char **ptrs; + char *prev_dataptrs[NPY_MAXARGS]; + + ptrs = NBF_PTRS(bufferdata); + + /* + * If the iterator handles the inner loop, need to increment all + * the indices and pointers + */ + if (!(itflags&NPY_ITFLAG_EXLOOP)) { + /* Increment within the buffer */ + if (++NIT_ITERINDEX(iter) < NBF_BUFITEREND(bufferdata)) { + npy_intp *strides; + + strides = NBF_STRIDES(bufferdata); + for (iop = 0; iop < nop; ++iop) { + ptrs[iop] += strides[iop]; + } + return 1; + } + } + else { + NIT_ITERINDEX(iter) += NBF_SIZE(bufferdata); + } + + NPY_IT_DBG_PRINT1("Iterator: Finished iteration %d of outer reduce loop\n", + (int)NBF_REDUCE_POS(bufferdata)); + /* The outer increment for the reduce double loop */ + if (++NBF_REDUCE_POS(bufferdata) < NBF_REDUCE_OUTERSIZE(bufferdata)) { + npy_intp *reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata); + char **reduce_outerptrs = 
NBF_REDUCE_OUTERPTRS(bufferdata); + for (iop = 0; iop < nop; ++iop) { + char *ptr = reduce_outerptrs[iop] + reduce_outerstrides[iop]; + ptrs[iop] = ptr; + reduce_outerptrs[iop] = ptr; + } + NBF_BUFITEREND(bufferdata) = NIT_ITERINDEX(iter) + NBF_SIZE(bufferdata); + return 1; + } + + /* Save the previously used data pointers */ + axisdata = NIT_AXISDATA(iter); + memcpy(prev_dataptrs, NAD_PTRS(axisdata), NPY_SIZEOF_INTP*nop); + + /* Write back to the arrays */ + npyiter_copy_from_buffers(iter); + + /* Check if we're past the end */ + if (NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { + NBF_SIZE(bufferdata) = 0; + return 0; + } + /* Increment to the next buffer */ + else { + npyiter_goto_iterindex(iter, NIT_ITERINDEX(iter)); + } + + /* Prepare the next buffers and set iterend/size */ + npyiter_copy_to_buffers(iter, prev_dataptrs); + + return 1; +} + +/**end repeat**/ + +/* iternext function that handles the buffering part */ +static int +npyiter_buffered_iternext(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + /*int ndim = NIT_NDIM(iter);*/ + int nop = NIT_NOP(iter); + + NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); + + /* + * If the iterator handles the inner loop, need to increment all + * the indices and pointers + */ + if (!(itflags&NPY_ITFLAG_EXLOOP)) { + /* Increment within the buffer */ + if (++NIT_ITERINDEX(iter) < NBF_BUFITEREND(bufferdata)) { + int iop; + npy_intp *strides; + char **ptrs; + + strides = NBF_STRIDES(bufferdata); + ptrs = NBF_PTRS(bufferdata); + for (iop = 0; iop < nop; ++iop) { + ptrs[iop] += strides[iop]; + } + return 1; + } + } + else { + NIT_ITERINDEX(iter) += NBF_SIZE(bufferdata); + } + + /* Write back to the arrays */ + npyiter_copy_from_buffers(iter); + + /* Check if we're past the end */ + if (NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { + NBF_SIZE(bufferdata) = 0; + return 0; + } + /* Increment to the next buffer */ + else { + npyiter_goto_iterindex(iter, NIT_ITERINDEX(iter)); + } + + /* Prepare the next buffers and 
set iterend/size */ + npyiter_copy_to_buffers(iter, NULL); + + return 1; +} + +/**end repeat2**/ +/**end repeat1**/ +/**end repeat**/ + +/* Specialization of iternext for when the iteration size is 1 */ +static int +npyiter_iternext_sizeone(NpyIter *iter) +{ + return 0; +} + +/*NUMPY_API + * Compute the specialized iteration function for an iterator + * + * If errmsg is non-NULL, it should point to a variable which will + * receive the error message, and no Python exception will be set. + * This is so that the function can be called from code not holding + * the GIL. + */ +NPY_NO_EXPORT NpyIter_IterNextFunc * +NpyIter_GetIterNext(NpyIter *iter, char **errmsg) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int ndim = NIT_NDIM(iter); + int nop = NIT_NOP(iter); + + /* + * When there is just one iteration and buffering is disabled + * the iternext function is very simple. + */ + if (itflags&NPY_ITFLAG_ONEITERATION) { + return &npyiter_iternext_sizeone; + } + + /* + * If buffering is enabled. + */ + if (itflags&NPY_ITFLAG_BUFFER) { + if (itflags&NPY_ITFLAG_REDUCE) { + switch (nop) { + case 1: + return &npyiter_buffered_reduce_iternext_iters1; + case 2: + return &npyiter_buffered_reduce_iternext_iters2; + case 3: + return &npyiter_buffered_reduce_iternext_iters3; + case 4: + return &npyiter_buffered_reduce_iternext_iters4; + default: + return &npyiter_buffered_reduce_iternext_itersANY; + } + } + else { + return &npyiter_buffered_iternext; + } + } + + /* + * Ignore all the flags that don't affect the iterator memory + * layout or the iternext function. Currently only HASINDEX, + * EXLOOP, and RANGE affect them here. 
+ */ + itflags &= (NPY_ITFLAG_HASINDEX|NPY_ITFLAG_EXLOOP|NPY_ITFLAG_RANGE); + + /* Switch statements let the compiler optimize this most effectively */ + switch (itflags) { + /* + * The combinations HASINDEX|EXLOOP and RANGE|EXLOOP are excluded + * by the New functions + */ +/**begin repeat + * #const_itflags = 0, + * NPY_ITFLAG_HASINDEX, + * NPY_ITFLAG_EXLOOP, + * NPY_ITFLAG_RANGE, + * NPY_ITFLAG_RANGE|NPY_ITFLAG_HASINDEX# + * #tag_itflags = 0, IND, NOINN, RNG, RNGuIND# + */ + case @const_itflags@: + switch (ndim) { +/**begin repeat1 + * #const_ndim = 1, 2# + * #tag_ndim = 1, 2# + */ + case @const_ndim@: + switch (nop) { +/**begin repeat2 + * #const_nop = 1, 2# + * #tag_nop = 1, 2# + */ + case @const_nop@: + return &npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_iters@tag_nop@; +/**end repeat2**/ + /* Not specialized on nop */ + default: + return &npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_itersANY; + } +/**end repeat1**/ + /* Not specialized on ndim */ + default: + switch (nop) { +/**begin repeat1 + * #const_nop = 1, 2# + * #tag_nop = 1, 2# + */ + case @const_nop@: + return &npyiter_iternext_itflags@tag_itflags@_dimsANY_iters@tag_nop@; +/**end repeat1**/ + /* Not specialized on nop */ + default: + return &npyiter_iternext_itflags@tag_itflags@_dimsANY_itersANY; + } + } +/**end repeat**/ + } + /* The switch above should have caught all the possibilities. 
*/
+    if (errmsg == NULL) {
+        PyErr_Format(PyExc_ValueError,
+                "GetIterNext internal iterator error - unexpected "
+                "itflags/ndim/nop combination (%04x/%d/%d)",
+                (int)itflags, (int)ndim, (int)nop);
+    }
+    else {
+        *errmsg = "GetIterNext internal iterator error - unexpected "
+                  "itflags/ndim/nop combination";
+    }
+    return NULL;
+}
+
+
+/*
+ * SPECIALIZED getindex functions
+ *
+ * The template below is expanded once per flag combination listed in
+ * the repeat block.  Each expansion copies the iterator's current
+ * per-axis index (NAD_INDEX of every axisdata entry) into
+ * out_multi_index, visiting ndim axisdata entries starting at
+ * NIT_AXISDATA(iter).
+ *
+ * Only IDENTPERM and NEGPERM are tested by the preprocessor branches in
+ * the body; HASINDEX and BUFFER enter solely through the itflags value
+ * handed to NIT_AXISDATA_SIZEOF.
+ *
+ *  - IDENTPERM:        axisdata entry idim is written to slot ndim-1-idim.
+ *  - neither flag:     entry idim is written to slot ndim-perm[idim]-1.
+ *  - NEGPERM:          as above for perm[idim] >= 0; for a negative entry
+ *                      p the slot is ndim+p and the stored value is
+ *                      NAD_SHAPE - NAD_INDEX - 1.
+ *
+ * NOTE(review): out_multi_index presumably must have room for ndim
+ * entries and perm presumably is a permutation of the axes - confirm
+ * against the iterator constructor.
+ */
+
+/**begin repeat
+ * #const_itflags = 0,
+ *    NPY_ITFLAG_HASINDEX,
+ *    NPY_ITFLAG_IDENTPERM,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM,
+ *    NPY_ITFLAG_NEGPERM,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM,
+ *    NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER#
+ * #tag_itflags = 0, IND, IDP, INDuIDP, NEGP, INDuNEGP,
+ *                BUF, INDuBUF, IDPuBUF, INDuIDPuBUF, NEGPuBUF, INDuNEGPuBUF#
+ */
+static void
+npyiter_get_multi_index_itflags@tag_itflags@(
+                        NpyIter *iter, npy_intp *out_multi_index)
+{
+    const npy_uint32 itflags = @const_itflags@; /* fixed per expansion */
+    int idim, ndim = NIT_NDIM(iter);
+    int nop = NIT_NOP(iter);
+
+    npy_intp sizeof_axisdata; /* not referenced by name below; presumably
+                               * read by NIT_ADVANCE_AXISDATA - TODO confirm */
+    NpyIter_AxisData *axisdata;
+#if !((@const_itflags@)&NPY_ITFLAG_IDENTPERM)
+    npy_int8 *perm = NIT_PERM(iter); /* only the non-IDENTPERM branches
+                                      * look at the permutation */
+#endif
+
+    axisdata = NIT_AXISDATA(iter);
+    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+#if ((@const_itflags@)&NPY_ITFLAG_IDENTPERM)
+    out_multi_index += ndim-1; /* begin at the last slot; the loop below
+                                * steps the output pointer backwards */
+    for(idim = 0; idim < ndim; ++idim, --out_multi_index,
+                                    NIT_ADVANCE_AXISDATA(axisdata, 1)) {
+        *out_multi_index = NAD_INDEX(axisdata);
+    }
+#elif !((@const_itflags@)&NPY_ITFLAG_NEGPERM)
+    for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
+        npy_int8 p = perm[idim];
+        out_multi_index[ndim-p-1] = NAD_INDEX(axisdata);
+    }
+#else
+    for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
+        npy_int8 p = perm[idim];
+        if (p < 0) {
+            /* If the perm entry is negative, reverse the index */
+            out_multi_index[ndim+p] = NAD_SHAPE(axisdata) - NAD_INDEX(axisdata) - 1;
+        }
+        else {
+            out_multi_index[ndim-p-1] = NAD_INDEX(axisdata);
+        }
+    }
+#endif /* not ident perm */
+}
+/**end repeat**/
+
+/*NUMPY_API
+ * Compute a specialized get_multi_index function for the iterator
+ *
+ * If errmsg is non-NULL, it should point to a variable which will
+ * receive the error message, and no Python exception will be set.
+ * This is so that the function can be called from code not holding
+ * the GIL.
+ *
+ * Returns a pointer to the specialization matching the iterator's
+ * flags, or NULL on failure.  Failure happens when the iterator does
+ * not have HASMULTIINDEX set, when DELAYBUF is still set, or when the
+ * masked flag combination matches none of the generated cases.  With
+ * errmsg == NULL a ValueError is set; otherwise *errmsg is pointed at a
+ * string literal describing the problem.
+ */
+NPY_NO_EXPORT NpyIter_GetMultiIndexFunc *
+NpyIter_GetGetMultiIndex(NpyIter *iter, char **errmsg)
+{
+    npy_uint32 itflags = NIT_ITFLAGS(iter);
+    int ndim = NIT_NDIM(iter); /* used only in the final diagnostic */
+    int nop = NIT_NOP(iter);   /* used only in the final diagnostic */
+
+    /*
+     * These flags must be correct: HASMULTIINDEX has to be set and
+     * DELAYBUF has to be clear, otherwise a specific error is reported.
+     */
+    if ((itflags&(NPY_ITFLAG_HASMULTIINDEX|NPY_ITFLAG_DELAYBUF)) !=
+                                        NPY_ITFLAG_HASMULTIINDEX) {
+        if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) { /* no multi-index */
+            if (errmsg == NULL) {
+                PyErr_SetString(PyExc_ValueError,
+                        "Cannot retrieve a GetMultiIndex function for an "
+                        "iterator that doesn't track a multi-index.");
+            }
+            else {
+                *errmsg = "Cannot retrieve a GetMultiIndex function for an "
+                          "iterator that doesn't track a multi-index.";
+            }
+            return NULL;
+        }
+        else { /* HASMULTIINDEX is set, so DELAYBUF must be set as well */
+            if (errmsg == NULL) {
+                PyErr_SetString(PyExc_ValueError,
+                        "Cannot retrieve a GetMultiIndex function for an "
+                        "iterator that used DELAY_BUFALLOC before a Reset call");
+            }
+            else {
+                *errmsg = "Cannot retrieve a GetMultiIndex function for an "
+                          "iterator that used DELAY_BUFALLOC before a "
+                          "Reset call";
+            }
+            return NULL;
+        }
+    }
+
+    /*
+     * Only these flags affect the iterator memory layout or
+     * the get_multi_index behavior. IDENTPERM and NEGPERM are mutually
+     * exclusive, so that reduces the number of cases slightly.
+     */
+    itflags &= (NPY_ITFLAG_HASINDEX |
+                NPY_ITFLAG_IDENTPERM |
+                NPY_ITFLAG_NEGPERM |
+                NPY_ITFLAG_BUFFER);
+
+    switch (itflags) { /* one case per entry of the repeat list below */
+/**begin repeat
+ * #const_itflags = 0,
+ *    NPY_ITFLAG_HASINDEX,
+ *    NPY_ITFLAG_IDENTPERM,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM,
+ *    NPY_ITFLAG_NEGPERM,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM,
+ *    NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER,
+ *    NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER#
+ * #tag_itflags = 0, IND, IDP, INDuIDP, NEGP, INDuNEGP,
+ *                BUF, INDuBUF, IDPuBUF, INDuIDPuBUF, NEGPuBUF, INDuNEGPuBUF#
+ */
+        case @const_itflags@:
+            return npyiter_get_multi_index_itflags@tag_itflags@;
+/**end repeat**/
+    }
+    /* The switch above should have caught all the possibilities. */
+    if (errmsg == NULL) {
+        PyErr_Format(PyExc_ValueError,
+                "GetGetMultiIndex internal iterator error - unexpected "
+                "itflags/ndim/nop combination (%04x/%d/%d)",
+                (int)itflags, (int)ndim, (int)nop);
+    }
+    else {
+        *errmsg = "GetGetMultiIndex internal iterator error - unexpected "
+                  "itflags/ndim/nop combination";
+    }
+    return NULL;
+
+}
+
+#undef NPY_ITERATOR_IMPLEMENTATION_CODE
--
cgit v1.2.1