| author | Mark Wiebe <mwiebe@enthought.com>, 2011-07-07 12:04:53 -0500 |
|---|---|
| committer | Charles Harris <charlesr.harris@gmail.com>, 2011-07-07 11:54:06 -0600 |
| commit | 36f4bdfbe464259540208a5d7d927ec656524757 |
| tree | 4c1d65a0d854074a281476af3983ad06a0dc9a62 /numpy |
| parent | e124ac52c2824332fa5b32ba92205012d71a1e8f |
| download | numpy-36f4bdfbe464259540208a5d7d927ec656524757.tar.gz |
ENH: nditer: Move construction/copy/destruction to its own implementation file
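The mechanism behind the split is visible in the diff below: the iterator's internal definitions move into a new private header, nditer_impl.h, and nditer.c.src now defines NPY_ITERATOR_IMPLEMENTATION_CODE before including it ("Indicate that this .c file is allowed to include the header"). A minimal sketch of that pattern follows; this page does not show nditer_impl.h itself, so the #error guard is an assumption about how the header enforces that restriction:

```c
/* nditer_impl.h (sketch): internals shared by nditer.c.src and
 * nditer_constr.c. The #error guard is an assumed detail; the new
 * header's actual contents are not shown on this page. */
#ifndef NPY_ITERATOR_IMPLEMENTATION_CODE
#error "nditer_impl.h is private to the nditer implementation files"
#endif
/* struct NpyIter_InternalOnly, the NIT_/NBF_/NAD_ macro families,
 * and shared helper prototypes would live here. */

/* nditer_constr.c and nditer.c.src (these two lines appear verbatim
 * in the diff below): */
#define NPY_ITERATOR_IMPLEMENTATION_CODE
#include "nditer_impl.h"
```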
Diffstat (limited to 'numpy')
| -rw-r--r-- | numpy/core/SConscript | 1 |
| -rw-r--r-- | numpy/core/code_generators/genapi.py | 1 |
| -rw-r--r-- | numpy/core/setup.py | 46 |
| -rw-r--r-- | numpy/core/src/multiarray/nditer.c.src | 3238 |
| -rw-r--r-- | numpy/core/src/multiarray/nditer_constr.c | 2956 |
| -rw-r--r-- | numpy/core/src/multiarray/nditer_impl.h | 301 |
6 files changed, 3294 insertions, 3249 deletions
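One technique worth noting before the diff: the construction code being moved carries opt-in instrumentation (NPY_IT_CONSTRUCTION_TIMING, NPY_IT_TIME_POINT) that samples the x86 time-stamp counter around each construction phase. The standalone sketch below shows that pattern under the assumption of an x86/x86-64 target with a GCC-compatible compiler; time_point and the phase name are illustrative stand-ins for the macros in the real code:

```c
#include <stdio.h>

/* Read the time-stamp counter, as NPY_IT_TIME_POINT does: rdtsc puts
 * the high 32 bits in EDX and the low 32 bits in EAX. */
static unsigned long long time_point(void)
{
    unsigned int hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=d" (hi), "=a" (lo));
    return (((unsigned long long)hi) << 32) | lo;
}

int main(void)
{
    unsigned long long c_start = time_point();
    /* ... one construction phase, e.g. preparing the operands ... */
    unsigned long long c_prepare_operands = time_point();

    /* Same report format as NPY_IT_PRINT_TIME_VAR */
    printf("%30s: %6.0f clocks\n", "c_prepare_operands",
           (double)(c_prepare_operands - c_start));
    return 0;
}
```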
diff --git a/numpy/core/SConscript b/numpy/core/SConscript index ca630254e..802b8aa2b 100644 --- a/numpy/core/SConscript +++ b/numpy/core/SConscript @@ -469,6 +469,7 @@ if ENABLE_SEPARATE_COMPILATION: pjoin('src', 'multiarray', 'buffer.c'), pjoin('src', 'multiarray', 'numpymemoryview.c'), pjoin('src', 'multiarray', 'scalarapi.c'), + pjoin('src', 'multiarray', 'nditer_constr.c'), pjoin('src', 'multiarray', 'nditer_pywrap.c'), pjoin('src', 'multiarray', 'dtype_transfer.c')] multiarray_src.extend(arraytypes_src) diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index b4d651b1d..23b85f9fe 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -48,6 +48,7 @@ API_FILES = [join('multiarray', 'methods.c'), join('multiarray', 'datetime_busday.c'), join('multiarray', 'datetime_busdaycal.c'), join('multiarray', 'nditer.c.src'), + join('multiarray', 'nditer_constr.c'), join('multiarray', 'nditer_pywrap.c'), join('multiarray', 'einsum.c.src'), join('umath', 'ufunc_object.c'), diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 80f84d29f..38ef079b0 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -714,6 +714,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'mapping.h'), join('src', 'multiarray', 'methods.h'), join('src', 'multiarray', 'multiarraymodule.h'), + join('src', 'multiarray', 'nditer_impl.h'), join('src', 'multiarray', 'numpymemoryview.h'), join('src', 'multiarray', 'number.h'), join('src', 'multiarray', 'numpyos.h'), @@ -726,42 +727,43 @@ def configuration(parent_package='',top_path=None): join('src', 'private', 'lowlevel_strided_loops.h')] multiarray_src = [ - join('src', 'multiarray', 'multiarraymodule.c'), - join('src', 'multiarray', 'hashdescr.c'), join('src', 'multiarray', 'arrayobject.c'), - join('src', 'multiarray', 'numpymemoryview.c'), + join('src', 'multiarray', 'arraytypes.c.src'), join('src', 'multiarray', 'buffer.c'), + join('src', 'multiarray', 'calculation.c'), + join('src', 'multiarray', 'common.c'), + join('src', 'multiarray', 'convert.c'), + join('src', 'multiarray', 'convert_datatype.c'), + join('src', 'multiarray', 'conversion_utils.c'), + join('src', 'multiarray', 'ctors.c'), join('src', 'multiarray', 'datetime.c'), join('src', 'multiarray', 'datetime_strings.c'), join('src', 'multiarray', 'datetime_busday.c'), join('src', 'multiarray', 'datetime_busdaycal.c'), - join('src', 'multiarray', 'numpyos.c'), - join('src', 'multiarray', 'conversion_utils.c'), - join('src', 'multiarray', 'flagsobject.c'), join('src', 'multiarray', 'descriptor.c'), + join('src', 'multiarray', 'dtype_transfer.c'), + join('src', 'multiarray', 'einsum.c.src'), + join('src', 'multiarray', 'flagsobject.c'), + join('src', 'multiarray', 'getset.c'), + join('src', 'multiarray', 'hashdescr.c'), + join('src', 'multiarray', 'item_selection.c'), join('src', 'multiarray', 'iterators.c'), + join('src', 'multiarray', 'lowlevel_strided_loops.c.src'), join('src', 'multiarray', 'mapping.c'), + join('src', 'multiarray', 'methods.c'), + join('src', 'multiarray', 'multiarraymodule.c'), + join('src', 'multiarray', 'nditer.c.src'), + join('src', 'multiarray', 'nditer_constr.c'), + join('src', 'multiarray', 'nditer_pywrap.c'), join('src', 'multiarray', 'number.c'), - join('src', 'multiarray', 'getset.c'), + join('src', 'multiarray', 'numpymemoryview.c'), + join('src', 'multiarray', 'numpyos.c'), + join('src', 'multiarray', 'refcount.c'), join('src', 'multiarray', 'sequence.c'), - 
join('src', 'multiarray', 'methods.c'), - join('src', 'multiarray', 'ctors.c'), - join('src', 'multiarray', 'convert_datatype.c'), - join('src', 'multiarray', 'convert.c'), join('src', 'multiarray', 'shape.c'), - join('src', 'multiarray', 'item_selection.c'), - join('src', 'multiarray', 'calculation.c'), - join('src', 'multiarray', 'common.c'), - join('src', 'multiarray', 'usertypes.c'), join('src', 'multiarray', 'scalarapi.c'), - join('src', 'multiarray', 'refcount.c'), - join('src', 'multiarray', 'arraytypes.c.src'), join('src', 'multiarray', 'scalartypes.c.src'), - join('src', 'multiarray', 'nditer.c.src'), - join('src', 'multiarray', 'lowlevel_strided_loops.c.src'), - join('src', 'multiarray', 'dtype_transfer.c'), - join('src', 'multiarray', 'nditer_pywrap.c'), - join('src', 'multiarray', 'einsum.c.src')] + join('src', 'multiarray', 'usertypes.c')] if PYTHON_HAS_UNICODE_WIDE: multiarray_src.append(join('src', 'multiarray', 'ucsnarrow.c')) diff --git a/numpy/core/src/multiarray/nditer.c.src b/numpy/core/src/multiarray/nditer.c.src index d8758bdfa..991426f1c 100644 --- a/numpy/core/src/multiarray/nditer.c.src +++ b/numpy/core/src/multiarray/nditer.c.src @@ -7,962 +7,17 @@ * See LICENSE.txt for the license. */ -#define PY_SSIZE_T_CLEAN -#include "Python.h" -#include "structmember.h" - -#define NPY_NO_DEPRECATED_API -#define _MULTIARRAYMODULE -#include <numpy/ndarrayobject.h> -#include <numpy/npy_3kcompat.h> -#include "convert_datatype.h" - -#include "lowlevel_strided_loops.h" - -/********** ITERATOR CONSTRUCTION TIMING **************/ -#define NPY_IT_CONSTRUCTION_TIMING 0 - -#if NPY_IT_CONSTRUCTION_TIMING -#define NPY_IT_TIME_POINT(var) { \ - unsigned int hi, lo; \ - __asm__ __volatile__ ( \ - "rdtsc" \ - : "=d" (hi), "=a" (lo)); \ - var = (((unsigned long long)hi) << 32) | lo; \ - } -#define NPY_IT_PRINT_TIME_START(var) { \ - printf("%30s: start\n", #var); \ - c_temp = var; \ - } -#define NPY_IT_PRINT_TIME_VAR(var) { \ - printf("%30s: %6.0f clocks\n", #var, \ - ((double)(var-c_temp))); \ - c_temp = var; \ - } -#else -#define NPY_IT_TIME_POINT(var) -#endif - -/******************************************************/ - -/********** PRINTF DEBUG TRACING **************/ -#define NPY_IT_DBG_TRACING 0 - -#if NPY_IT_DBG_TRACING -#define NPY_IT_DBG_PRINT(s) printf("%s", s) -#define NPY_IT_DBG_PRINT1(s, p1) printf(s, p1) -#define NPY_IT_DBG_PRINT2(s, p1, p2) printf(s, p1, p2) -#define NPY_IT_DBG_PRINT3(s, p1, p2, p3) printf(s, p1, p2, p3) -#else -#define NPY_IT_DBG_PRINT(s) -#define NPY_IT_DBG_PRINT1(s, p1) -#define NPY_IT_DBG_PRINT2(s, p1, p2) -#define NPY_IT_DBG_PRINT3(s, p1, p2, p3) -#endif -/**********************************************/ - -/* Rounds up a number of bytes to be divisible by sizeof intp */ -#if NPY_SIZEOF_INTP == 4 -#define NPY_INTP_ALIGNED(size) ((size + 0x3)&(-0x4)) -#else -#define NPY_INTP_ALIGNED(size) ((size + 0x7)&(-0x8)) -#endif - -/* Internal iterator flags */ - -/* The perm is the identity */ -#define NPY_ITFLAG_IDENTPERM 0x0001 -/* The perm has negative entries (indicating flipped axes) */ -#define NPY_ITFLAG_NEGPERM 0x0002 -/* The iterator is tracking an index */ -#define NPY_ITFLAG_HASINDEX 0x0004 -/* The iterator is tracking a multi-index */ -#define NPY_ITFLAG_HASMULTIINDEX 0x0008 -/* The iteration order was forced on construction */ -#define NPY_ITFLAG_FORCEDORDER 0x0010 -/* The inner loop is handled outside the iterator */ -#define NPY_ITFLAG_EXLOOP 0x0020 -/* The iterator is ranged */ -#define NPY_ITFLAG_RANGE 0x0040 -/* The iterator is buffered */ -#define 
NPY_ITFLAG_BUFFER 0x0080 -/* The iterator should grow the buffered inner loop when possible */ -#define NPY_ITFLAG_GROWINNER 0x0100 -/* There is just one iteration, can specialize iternext for that */ -#define NPY_ITFLAG_ONEITERATION 0x0200 -/* Delay buffer allocation until first Reset* call */ -#define NPY_ITFLAG_DELAYBUF 0x0400 -/* Iteration needs API access during iternext */ -#define NPY_ITFLAG_NEEDSAPI 0x0800 -/* Iteration includes one or more operands being reduced */ -#define NPY_ITFLAG_REDUCE 0x1000 -/* Reduce iteration doesn't need to recalculate reduce loops next time */ -#define NPY_ITFLAG_REUSE_REDUCE_LOOPS 0x2000 - -/* Internal iterator per-operand iterator flags */ - -/* The operand will be written to */ -#define NPY_OP_ITFLAG_WRITE 0x01 -/* The operand will be read from */ -#define NPY_OP_ITFLAG_READ 0x02 -/* The operand needs type conversion/byte swapping/alignment */ -#define NPY_OP_ITFLAG_CAST 0x04 -/* The operand never needs buffering */ -#define NPY_OP_ITFLAG_BUFNEVER 0x08 -/* The operand is aligned */ -#define NPY_OP_ITFLAG_ALIGNED 0x10 -/* The operand is being reduced */ -#define NPY_OP_ITFLAG_REDUCE 0x20 - -/* - * The data layout of the iterator is fully specified by - * a triple (itflags, ndim, nop). These three variables - * are expected to exist in all functions calling these macros, - * either as true variables initialized to the correct values - * from the iterator, or as constants in the case of specialized - * functions such as the various iternext functions. - */ - -struct NpyIter_InternalOnly { - /* Initial fixed position data */ - npy_uint32 itflags; - npy_uint16 ndim, nop; - npy_intp itersize, iterstart, iterend; - /* iterindex is only used if RANGED or BUFFERED is set */ - npy_intp iterindex; - /* The rest is variable */ - char iter_flexdata; -}; - -typedef struct NpyIter_AD NpyIter_AxisData; -typedef struct NpyIter_BD NpyIter_BufferData; - -/* Byte sizes of the iterator members */ -#define NIT_PERM_SIZEOF(itflags, ndim, nop) \ - NPY_INTP_ALIGNED(NPY_MAXDIMS) -#define NIT_DTYPES_SIZEOF(itflags, ndim, nop) \ - ((NPY_SIZEOF_INTP)*(nop)) -#define NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) \ - ((NPY_SIZEOF_INTP)*(nop+1)) -#define NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) \ - ((NPY_SIZEOF_INTP)*(nop+1)) -#define NIT_OPERANDS_SIZEOF(itflags, ndim, nop) \ - ((NPY_SIZEOF_INTP)*(nop)) -#define NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) \ - (NPY_INTP_ALIGNED(nop)) -#define NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) \ - ((itflags&NPY_ITFLAG_BUFFER) ? 
((NPY_SIZEOF_INTP)*(6 + 9*nop)) : 0) - -/* Byte offsets of the iterator members starting from iter->iter_flexdata */ -#define NIT_PERM_OFFSET() \ - (0) -#define NIT_DTYPES_OFFSET(itflags, ndim, nop) \ - (NIT_PERM_OFFSET() + \ - NIT_PERM_SIZEOF(itflags, ndim, nop)) -#define NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) \ - (NIT_DTYPES_OFFSET(itflags, ndim, nop) + \ - NIT_DTYPES_SIZEOF(itflags, ndim, nop)) -#define NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) \ - (NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) + \ - NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop)) -#define NIT_OPERANDS_OFFSET(itflags, ndim, nop) \ - (NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) + \ - NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop)) -#define NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) \ - (NIT_OPERANDS_OFFSET(itflags, ndim, nop) + \ - NIT_OPERANDS_SIZEOF(itflags, ndim, nop)) -#define NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) \ - (NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) + \ - NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop)) -#define NIT_AXISDATA_OFFSET(itflags, ndim, nop) \ - (NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) + \ - NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop)) - -/* Internal-only ITERATOR DATA MEMBER ACCESS */ -#define NIT_ITFLAGS(iter) \ - ((iter)->itflags) -#define NIT_NDIM(iter) \ - ((iter)->ndim) -#define NIT_NOP(iter) \ - ((iter)->nop) -#define NIT_ITERSIZE(iter) \ - (iter->itersize) -#define NIT_ITERSTART(iter) \ - (iter->iterstart) -#define NIT_ITEREND(iter) \ - (iter->iterend) -#define NIT_ITERINDEX(iter) \ - (iter->iterindex) -#define NIT_PERM(iter) ((npy_int8 *)( \ - &(iter)->iter_flexdata + NIT_PERM_OFFSET())) -#define NIT_DTYPES(iter) ((PyArray_Descr **)( \ - &(iter)->iter_flexdata + NIT_DTYPES_OFFSET(itflags, ndim, nop))) -#define NIT_RESETDATAPTR(iter) ((char **)( \ - &(iter)->iter_flexdata + NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop))) -#define NIT_BASEOFFSETS(iter) ((npy_intp *)( \ - &(iter)->iter_flexdata + NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop))) -#define NIT_OPERANDS(iter) ((PyArrayObject **)( \ - &(iter)->iter_flexdata + NIT_OPERANDS_OFFSET(itflags, ndim, nop))) -#define NIT_OPITFLAGS(iter) ( \ - &(iter)->iter_flexdata + NIT_OPITFLAGS_OFFSET(itflags, ndim, nop)) -#define NIT_BUFFERDATA(iter) ((NpyIter_BufferData *)( \ - &(iter)->iter_flexdata + NIT_BUFFERDATA_OFFSET(itflags, ndim, nop))) -#define NIT_AXISDATA(iter) ((NpyIter_AxisData *)( \ - &(iter)->iter_flexdata + NIT_AXISDATA_OFFSET(itflags, ndim, nop))) - -/* Internal-only BUFFERDATA MEMBER ACCESS */ -struct NpyIter_BD { - npy_intp buffersize, size, bufiterend, - reduce_pos, reduce_outersize, reduce_outerdim; - npy_intp bd_flexdata; -}; -#define NBF_BUFFERSIZE(bufferdata) ((bufferdata)->buffersize) -#define NBF_SIZE(bufferdata) ((bufferdata)->size) -#define NBF_BUFITEREND(bufferdata) ((bufferdata)->bufiterend) -#define NBF_REDUCE_POS(bufferdata) ((bufferdata)->reduce_pos) -#define NBF_REDUCE_OUTERSIZE(bufferdata) ((bufferdata)->reduce_outersize) -#define NBF_REDUCE_OUTERDIM(bufferdata) ((bufferdata)->reduce_outerdim) -#define NBF_STRIDES(bufferdata) ( \ - &(bufferdata)->bd_flexdata + 0) -#define NBF_PTRS(bufferdata) ((char **) \ - (&(bufferdata)->bd_flexdata + 1*(nop))) -#define NBF_REDUCE_OUTERSTRIDES(bufferdata) ( \ - (&(bufferdata)->bd_flexdata + 2*(nop))) -#define NBF_REDUCE_OUTERPTRS(bufferdata) ((char **) \ - (&(bufferdata)->bd_flexdata + 3*(nop))) -#define NBF_READTRANSFERFN(bufferdata) ((PyArray_StridedTransferFn **) \ - (&(bufferdata)->bd_flexdata + 4*(nop))) -#define NBF_READTRANSFERDATA(bufferdata) ((NpyAuxData **) \ - 
(&(bufferdata)->bd_flexdata + 5*(nop))) -#define NBF_WRITETRANSFERFN(bufferdata) ((PyArray_StridedTransferFn **) \ - (&(bufferdata)->bd_flexdata + 6*(nop))) -#define NBF_WRITETRANSFERDATA(bufferdata) ((NpyAuxData **) \ - (&(bufferdata)->bd_flexdata + 7*(nop))) -#define NBF_BUFFERS(bufferdata) ((char **) \ - (&(bufferdata)->bd_flexdata + 8*(nop))) - -/* Internal-only AXISDATA MEMBER ACCESS. */ -struct NpyIter_AD { - npy_intp shape, index; - npy_intp ad_flexdata; -}; -#define NAD_SHAPE(axisdata) ((axisdata)->shape) -#define NAD_INDEX(axisdata) ((axisdata)->index) -#define NAD_STRIDES(axisdata) ( \ - &(axisdata)->ad_flexdata + 0) -#define NAD_PTRS(axisdata) ((char **) \ - &(axisdata)->ad_flexdata + 1*(nop+1)) - -#define NAD_NSTRIDES() \ - ((nop) + ((itflags&NPY_ITFLAG_HASINDEX) ? 1 : 0)) - -/* Size of one AXISDATA struct within the iterator */ -#define NIT_AXISDATA_SIZEOF(itflags, ndim, nop) (( \ - /* intp shape */ \ - 1 + \ - /* intp index */ \ - 1 + \ - /* intp stride[nop+1] AND char* ptr[nop+1] */ \ - 2*((nop)+1) \ - )*NPY_SIZEOF_INTP ) - -/* - * Macro to advance an AXISDATA pointer by a specified count. - * Requires that sizeof_axisdata be previously initialized - * to NIT_AXISDATA_SIZEOF(itflags, ndim, nop). - */ -#define NIT_INDEX_AXISDATA(axisdata, index) ((NpyIter_AxisData *) \ - (((char *)(axisdata)) + (index)*sizeof_axisdata)) -#define NIT_ADVANCE_AXISDATA(axisdata, count) \ - axisdata = NIT_INDEX_AXISDATA(axisdata, count) - -/* Size of the whole iterator */ -#define NIT_SIZEOF_ITERATOR(itflags, ndim, nop) ( \ - sizeof(struct NpyIter_InternalOnly) + \ - NIT_AXISDATA_OFFSET(itflags, ndim, nop) + \ - NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*(ndim)) - -/* Internal helper functions */ -static int -npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags); -static int -npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes, - npy_intp *itershape); -static int -npyiter_calculate_ndim(int nop, PyArrayObject **op_in, - int oa_ndim); -static int -npyiter_check_per_op_flags(npy_uint32 flags, char *op_itflags); -static int -npyiter_prepare_one_operand(PyArrayObject **op, - char **op_dataptr, - PyArray_Descr *op_request_dtype, - PyArray_Descr** op_dtype, - npy_uint32 flags, - npy_uint32 op_flags, char *op_itflags); -static int -npyiter_prepare_operands(int nop, PyArrayObject **op_in, - PyArrayObject **op, - char **op_dataptr, - PyArray_Descr **op_request_dtypes, - PyArray_Descr **op_dtype, - npy_uint32 flags, - npy_uint32 *op_flags, char *op_itflags); -static int -npyiter_check_casting(int nop, PyArrayObject **op, - PyArray_Descr **op_dtype, - NPY_CASTING casting, - char *op_itflags); -static int -npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags, - char **op_dataptr, - npy_uint32 *op_flags, int **op_axes, - npy_intp *itershape, - int output_scalars); -static void -npyiter_replace_axisdata(NpyIter *iter, int iop, - PyArrayObject *op, - int op_ndim, char *op_dataptr, - int *op_axes); -static void -npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags); -static void -npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order); - -static void -npyiter_flip_negative_strides(NpyIter *iter); -static void -npyiter_reverse_axis_ordering(NpyIter *iter); -static void -npyiter_find_best_axis_ordering(NpyIter *iter); -static void -npyiter_coalesce_axes(NpyIter *iter); - -static PyArray_Descr * -npyiter_get_common_dtype(int nop, PyArrayObject **op, - char *op_itflags, PyArray_Descr **op_dtype, - PyArray_Descr **op_request_dtypes, - int only_inputs, int 
output_scalars); - -static PyArrayObject * -npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, - npy_uint32 flags, char *op_itflags, - int op_ndim, npy_intp *shape, - PyArray_Descr *op_dtype, int *op_axes); -static int -npyiter_allocate_arrays(NpyIter *iter, - npy_uint32 flags, - PyArray_Descr **op_dtype, PyTypeObject *subtype, - npy_uint32 *op_flags, char *op_itflags, - int **op_axes, int output_scalars); -static void -npyiter_get_priority_subtype(int nop, PyArrayObject **op, - char *op_itflags, - double *subtype_priority, PyTypeObject **subtype); +/* Indicate that this .c file is allowed to include the header */ +#define NPY_ITERATOR_IMPLEMENTATION_CODE +#include "nditer_impl.h" -static int -npyiter_allocate_transfer_functions(NpyIter *iter); -static int -npyiter_allocate_buffers(NpyIter *iter, char **errmsg); -static void npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex); -static void -npyiter_copy_from_buffers(NpyIter *iter); -static void -npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs); +/* Internal helper functions private to this file */ static npy_intp npyiter_checkreducesize(NpyIter *iter, npy_intp count, npy_intp *reduce_innersize, npy_intp *reduce_outerdim); /*NUMPY_API - * Allocate a new iterator for multiple array objects, and advanced - * options for controlling the broadcasting, shape, and buffer size. - */ -NPY_NO_EXPORT NpyIter * -NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, - NPY_ORDER order, NPY_CASTING casting, - npy_uint32 *op_flags, - PyArray_Descr **op_request_dtypes, - int oa_ndim, int **op_axes, npy_intp *itershape, - npy_intp buffersize) -{ - npy_uint32 itflags = NPY_ITFLAG_IDENTPERM; - int idim, ndim; - int iop; - - /* The iterator being constructed */ - NpyIter *iter; - - /* Per-operand values */ - PyArrayObject **op; - PyArray_Descr **op_dtype; - char *op_itflags; - char **op_dataptr; - - npy_int8 *perm; - NpyIter_BufferData *bufferdata = NULL; - int any_allocate = 0, any_missing_dtypes = 0, - output_scalars = 0, need_subtype = 0; - - /* The subtype for automatically allocated outputs */ - double subtype_priority = NPY_PRIORITY; - PyTypeObject *subtype = &PyArray_Type; - -#if NPY_IT_CONSTRUCTION_TIMING - npy_intp c_temp, - c_start, - c_check_op_axes, - c_check_global_flags, - c_calculate_ndim, - c_malloc, - c_prepare_operands, - c_fill_axisdata, - c_compute_index_strides, - c_apply_forced_iteration_order, - c_find_best_axis_ordering, - c_get_priority_subtype, - c_find_output_common_dtype, - c_check_casting, - c_allocate_arrays, - c_coalesce_axes, - c_prepare_buffers; -#endif - - NPY_IT_TIME_POINT(c_start); - - if (nop > NPY_MAXARGS) { - PyErr_Format(PyExc_ValueError, - "Cannot construct an iterator with more than %d operands " - "(%d were requested)", (int)NPY_MAXARGS, (int)nop); - return NULL; - } - - /* Error check 'oa_ndim' and 'op_axes', which must be used together */ - if (!npyiter_check_op_axes(nop, oa_ndim, op_axes, itershape)) { - return NULL; - } - - NPY_IT_TIME_POINT(c_check_op_axes); - - /* Check the global iterator flags */ - if (!npyiter_check_global_flags(flags, &itflags)) { - return NULL; - } - - NPY_IT_TIME_POINT(c_check_global_flags); - - /* Calculate how many dimensions the iterator should have */ - ndim = npyiter_calculate_ndim(nop, op_in, oa_ndim); - - /* If 'ndim' is zero, any outputs should be scalars */ - if (ndim == 0) { - output_scalars = 1; - ndim = 1; - } - - NPY_IT_TIME_POINT(c_calculate_ndim); - - /* Allocate memory for the iterator */ - iter = (NpyIter*) - 
PyArray_malloc(NIT_SIZEOF_ITERATOR(itflags, ndim, nop)); - - NPY_IT_TIME_POINT(c_malloc); - - /* Fill in the basic data */ - NIT_ITFLAGS(iter) = itflags; - NIT_NDIM(iter) = ndim; - NIT_NOP(iter) = nop; - NIT_ITERINDEX(iter) = 0; - memset(NIT_BASEOFFSETS(iter), 0, (nop+1)*NPY_SIZEOF_INTP); - - op = NIT_OPERANDS(iter); - op_dtype = NIT_DTYPES(iter); - op_itflags = NIT_OPITFLAGS(iter); - op_dataptr = NIT_RESETDATAPTR(iter); - - /* Prepare all the operands */ - if (!npyiter_prepare_operands(nop, op_in, op, op_dataptr, - op_request_dtypes, op_dtype, - flags, - op_flags, op_itflags)) { - PyArray_free(iter); - return NULL; - } - /* Set resetindex to zero as well (it's just after the resetdataptr) */ - op_dataptr[nop] = 0; - - NPY_IT_TIME_POINT(c_prepare_operands); - - /* - * Initialize buffer data (must set the buffers and transferdata - * to NULL before we might deallocate the iterator). - */ - if (itflags&NPY_ITFLAG_BUFFER) { - bufferdata = NIT_BUFFERDATA(iter); - NBF_SIZE(bufferdata) = 0; - memset(NBF_BUFFERS(bufferdata), 0, nop*NPY_SIZEOF_INTP); - memset(NBF_READTRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP); - memset(NBF_WRITETRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP); - } - - /* Fill in the AXISDATA arrays and set the ITERSIZE field */ - if (!npyiter_fill_axisdata(iter, flags, op_itflags, op_dataptr, - op_flags, op_axes, itershape, - output_scalars)) { - NpyIter_Deallocate(iter); - return NULL; - } - - NPY_IT_TIME_POINT(c_fill_axisdata); - - if (itflags&NPY_ITFLAG_BUFFER) { - /* - * If buffering is enabled and no buffersize was given, use a default - * chosen to be big enough to get some amortization benefits, but - * small enough to be cache-friendly. - */ - if (buffersize <= 0) { - buffersize = NPY_BUFSIZE; - } - /* No point in a buffer bigger than the iteration size */ - if (buffersize > NIT_ITERSIZE(iter)) { - buffersize = NIT_ITERSIZE(iter); - } - NBF_BUFFERSIZE(bufferdata) = buffersize; - } - - /* - * If an index was requested, compute the strides for it. - * Note that we must do this before changing the order of the - * axes - */ - npyiter_compute_index_strides(iter, flags); - - NPY_IT_TIME_POINT(c_compute_index_strides); - - /* Initialize the perm to the identity */ - perm = NIT_PERM(iter); - for(idim = 0; idim < ndim; ++idim) { - perm[idim] = (npy_int8)idim; - } - - /* - * If an iteration order is being forced, apply it. - */ - npyiter_apply_forced_iteration_order(iter, order); - itflags = NIT_ITFLAGS(iter); - - NPY_IT_TIME_POINT(c_apply_forced_iteration_order); - - /* Set some flags for allocated outputs */ - for (iop = 0; iop < nop; ++iop) { - if (op[iop] == NULL) { - /* Flag this so later we can avoid flipping axes */ - any_allocate = 1; - /* If a subtype may be used, indicate so */ - if (!(op_flags[iop]&NPY_ITER_NO_SUBTYPE)) { - need_subtype = 1; - } - /* - * If the data type wasn't provided, will need to - * calculate it. - */ - if (op_dtype[iop] == NULL) { - any_missing_dtypes = 1; - } - } - } - - /* - * If the ordering was not forced, reorder the axes - * and flip negative strides to find the best one. - */ - if (!(itflags&NPY_ITFLAG_FORCEDORDER)) { - if (ndim > 1) { - npyiter_find_best_axis_ordering(iter); - } - /* - * If there's an output being allocated, we must not negate - * any strides. 
- */ - if (!any_allocate && !(flags&NPY_ITER_DONT_NEGATE_STRIDES)) { - npyiter_flip_negative_strides(iter); - } - itflags = NIT_ITFLAGS(iter); - } - - NPY_IT_TIME_POINT(c_find_best_axis_ordering); - - if (need_subtype) { - npyiter_get_priority_subtype(nop, op, op_itflags, - &subtype_priority, &subtype); - } - - NPY_IT_TIME_POINT(c_get_priority_subtype); - - /* - * If an automatically allocated output didn't have a specified - * dtype, we need to figure it out now, before allocating the outputs. - */ - if (any_missing_dtypes || (flags&NPY_ITER_COMMON_DTYPE)) { - PyArray_Descr *dtype; - int only_inputs = !(flags&NPY_ITER_COMMON_DTYPE); - - op = NIT_OPERANDS(iter); - op_dtype = NIT_DTYPES(iter); - - dtype = npyiter_get_common_dtype(nop, op, - op_itflags, op_dtype, - op_request_dtypes, - only_inputs, - output_scalars); - if (dtype == NULL) { - NpyIter_Deallocate(iter); - return NULL; - } - if (flags&NPY_ITER_COMMON_DTYPE) { - NPY_IT_DBG_PRINT("Iterator: Replacing all data types\n"); - /* Replace all the data types */ - for (iop = 0; iop < nop; ++iop) { - if (op_dtype[iop] != dtype) { - Py_XDECREF(op_dtype[iop]); - Py_INCREF(dtype); - op_dtype[iop] = dtype; - } - } - } - else { - NPY_IT_DBG_PRINT("Iterator: Setting unset output data types\n"); - /* Replace the NULL data types */ - for (iop = 0; iop < nop; ++iop) { - if (op_dtype[iop] == NULL) { - Py_INCREF(dtype); - op_dtype[iop] = dtype; - } - } - } - Py_DECREF(dtype); - } - - NPY_IT_TIME_POINT(c_find_output_common_dtype); - - /* - * All of the data types have been settled, so it's time - * to check that data type conversions are following the - * casting rules. - */ - if (!npyiter_check_casting(nop, op, op_dtype, casting, op_itflags)) { - NpyIter_Deallocate(iter); - return NULL; - } - - NPY_IT_TIME_POINT(c_check_casting); - - /* - * At this point, the iteration order has been finalized. so - * any allocation of ops that were NULL, or any temporary - * copying due to casting/byte order/alignment can be - * done now using a memory layout matching the iterator. - */ - if (!npyiter_allocate_arrays(iter, flags, op_dtype, subtype, op_flags, - op_itflags, op_axes, output_scalars)) { - NpyIter_Deallocate(iter); - return NULL; - } - - NPY_IT_TIME_POINT(c_allocate_arrays); - - /* - * Finally, if a multi-index wasn't requested, - * it may be possible to coalesce some axes together. - */ - if (ndim > 1 && !(itflags&NPY_ITFLAG_HASMULTIINDEX)) { - npyiter_coalesce_axes(iter); - /* - * The operation may have changed the layout, so we have to - * get the internal pointers again. - */ - itflags = NIT_ITFLAGS(iter); - ndim = NIT_NDIM(iter); - op = NIT_OPERANDS(iter); - op_dtype = NIT_DTYPES(iter); - op_itflags = NIT_OPITFLAGS(iter); - op_dataptr = NIT_RESETDATAPTR(iter); - } - - NPY_IT_TIME_POINT(c_coalesce_axes); - - /* - * Now that the axes are finished, check whether we can apply - * the single iteration optimization to the iternext function. - */ - if (!(itflags&NPY_ITFLAG_BUFFER)) { - NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - if (itflags&NPY_ITFLAG_EXLOOP) { - if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) { - NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION; - } - } - else if (NIT_ITERSIZE(iter) == 1) { - NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION; - } - } - - /* - * If REFS_OK was specified, check whether there are any - * reference arrays and flag it if so. 
- */ - if (flags&NPY_ITER_REFS_OK) { - for (iop = 0; iop < nop; ++iop) { - PyArray_Descr *rdt = op_dtype[iop]; - if ((rdt->flags&(NPY_ITEM_REFCOUNT| - NPY_ITEM_IS_POINTER| - NPY_NEEDS_PYAPI)) != 0) { - /* Iteration needs API access */ - NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI; - } - } - } - - /* If buffering is set without delayed allocation */ - if (itflags&NPY_ITFLAG_BUFFER) { - if (!npyiter_allocate_transfer_functions(iter)) { - NpyIter_Deallocate(iter); - return NULL; - } - if (itflags&NPY_ITFLAG_DELAYBUF) { - bufferdata = NIT_BUFFERDATA(iter); - /* Make the data pointers NULL */ - memset(NBF_PTRS(bufferdata), 0, nop*NPY_SIZEOF_INTP); - } - else { - /* Allocate the buffers */ - if (!npyiter_allocate_buffers(iter, NULL)) { - NpyIter_Deallocate(iter); - return NULL; - } - - /* Prepare the next buffers and set iterend/size */ - npyiter_copy_to_buffers(iter, NULL); - } - } - - NPY_IT_TIME_POINT(c_prepare_buffers); - -#if NPY_IT_CONSTRUCTION_TIMING - printf("\nIterator construction timing:\n"); - NPY_IT_PRINT_TIME_START(c_start); - NPY_IT_PRINT_TIME_VAR(c_check_op_axes); - NPY_IT_PRINT_TIME_VAR(c_check_global_flags); - NPY_IT_PRINT_TIME_VAR(c_calculate_ndim); - NPY_IT_PRINT_TIME_VAR(c_malloc); - NPY_IT_PRINT_TIME_VAR(c_prepare_operands); - NPY_IT_PRINT_TIME_VAR(c_fill_axisdata); - NPY_IT_PRINT_TIME_VAR(c_compute_index_strides); - NPY_IT_PRINT_TIME_VAR(c_apply_forced_iteration_order); - NPY_IT_PRINT_TIME_VAR(c_find_best_axis_ordering); - NPY_IT_PRINT_TIME_VAR(c_get_priority_subtype); - NPY_IT_PRINT_TIME_VAR(c_find_output_common_dtype); - NPY_IT_PRINT_TIME_VAR(c_check_casting); - NPY_IT_PRINT_TIME_VAR(c_allocate_arrays); - NPY_IT_PRINT_TIME_VAR(c_coalesce_axes); - NPY_IT_PRINT_TIME_VAR(c_prepare_buffers); - printf("\n"); -#endif - - return iter; -} - -/*NUMPY_API - * Allocate a new iterator for more than one array object, using - * standard NumPy broadcasting rules and the default buffer size. - */ -NPY_NO_EXPORT NpyIter * -NpyIter_MultiNew(int nop, PyArrayObject **op_in, npy_uint32 flags, - NPY_ORDER order, NPY_CASTING casting, - npy_uint32 *op_flags, - PyArray_Descr **op_request_dtypes) -{ - return NpyIter_AdvancedNew(nop, op_in, flags, order, casting, - op_flags, op_request_dtypes, - 0, NULL, NULL, 0); -} - -/*NUMPY_API - * Allocate a new iterator for one array object. 
- */ -NPY_NO_EXPORT NpyIter * -NpyIter_New(PyArrayObject *op, npy_uint32 flags, - NPY_ORDER order, NPY_CASTING casting, - PyArray_Descr* dtype) -{ - /* Split the flags into separate global and op flags */ - npy_uint32 op_flags = flags&NPY_ITER_PER_OP_FLAGS; - flags &= NPY_ITER_GLOBAL_FLAGS; - - return NpyIter_AdvancedNew(1, &op, flags, order, casting, - &op_flags, &dtype, - 0, NULL, NULL, 0); -} - -/*NUMPY_API - * Makes a copy of the iterator - */ -NPY_NO_EXPORT NpyIter * -NpyIter_Copy(NpyIter *iter) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int ndim = NIT_NDIM(iter); - int iop, nop = NIT_NOP(iter); - int out_of_memory = 0; - - npy_intp size; - NpyIter *newiter; - PyArrayObject **objects; - PyArray_Descr **dtypes; - - /* Allocate memory for the new iterator */ - size = NIT_SIZEOF_ITERATOR(itflags, ndim, nop); - newiter = (NpyIter*)PyArray_malloc(size); - - /* Copy the raw values to the new iterator */ - memcpy(newiter, iter, size); - - /* Take ownership of references to the operands and dtypes */ - objects = NIT_OPERANDS(newiter); - dtypes = NIT_DTYPES(newiter); - for (iop = 0; iop < nop; ++iop) { - Py_INCREF(objects[iop]); - Py_INCREF(dtypes[iop]); - } - - /* Allocate buffers and make copies of the transfer data if necessary */ - if (itflags&NPY_ITFLAG_BUFFER) { - NpyIter_BufferData *bufferdata; - npy_intp buffersize, itemsize; - char **buffers; - NpyAuxData **readtransferdata, **writetransferdata; - - bufferdata = NIT_BUFFERDATA(newiter); - buffers = NBF_BUFFERS(bufferdata); - readtransferdata = NBF_READTRANSFERDATA(bufferdata); - writetransferdata = NBF_WRITETRANSFERDATA(bufferdata); - buffersize = NBF_BUFFERSIZE(bufferdata); - - for (iop = 0; iop < nop; ++iop) { - if (buffers[iop] != NULL) { - if (out_of_memory) { - buffers[iop] = NULL; - } - else { - itemsize = dtypes[iop]->elsize; - buffers[iop] = PyArray_malloc(itemsize*buffersize); - if (buffers[iop] == NULL) { - out_of_memory = 1; - } - } - } - - if (readtransferdata[iop] != NULL) { - if (out_of_memory) { - readtransferdata[iop] = NULL; - } - else { - readtransferdata[iop] = - NPY_AUXDATA_CLONE(readtransferdata[iop]); - if (readtransferdata[iop] == NULL) { - out_of_memory = 1; - } - } - } - - if (writetransferdata[iop] != NULL) { - if (out_of_memory) { - writetransferdata[iop] = NULL; - } - else { - writetransferdata[iop] = - NPY_AUXDATA_CLONE(writetransferdata[iop]); - if (writetransferdata[iop] == NULL) { - out_of_memory = 1; - } - } - } - } - - /* Initialize the buffers to the current iterindex */ - if (!out_of_memory && NBF_SIZE(bufferdata) > 0) { - npyiter_goto_iterindex(newiter, NIT_ITERINDEX(newiter)); - - /* Prepare the next buffers and set iterend/size */ - npyiter_copy_to_buffers(newiter, NULL); - } - } - - if (out_of_memory) { - NpyIter_Deallocate(newiter); - PyErr_NoMemory(); - return NULL; - } - - return newiter; -} - -/*NUMPY_API - * Deallocate an iterator - */ -NPY_NO_EXPORT int -NpyIter_Deallocate(NpyIter *iter) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - /*int ndim = NIT_NDIM(iter);*/ - int iop, nop = NIT_NOP(iter); - - PyArray_Descr **dtype = NIT_DTYPES(iter); - PyArrayObject **object = NIT_OPERANDS(iter); - - /* Deallocate any buffers and buffering data */ - if (itflags&NPY_ITFLAG_BUFFER) { - NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); - char **buffers; - NpyAuxData **transferdata; - - /* buffers */ - buffers = NBF_BUFFERS(bufferdata); - for(iop = 0; iop < nop; ++iop, ++buffers) { - if (*buffers) { - PyArray_free(*buffers); - } - } - /* read bufferdata */ - transferdata = 
NBF_READTRANSFERDATA(bufferdata); - for(iop = 0; iop < nop; ++iop, ++transferdata) { - if (*transferdata) { - NPY_AUXDATA_FREE(*transferdata); - } - } - /* write bufferdata */ - transferdata = NBF_WRITETRANSFERDATA(bufferdata); - for(iop = 0; iop < nop; ++iop, ++transferdata) { - if (*transferdata) { - NPY_AUXDATA_FREE(*transferdata); - } - } - } - - /* Deallocate all the dtypes and objects that were iterated */ - for(iop = 0; iop < nop; ++iop, ++dtype, ++object) { - Py_XDECREF(*dtype); - Py_XDECREF(*object); - } - - /* Deallocate the iterator memory */ - PyArray_free(iter); - - return NPY_SUCCEED; -} - -/*NUMPY_API * Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX * was set for iterator creation, and does not work if buffering is * enabled. This function also resets the iterator to its initial state. @@ -2821,1545 +1876,7 @@ NpyIter_GetInnerLoopSizePtr(NpyIter *iter) } } -/* Checks 'flags' for (C|F)_ORDER_INDEX, MULTI_INDEX, and EXTERNAL_LOOP, - * setting the appropriate internal flags in 'itflags'. - * - * Returns 1 on success, 0 on error. - */ -static int -npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags) -{ - if ((flags&NPY_ITER_PER_OP_FLAGS) != 0) { - PyErr_SetString(PyExc_ValueError, - "A per-operand flag was passed as a global flag " - "to the iterator constructor"); - return 0; - } - - /* Check for an index */ - if (flags&(NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) { - if ((flags&(NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) == - (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) { - PyErr_SetString(PyExc_ValueError, - "Iterator flags C_INDEX and " - "F_INDEX cannot both be specified"); - return 0; - } - (*itflags) |= NPY_ITFLAG_HASINDEX; - } - /* Check if a multi-index was requested */ - if (flags&NPY_ITER_MULTI_INDEX) { - /* - * This flag primarily disables dimension manipulations that - * would produce an incorrect multi-index. 
- */ - (*itflags) |= NPY_ITFLAG_HASMULTIINDEX; - } - /* Check if the caller wants to handle inner iteration */ - if (flags&NPY_ITER_EXTERNAL_LOOP) { - if ((*itflags)&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASMULTIINDEX)) { - PyErr_SetString(PyExc_ValueError, - "Iterator flag EXTERNAL_LOOP cannot be used " - "if an index or multi-index is being tracked"); - return 0; - } - (*itflags) |= NPY_ITFLAG_EXLOOP; - } - /* Ranged */ - if (flags&NPY_ITER_RANGED) { - (*itflags) |= NPY_ITFLAG_RANGE; - if ((flags&NPY_ITER_EXTERNAL_LOOP) && - !(flags&NPY_ITER_BUFFERED)) { - PyErr_SetString(PyExc_ValueError, - "Iterator flag RANGED cannot be used with " - "the flag EXTERNAL_LOOP unless " - "BUFFERED is also enabled"); - return 0; - } - } - /* Buffering */ - if (flags&NPY_ITER_BUFFERED) { - (*itflags) |= NPY_ITFLAG_BUFFER; - if (flags&NPY_ITER_GROWINNER) { - (*itflags) |= NPY_ITFLAG_GROWINNER; - } - if (flags&NPY_ITER_DELAY_BUFALLOC) { - (*itflags) |= NPY_ITFLAG_DELAYBUF; - } - } - - return 1; -} - -static int -npyiter_calculate_ndim(int nop, PyArrayObject **op_in, - int oa_ndim) -{ - /* If 'op_axes' is being used, force 'ndim' */ - if (oa_ndim > 0 ) { - return oa_ndim; - } - /* Otherwise it's the maximum 'ndim' from the operands */ - else { - int ndim = 0, iop; - - for (iop = 0; iop < nop; ++iop) { - if (op_in[iop] != NULL) { - int ondim = PyArray_NDIM(op_in[iop]); - if (ondim > ndim) { - ndim = ondim; - } - } - - } - - return ndim; - } -} - -static int -npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes, - npy_intp *itershape) -{ - char axes_dupcheck[NPY_MAXDIMS]; - int iop, idim; - - if (oa_ndim == 0 && (op_axes != NULL || itershape != NULL)) { - PyErr_Format(PyExc_ValueError, - "If 'op_axes' or 'itershape' is not NULL in the" - "iterator constructor, 'oa_ndim' must be greater than zero"); - return 0; - } - else if (oa_ndim > 0) { - if (oa_ndim > NPY_MAXDIMS) { - PyErr_Format(PyExc_ValueError, - "Cannot construct an iterator with more than %d dimensions " - "(%d were requested for op_axes)", - (int)NPY_MAXDIMS, oa_ndim); - return 0; - } - else if (op_axes == NULL) { - PyErr_Format(PyExc_ValueError, - "If 'oa_ndim' is greater than zero in the iterator " - "constructor, then op_axes cannot be NULL"); - return 0; - } - - /* Check that there are no duplicates in op_axes */ - for (iop = 0; iop < nop; ++iop) { - int *axes = op_axes[iop]; - if (axes != NULL) { - memset(axes_dupcheck, 0, NPY_MAXDIMS); - for (idim = 0; idim < oa_ndim; ++idim) { - npy_intp i = axes[idim]; - if (i >= 0) { - if (i >= NPY_MAXDIMS) { - PyErr_Format(PyExc_ValueError, - "The 'op_axes' provided to the iterator " - "constructor for operand %d " - "contained invalid " - "values %d", (int)iop, (int)i); - return 0; - } else if(axes_dupcheck[i] == 1) { - PyErr_Format(PyExc_ValueError, - "The 'op_axes' provided to the iterator " - "constructor for operand %d " - "contained duplicate " - "value %d", (int)iop, (int)i); - return 0; - } - else { - axes_dupcheck[i] = 1; - } - } - } - } - } - } - - return 1; -} - -/* - * Checks the per-operand input flags, and fills in op_itflags. - * - * Returns 1 on success, 0 on failure. 
- */ -static int -npyiter_check_per_op_flags(npy_uint32 op_flags, char *op_itflags) -{ - if ((op_flags&NPY_ITER_GLOBAL_FLAGS) != 0) { - PyErr_SetString(PyExc_ValueError, - "A global iterator flag was passed as a per-operand flag " - "to the iterator constructor"); - return 0; - } - - /* Check the read/write flags */ - if (op_flags&NPY_ITER_READONLY) { - /* The read/write flags are mutually exclusive */ - if (op_flags&(NPY_ITER_READWRITE|NPY_ITER_WRITEONLY)) { - PyErr_SetString(PyExc_ValueError, - "Only one of the iterator flags READWRITE, " - "READONLY, and WRITEONLY may be " - "specified for an operand"); - return 0; - } - - *op_itflags = NPY_OP_ITFLAG_READ; - } - else if (op_flags&NPY_ITER_READWRITE) { - /* The read/write flags are mutually exclusive */ - if (op_flags&NPY_ITER_WRITEONLY) { - PyErr_SetString(PyExc_ValueError, - "Only one of the iterator flags READWRITE, " - "READONLY, and WRITEONLY may be " - "specified for an operand"); - return 0; - } - - *op_itflags = NPY_OP_ITFLAG_READ|NPY_OP_ITFLAG_WRITE; - } - else if(op_flags&NPY_ITER_WRITEONLY) { - *op_itflags = NPY_OP_ITFLAG_WRITE; - } - else { - PyErr_SetString(PyExc_ValueError, - "None of the iterator flags READWRITE, " - "READONLY, or WRITEONLY were " - "specified for an operand"); - return 0; - } - - /* Check the flags for temporary copies */ - if (((*op_itflags)&NPY_OP_ITFLAG_WRITE) && - (op_flags&(NPY_ITER_COPY| - NPY_ITER_UPDATEIFCOPY)) == NPY_ITER_COPY) { - PyErr_SetString(PyExc_ValueError, - "If an iterator operand is writeable, must use " - "the flag UPDATEIFCOPY instead of " - "COPY"); - return 0; - } - - return 1; -} - -/* - * Prepares a a constructor operand. Assumes a reference to 'op' - * is owned, and that 'op' may be replaced. Fills in 'op_dtype' - * and 'ndim'. - * - * Returns 1 on success, 0 on failure. - */ -static int -npyiter_prepare_one_operand(PyArrayObject **op, - char **op_dataptr, - PyArray_Descr *op_request_dtype, - PyArray_Descr **op_dtype, - npy_uint32 flags, - npy_uint32 op_flags, char *op_itflags) -{ - /* NULL operands must be automatically allocated outputs */ - if (*op == NULL) { - /* ALLOCATE should be enabled */ - if (!(op_flags&NPY_ITER_ALLOCATE)) { - PyErr_SetString(PyExc_ValueError, - "Iterator operand was NULL, but automatic allocation as an " - "output wasn't requested"); - return 0; - } - /* Writing should be enabled */ - if (!((*op_itflags)&NPY_OP_ITFLAG_WRITE)) { - PyErr_SetString(PyExc_ValueError, - "Automatic allocation was requested for an iterator " - "operand, but it wasn't flagged for writing"); - return 0; - } - /* - * Reading should be disabled if buffering is enabled without - * also enabling NPY_ITER_DELAY_BUFALLOC. In all other cases, - * the caller may initialize the allocated operand to a value - * before beginning iteration. 
- */ - if (((flags&(NPY_ITER_BUFFERED| - NPY_ITER_DELAY_BUFALLOC)) == NPY_ITER_BUFFERED) && - ((*op_itflags)&NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "Automatic allocation was requested for an iterator " - "operand, and it was flagged as readable, but buffering " - " without delayed allocation was enabled"); - return 0; - } - *op_dataptr = NULL; - /* If a requested dtype was provided, use it, otherwise NULL */ - Py_XINCREF(op_request_dtype); - *op_dtype = op_request_dtype; - - return 1; - } - - if (PyArray_Check(*op)) { - if (((*op_itflags)&NPY_OP_ITFLAG_WRITE) && - (!PyArray_CHKFLAGS(*op, NPY_ARRAY_WRITEABLE))) { - PyErr_SetString(PyExc_ValueError, - "Iterator operand was a non-writeable array, but was " - "flagged as writeable"); - return 0; - } - if (!(flags&NPY_ITER_ZEROSIZE_OK) && PyArray_SIZE(*op) == 0) { - PyErr_SetString(PyExc_ValueError, - "Iteration of zero-sized operands is not enabled"); - return 0; - } - *op_dataptr = PyArray_BYTES(*op); - /* PyArray_DESCR does not give us a reference */ - *op_dtype = PyArray_DESCR(*op); - if (*op_dtype == NULL) { - PyErr_SetString(PyExc_ValueError, - "Iterator input array object has no dtype descr"); - return 0; - } - Py_INCREF(*op_dtype); - /* - * If references weren't specifically allowed, make sure there - * are no references in the inputs or requested dtypes. - */ - if (!(flags&NPY_ITER_REFS_OK)) { - PyArray_Descr *dt = PyArray_DESCR(*op); - if (((dt->flags&(NPY_ITEM_REFCOUNT| - NPY_ITEM_IS_POINTER)) != 0) || - (dt != *op_dtype && - (((*op_dtype)->flags&(NPY_ITEM_REFCOUNT| - NPY_ITEM_IS_POINTER))) != 0)) { - PyErr_SetString(PyExc_TypeError, - "Iterator operand or requested dtype holds " - "references, but the REFS_OK flag was not enabled"); - return 0; - } - } - /* - * Checking whether casts are valid is done later, once the - * final data types have been selected. For now, just store the - * requested type. - */ - if (op_request_dtype != NULL) { - /* We just have a borrowed reference to op_request_dtype */ - Py_INCREF(op_request_dtype); - /* If the requested dtype is flexible, adapt it */ - PyArray_AdaptFlexibleDType((PyObject *)(*op), PyArray_DESCR(*op), - &op_request_dtype); - if (op_request_dtype == NULL) { - return 0; - } - - /* Store the requested dtype */ - Py_DECREF(*op_dtype); - *op_dtype = op_request_dtype; - } - - /* Check if the operand is in the byte order requested */ - if (op_flags&NPY_ITER_NBO) { - /* Check byte order */ - if (!PyArray_ISNBO((*op_dtype)->byteorder)) { - PyArray_Descr *nbo_dtype; - - /* Replace with a new descr which is in native byte order */ - nbo_dtype = PyArray_DescrNewByteorder(*op_dtype, NPY_NATIVE); - Py_DECREF(*op_dtype); - *op_dtype = nbo_dtype; - - NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST " - "because of NPY_ITER_NBO\n"); - /* Indicate that byte order or alignment needs fixing */ - *op_itflags |= NPY_OP_ITFLAG_CAST; - } - } - /* Check if the operand is aligned */ - if (op_flags&NPY_ITER_ALIGNED) { - /* Check alignment */ - if (!PyArray_ISALIGNED(*op)) { - NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST " - "because of NPY_ITER_ALIGNED\n"); - *op_itflags |= NPY_OP_ITFLAG_CAST; - } - } - /* - * The check for NPY_ITER_CONTIG can only be done later, - * once the final iteration order is settled. - */ - } - else { - PyErr_SetString(PyExc_ValueError, - "Iterator inputs must be ndarrays"); - return 0; - } - - return 1; -} - -/* - * Process all the operands, copying new references so further processing - * can replace the arrays if copying is necessary. 
- */ -static int -npyiter_prepare_operands(int nop, PyArrayObject **op_in, - PyArrayObject **op, - char **op_dataptr, - PyArray_Descr **op_request_dtypes, - PyArray_Descr **op_dtype, - npy_uint32 flags, - npy_uint32 *op_flags, char *op_itflags) -{ - int iop, i; - - for (iop = 0; iop < nop; ++iop) { - op[iop] = op_in[iop]; - Py_XINCREF(op[iop]); - op_dtype[iop] = NULL; - - /* Check the readonly/writeonly flags, and fill in op_itflags */ - if (!npyiter_check_per_op_flags(op_flags[iop], &op_itflags[iop])) { - for (i = 0; i <= iop; ++i) { - Py_XDECREF(op[i]); - Py_XDECREF(op_dtype[i]); - } - return 0; - } - - /* - * Prepare the operand. This produces an op_dtype[iop] reference - * on success. - */ - if (!npyiter_prepare_one_operand(&op[iop], - &op_dataptr[iop], - op_request_dtypes ? op_request_dtypes[iop] : NULL, - &op_dtype[iop], - flags, - op_flags[iop], &op_itflags[iop])) { - for (i = 0; i <= iop; ++i) { - Py_XDECREF(op[i]); - Py_XDECREF(op_dtype[i]); - } - return 0; - } - } - - - /* If all the operands were NULL, it's an error */ - if (op[0] == NULL) { - int all_null = 1; - for (iop = 1; iop < nop; ++iop) { - if (op[iop] != NULL) { - all_null = 0; - break; - } - } - if (all_null) { - for (i = 0; i < nop; ++i) { - Py_XDECREF(op[i]); - Py_XDECREF(op_dtype[i]); - } - PyErr_SetString(PyExc_ValueError, - "At least one iterator input must be non-NULL"); - return 0; - } - } - - return 1; -} - -static const char * -npyiter_casting_to_string(NPY_CASTING casting) -{ - switch (casting) { - case NPY_NO_CASTING: - return "'no'"; - case NPY_EQUIV_CASTING: - return "'equiv'"; - case NPY_SAFE_CASTING: - return "'safe'"; - case NPY_SAME_KIND_CASTING: - return "'same_kind'"; - case NPY_UNSAFE_CASTING: - return "'unsafe'"; - default: - return "<unknown>"; - } -} - -static int -npyiter_check_casting(int nop, PyArrayObject **op, - PyArray_Descr **op_dtype, - NPY_CASTING casting, - char *op_itflags) -{ - int iop; - - for(iop = 0; iop < nop; ++iop) { - NPY_IT_DBG_PRINT1("Iterator: Checking casting for operand %d\n", - (int)iop); -#if NPY_IT_DBG_TRACING - printf("op: "); - if (op[iop] != NULL) { - PyObject_Print((PyObject *)PyArray_DESCR(op[iop]), stdout, 0); - } - else { - printf("<null>"); - } - printf(", iter: "); - PyObject_Print((PyObject *)op_dtype[iop], stdout, 0); - printf("\n"); -#endif - /* If the types aren't equivalent, a cast is necessary */ - if (op[iop] != NULL && !PyArray_EquivTypes(PyArray_DESCR(op[iop]), - op_dtype[iop])) { - /* Check read (op -> temp) casting */ - if ((op_itflags[iop]&NPY_OP_ITFLAG_READ) && - !PyArray_CanCastArrayTo(op[iop], - op_dtype[iop], - casting)) { - PyObject *errmsg; - errmsg = PyUString_FromFormat( - "Iterator operand %d dtype could not be cast from ", - (int)iop); - PyUString_ConcatAndDel(&errmsg, - PyObject_Repr((PyObject *)PyArray_DESCR(op[iop]))); - PyUString_ConcatAndDel(&errmsg, - PyUString_FromString(" to ")); - PyUString_ConcatAndDel(&errmsg, - PyObject_Repr((PyObject *)op_dtype[iop])); - PyUString_ConcatAndDel(&errmsg, - PyUString_FromFormat(" according to the rule %s", - npyiter_casting_to_string(casting))); - PyErr_SetObject(PyExc_TypeError, errmsg); - return 0; - } - /* Check write (temp -> op) casting */ - if ((op_itflags[iop]&NPY_OP_ITFLAG_WRITE) && - !PyArray_CanCastTypeTo(op_dtype[iop], - PyArray_DESCR(op[iop]), - casting)) { - PyObject *errmsg; - errmsg = PyUString_FromString( - "Iterator requested dtype could not be cast from "); - PyUString_ConcatAndDel(&errmsg, - PyObject_Repr((PyObject *)op_dtype[iop])); - PyUString_ConcatAndDel(&errmsg, - 
PyUString_FromString(" to ")); - PyUString_ConcatAndDel(&errmsg, - PyObject_Repr((PyObject *)PyArray_DESCR(op[iop]))); - PyUString_ConcatAndDel(&errmsg, - PyUString_FromFormat(", the operand %d dtype, " - "according to the rule %s", - (int)iop, - npyiter_casting_to_string(casting))); - PyErr_SetObject(PyExc_TypeError, errmsg); - return 0; - } - - NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST " - "because the types aren't equivalent\n"); - /* Indicate that this operand needs casting */ - op_itflags[iop] |= NPY_OP_ITFLAG_CAST; - } - } - - return 1; -} - -static PyObject * -npyiter_shape_string(npy_intp n, npy_intp *vals, char *ending) -{ - npy_intp i; - PyObject *ret, *tmp; - - /* - * Negative dimension indicates "newaxis", which can - * be discarded for printing if its a leading dimension. - * Find the first non-"newaxis" dimension. - */ - i = 0; - while (i < n && vals[i] < 0) { - ++i; - } - - if (i == n) { - return PyUString_FromFormat("()%s", ending); - } - else { - ret = PyUString_FromFormat("(%" NPY_INTP_FMT, vals[i++]); - if (ret == NULL) { - return NULL; - } - } - - for (; i < n; ++i) { - if (vals[i] < 0) { - tmp = PyUString_FromString(",newaxis"); - } - else { - tmp = PyUString_FromFormat(",%" NPY_INTP_FMT, vals[i]); - } - if (tmp == NULL) { - Py_DECREF(ret); - return NULL; - } - - PyUString_ConcatAndDel(&ret, tmp); - if (ret == NULL) { - return NULL; - } - } - - tmp = PyUString_FromFormat(")%s", ending); - PyUString_ConcatAndDel(&ret, tmp); - return ret; -} - -/* - * Fills in the AXISDATA for the 'nop' operands, broadcasting - * the dimensionas as necessary. Also fills - * in the ITERSIZE data member. - * - * If op_axes is not NULL, it should point to an array of ndim-sized - * arrays, one for each op. - * - * Returns 1 on success, 0 on failure. 
- */ -static int -npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags, - char **op_dataptr, - npy_uint32 *op_flags, int **op_axes, - npy_intp *itershape, - int output_scalars) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int idim, ndim = NIT_NDIM(iter); - int iop, nop = NIT_NOP(iter); - - int ondim; - NpyIter_AxisData *axisdata; - npy_intp sizeof_axisdata; - PyArrayObject **op = NIT_OPERANDS(iter), *op_cur; - npy_intp broadcast_shape[NPY_MAXDIMS]; - - /* First broadcast the shapes together */ - if (itershape == NULL) { - for (idim = 0; idim < ndim; ++idim) { - broadcast_shape[idim] = 1; - } - } - else { - for (idim = 0; idim < ndim; ++idim) { - broadcast_shape[idim] = itershape[idim]; - /* Negative shape entries are deduced from the operands */ - if (broadcast_shape[idim] < 0) { - broadcast_shape[idim] = 1; - } - } - } - for (iop = 0; iop < nop; ++iop) { - op_cur = op[iop]; - if (op_cur != NULL) { - npy_intp *shape = PyArray_DIMS(op_cur); - ondim = PyArray_NDIM(op_cur); - - if (op_axes == NULL || op_axes[iop] == NULL) { - /* - * Possible if op_axes are being used, but - * op_axes[iop] is NULL - */ - if (ondim > ndim) { - PyErr_SetString(PyExc_ValueError, - "input operand has more dimensions than allowed " - "by the axis remapping"); - return 0; - } - for (idim = 0; idim < ondim; ++idim) { - npy_intp bshape = broadcast_shape[idim+ndim-ondim], - op_shape = shape[idim]; - if (bshape == 1) { - broadcast_shape[idim+ndim-ondim] = op_shape; - } - else if (bshape != op_shape && op_shape != 1) { - goto broadcast_error; - } - } - } - else { - int *axes = op_axes[iop]; - for (idim = 0; idim < ndim; ++idim) { - int i = axes[idim]; - if (i >= 0) { - if (i < ondim) { - npy_intp bshape = broadcast_shape[idim], - op_shape = shape[i]; - if (bshape == 1) { - broadcast_shape[idim] = op_shape; - } - else if (bshape != op_shape && op_shape != 1) { - goto broadcast_error; - } - } - else { - PyErr_Format(PyExc_ValueError, - "Iterator input op_axes[%d][%d] (==%d) " - "is not a valid axis of op[%d], which " - "has %d dimensions ", - (int)iop, (int)(ndim-idim-1), (int)i, - (int)iop, (int)ondim); - return 0; - } - } - } - } - } - } - /* - * If a shape was provided with a 1 entry, make sure that entry didn't - * get expanded by broadcasting. 
- */ - if (itershape != NULL) { - for (idim = 0; idim < ndim; ++idim) { - if (itershape[idim] == 1 && broadcast_shape[idim] != 1) { - goto broadcast_error; - } - } - } - - axisdata = NIT_AXISDATA(iter); - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); - - /* Now process the operands, filling in the axisdata */ - for (idim = 0; idim < ndim; ++idim) { - npy_intp bshape = broadcast_shape[ndim-idim-1]; - npy_intp *strides = NAD_STRIDES(axisdata); - - NAD_SHAPE(axisdata) = bshape; - NAD_INDEX(axisdata) = 0; - memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*nop); - - for (iop = 0; iop < nop; ++iop) { - op_cur = op[iop]; - - if (op_axes == NULL || op_axes[iop] == NULL) { - if (op_cur == NULL) { - strides[iop] = 0; - } - else { - ondim = PyArray_NDIM(op_cur); - if (bshape == 1) { - strides[iop] = 0; - if (idim >= ondim && !output_scalars && - (op_flags[iop]&NPY_ITER_NO_BROADCAST)) { - goto operand_different_than_broadcast; - } - } - else if (idim >= ondim || - PyArray_DIM(op_cur, ondim-idim-1) == 1) { - strides[iop] = 0; - if (op_flags[iop]&NPY_ITER_NO_BROADCAST) { - goto operand_different_than_broadcast; - } - /* If it's writeable, this means a reduction */ - if (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "reduction is not enabled"); - return 0; - } - if (!(op_itflags[iop]&NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "is flagged as write-only, not " - "read-write"); - return 0; - } - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE; - } - } - else { - strides[iop] = PyArray_STRIDE(op_cur, ondim-idim-1); - } - } - } - else { - int *axes = op_axes[iop]; - int i = axes[ndim-idim-1]; - if (i >= 0) { - if (bshape == 1 || op_cur == NULL) { - strides[iop] = 0; - } - else if (PyArray_DIM(op_cur, i) == 1) { - strides[iop] = 0; - if (op_flags[iop]&NPY_ITER_NO_BROADCAST) { - goto operand_different_than_broadcast; - } - /* If it's writeable, this means a reduction */ - if (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "reduction is not enabled"); - return 0; - } - if (!(op_itflags[iop]&NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "is flagged as write-only, not " - "read-write"); - return 0; - } - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE; - } - } - else { - strides[iop] = PyArray_STRIDE(op_cur, i); - } - } - else if (bshape == 1) { - strides[iop] = 0; - } - else { - strides[iop] = 0; - /* If it's writeable, this means a reduction */ - if (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "reduction is not enabled"); - return 0; - } - if (!(op_itflags[iop]&NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "output operand requires a reduction, but " - "is flagged as write-only, not " - "read-write"); - return 0; - } - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE; - } - } - } - } - - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - - /* Now fill in the ITERSIZE member */ - NIT_ITERSIZE(iter) = broadcast_shape[0]; - for (idim = 1; idim < ndim; ++idim) { - NIT_ITERSIZE(iter) *= broadcast_shape[idim]; - } - /* The range defaults 
to everything */ - NIT_ITERSTART(iter) = 0; - NIT_ITEREND(iter) = NIT_ITERSIZE(iter); - - return 1; - -broadcast_error: { - PyObject *errmsg, *tmp; - npy_intp remdims[NPY_MAXDIMS]; - char *tmpstr; - - if (op_axes == NULL) { - errmsg = PyUString_FromString("operands could not be broadcast " - "together with shapes "); - if (errmsg == NULL) { - return 0; - } - for (iop = 0; iop < nop; ++iop) { - if (op[iop] != NULL) { - tmp = npyiter_shape_string(PyArray_NDIM(op[iop]), - PyArray_DIMS(op[iop]), - " "); - if (tmp == NULL) { - Py_DECREF(errmsg); - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - } - } - if (itershape != NULL) { - tmp = PyUString_FromString("and requested shape "); - if (tmp == NULL) { - Py_DECREF(errmsg); - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - tmp = npyiter_shape_string(ndim, itershape, ""); - if (tmp == NULL) { - Py_DECREF(errmsg); - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - } - PyErr_SetObject(PyExc_ValueError, errmsg); - } - else { - errmsg = PyUString_FromString("operands could not be broadcast " - "together with remapped shapes " - "[original->remapped]: "); - for (iop = 0; iop < nop; ++iop) { - if (op[iop] != NULL) { - int *axes = op_axes[iop]; - - tmpstr = (axes == NULL) ? " " : "->"; - tmp = npyiter_shape_string(PyArray_NDIM(op[iop]), - PyArray_DIMS(op[iop]), - tmpstr); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - if (axes != NULL) { - for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[idim]; - - if (i >= 0 && i < PyArray_NDIM(op[iop])) { - remdims[idim] = PyArray_DIM(op[iop], i); - } - else { - remdims[idim] = -1; - } - } - tmp = npyiter_shape_string(ndim, remdims, " "); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - } - } - } - if (itershape != NULL) { - tmp = PyUString_FromString("and requested shape "); - if (tmp == NULL) { - Py_DECREF(errmsg); - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - tmp = npyiter_shape_string(ndim, itershape, ""); - if (tmp == NULL) { - Py_DECREF(errmsg); - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - } - PyErr_SetObject(PyExc_ValueError, errmsg); - } - - return 0; - } - -operand_different_than_broadcast: { - npy_intp remdims[NPY_MAXDIMS]; - PyObject *errmsg, *tmp; - - /* Start of error message */ - if (op_flags[iop]&NPY_ITER_READONLY) { - errmsg = PyUString_FromString("non-broadcastable operand " - "with shape "); - } - else { - errmsg = PyUString_FromString("non-broadcastable output " - "operand with shape "); - } - if (errmsg == NULL) { - return 0; - } - - /* Operand shape */ - tmp = npyiter_shape_string(PyArray_NDIM(op[iop]), - PyArray_DIMS(op[iop]), ""); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - /* Remapped operand shape */ - if (op_axes != NULL && op_axes[iop] != NULL) { - int *axes = op_axes[iop]; - - for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[ndim-idim-1]; - - if (i >= 0 && i < PyArray_NDIM(op[iop])) { - remdims[idim] = PyArray_DIM(op[iop], i); - } - else { - remdims[idim] = -1; - } - } - - tmp = PyUString_FromString(" [remapped to "); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == 
NULL) { - return 0; - } - - tmp = npyiter_shape_string(ndim, remdims, "]"); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - } - - tmp = PyUString_FromString(" doesn't match the broadcast shape "); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - /* Broadcast shape */ - tmp = npyiter_shape_string(ndim, broadcast_shape, ""); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - PyErr_SetObject(PyExc_ValueError, errmsg); - - return 0; - } -} - -/* - * Replaces the AXISDATA for the iop'th operand, broadcasting - * the dimensions as necessary. Assumes the replacement array is - * exactly the same shape as the original array used when - * npy_fill_axisdata was called. - * - * If op_axes is not NULL, it should point to an ndim-sized - * array. - */ -static void -npyiter_replace_axisdata(NpyIter *iter, int iop, - PyArrayObject *op, - int op_ndim, char *op_dataptr, - int *op_axes) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int idim, ndim = NIT_NDIM(iter); - int nop = NIT_NOP(iter); - - NpyIter_AxisData *axisdata0, *axisdata; - npy_intp sizeof_axisdata; - npy_int8 *perm; - npy_intp baseoffset = 0; - - perm = NIT_PERM(iter); - axisdata0 = NIT_AXISDATA(iter); - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); - - /* - * Replace just the strides which were non-zero, and compute - * the base data address. - */ - axisdata = axisdata0; - - if (op_axes != NULL) { - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p; - int i; - npy_intp shape; - - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = op_axes[ndim+p]; - } - else { - i = op_axes[ndim-p-1]; - } - - if (0 <= i && i < op_ndim) { - shape = PyArray_DIM(op, i); - if (shape != 1) { - npy_intp stride = PyArray_STRIDE(op, i); - if (p < 0) { - /* If the perm entry is negative, flip the axis */ - NAD_STRIDES(axisdata)[iop] = -stride; - baseoffset += stride*(shape-1); - } - else { - NAD_STRIDES(axisdata)[iop] = stride; - } - } - } - } - } - else { - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p; - int i; - npy_intp shape; - - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = op_ndim+p; - } - else { - i = op_ndim-p-1; - } - - if (i >= 0) { - shape = PyArray_DIM(op, i); - if (shape != 1) { - npy_intp stride = PyArray_STRIDE(op, i); - if (p < 0) { - /* If the perm entry is negative, flip the axis */ - NAD_STRIDES(axisdata)[iop] = -stride; - baseoffset += stride*(shape-1); - } - else { - NAD_STRIDES(axisdata)[iop] = stride; - } - } - } - } - } - - op_dataptr += baseoffset; - - /* Now the base data pointer is calculated, set it everywhere it's needed */ - NIT_RESETDATAPTR(iter)[iop] = op_dataptr; - NIT_BASEOFFSETS(iter)[iop] = baseoffset; - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - NAD_PTRS(axisdata)[iop] = op_dataptr; - } -} - -/* - * Computes the iterator's index strides and initializes the index values - * to zero. - * - * This must be called before the axes (i.e. the AXISDATA array) may - * be reordered. 
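/*
 * A minimal sketch of the C-index stride rule used below, for a
 * hypothetical shape given in ordinary C order (the iterator itself
 * stores AXISDATA from fastest-varying to slowest): each axis's index
 * stride is the number of elements in all faster axes, and length-1
 * axes get stride 0 since they never advance the index.
 */
static void
example_c_index_strides(int ndim, const npy_intp *shape,
                        npy_intp *index_strides)
{
    npy_intp indexstride = 1;
    int idim;

    for (idim = ndim - 1; idim >= 0; --idim) {
        index_strides[idim] = (shape[idim] == 1) ? 0 : indexstride;
        indexstride *= shape[idim];
    }
    /* For shape {3, 4, 5} this yields index strides {20, 5, 1} */
}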
- */ -static void -npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int idim, ndim = NIT_NDIM(iter); - int nop = NIT_NOP(iter); - - npy_intp indexstride; - NpyIter_AxisData *axisdata; - npy_intp sizeof_axisdata; - - /* - * If there is only one element being iterated, we just have - * to touch the first AXISDATA because nothing will ever be - * incremented. - */ - if (NIT_ITERSIZE(iter) == 1) { - if (itflags&NPY_ITFLAG_HASINDEX) { - axisdata = NIT_AXISDATA(iter); - NAD_PTRS(axisdata)[nop] = 0; - } - return; - } - - if (flags&NPY_ITER_C_INDEX) { - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); - axisdata = NIT_AXISDATA(iter); - indexstride = 1; - for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_intp shape = NAD_SHAPE(axisdata); - - if (shape == 1) { - NAD_STRIDES(axisdata)[nop] = 0; - } - else { - NAD_STRIDES(axisdata)[nop] = indexstride; - } - NAD_PTRS(axisdata)[nop] = 0; - indexstride *= shape; - } - } - else if (flags&NPY_ITER_F_INDEX) { - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); - axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1); - indexstride = 1; - for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, -1)) { - npy_intp shape = NAD_SHAPE(axisdata); - - if (shape == 1) { - NAD_STRIDES(axisdata)[nop] = 0; - } - else { - NAD_STRIDES(axisdata)[nop] = indexstride; - } - NAD_PTRS(axisdata)[nop] = 0; - indexstride *= shape; - } - } -} - -/* - * If the order is NPY_KEEPORDER, lets the iterator find the best - * iteration order, otherwise forces it. Indicates in the itflags that - * whether the iteration order was forced. - */ -static void -npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order) -{ - /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/ - int ndim = NIT_NDIM(iter); - int iop, nop = NIT_NOP(iter); - - switch (order) { - case NPY_CORDER: - NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER; - break; - case NPY_FORTRANORDER: - NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER; - /* Only need to actually do something if there is more than 1 dim */ - if (ndim > 1) { - npyiter_reverse_axis_ordering(iter); - } - break; - case NPY_ANYORDER: - NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER; - /* Only need to actually do something if there is more than 1 dim */ - if (ndim > 1) { - PyArrayObject **op = NIT_OPERANDS(iter); - int forder = 1; - - /* Check that all the array inputs are fortran order */ - for (iop = 0; iop < nop; ++iop, ++op) { - if (*op && !PyArray_CHKFLAGS(*op, NPY_ARRAY_F_CONTIGUOUS)) { - forder = 0; - break; - } - } - - if (forder) { - npyiter_reverse_axis_ordering(iter); - } - } - break; - case NPY_KEEPORDER: - /* Don't set the forced order flag here... */ - break; - } -} - - -/* - * This function negates any strides in the iterator - * which are negative. When iterating more than one - * object, it only flips strides when they are all - * negative or zero. 
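/*
 * A minimal sketch of the flip performed below, for one operand and
 * one axis (hypothetical arguments): starting from what was the last
 * element and negating the stride walks the same data backwards,
 * which is how an all-negative axis is normalized to positive.
 */
static void
example_flip_axis(char **baseptr, npy_intp *stride, npy_intp shape)
{
    /* Move the base pointer to the element the old order ended on */
    *baseptr += (*stride) * (shape - 1);
    /* Reverse the direction of travel along this axis */
    *stride = -(*stride);
}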
- */ -static void -npyiter_flip_negative_strides(NpyIter *iter) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int idim, ndim = NIT_NDIM(iter); - int iop, nop = NIT_NOP(iter); - - npy_intp istrides, nstrides = NAD_NSTRIDES(); - NpyIter_AxisData *axisdata, *axisdata0; - npy_intp *baseoffsets; - npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); - int any_flipped = 0; - - axisdata0 = axisdata = NIT_AXISDATA(iter); - baseoffsets = NIT_BASEOFFSETS(iter); - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_intp *strides = NAD_STRIDES(axisdata); - int any_negative = 0; - - /* - * Check the signs of all the strides, excluding - * the index stride at the end. - */ - for (iop = 0; iop < nop; ++iop) { - if (strides[iop] < 0) { - any_negative = 1; - } - else if (strides[iop] != 0) { - break; - } - } - /* - * If at least on stride is negative and none are positive, - * flip all the strides for this dimension. - */ - if (any_negative && iop == nop) { - npy_intp shapem1 = NAD_SHAPE(axisdata) - 1; - - for (istrides = 0; istrides < nstrides; ++istrides) { - npy_intp stride = strides[istrides]; - - /* Adjust the base pointers to start at the end */ - baseoffsets[istrides] += shapem1 * stride; - /* Flip the stride */ - strides[istrides] = -stride; - } - /* - * Make the perm entry negative so get_multi_index - * knows it's flipped - */ - NIT_PERM(iter)[idim] = -1-NIT_PERM(iter)[idim]; - - any_flipped = 1; - } - } - - /* - * If any strides were flipped, the base pointers were adjusted - * in the first AXISDATA, and need to be copied to all the rest - */ - if (any_flipped) { - char **resetdataptr = NIT_RESETDATAPTR(iter); - - for (istrides = 0; istrides < nstrides; ++istrides) { - resetdataptr[istrides] += baseoffsets[istrides]; - } - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - char **ptrs = NAD_PTRS(axisdata); - for (istrides = 0; istrides < nstrides; ++istrides) { - ptrs[istrides] = resetdataptr[istrides]; - } - } - /* - * Indicate that some of the perm entries are negative, - * and that it's not (strictly speaking) the identity perm. - */ - NIT_ITFLAGS(iter) = (NIT_ITFLAGS(iter)|NPY_ITFLAG_NEGPERM) & - ~NPY_ITFLAG_IDENTPERM; - } -} - -static void -npyiter_reverse_axis_ordering(NpyIter *iter) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int ndim = NIT_NDIM(iter); - int nop = NIT_NOP(iter); - - npy_intp i, temp, size; - npy_intp *first, *last; - npy_int8 *perm; - - size = NIT_AXISDATA_SIZEOF(itflags, ndim, nop)/NPY_SIZEOF_INTP; - first = (npy_intp*)NIT_AXISDATA(iter); - last = first + (ndim-1)*size; - - /* This loop reverses the order of the AXISDATA array */ - while (first < last) { - for (i = 0; i < size; ++i) { - temp = first[i]; - first[i] = last[i]; - last[i] = temp; - } - first += size; - last -= size; - } - - /* Store the perm we applied */ - perm = NIT_PERM(iter); - for(i = ndim-1; i >= 0; --i, ++perm) { - *perm = (npy_int8)i; - } - - NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM; -} - -static npy_intp intp_abs(npy_intp x) -{ - return (x < 0) ? 
-x : x; -} - -static void -npyiter_find_best_axis_ordering(NpyIter *iter) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int idim, ndim = NIT_NDIM(iter); - int iop, nop = NIT_NOP(iter); - - npy_intp ax_i0, ax_i1, ax_ipos; - npy_int8 ax_j0, ax_j1; - npy_int8 *perm; - NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); - int permuted = 0; - - perm = NIT_PERM(iter); - - /* - * Do a custom stable insertion sort. Note that because - * the AXISDATA has been reversed from C order, this - * is sorting from smallest stride to biggest stride. - */ - for (ax_i0 = 1; ax_i0 < ndim; ++ax_i0) { - npy_intp *strides0; - - /* 'ax_ipos' is where perm[ax_i0] will get inserted */ - ax_ipos = ax_i0; - ax_j0 = perm[ax_i0]; - - strides0 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j0)); - for (ax_i1 = ax_i0-1; ax_i1 >= 0; --ax_i1) { - int ambig = 1, shouldswap = 0; - npy_intp *strides1; - - ax_j1 = perm[ax_i1]; - - strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j1)); - - for (iop = 0; iop < nop; ++iop) { - if (strides0[iop] != 0 && strides1[iop] != 0) { - if (intp_abs(strides1[iop]) <= - intp_abs(strides0[iop])) { - /* - * Set swap even if it's not ambiguous already, - * because in the case of conflicts between - * different operands, C-order wins. - */ - shouldswap = 0; - } - else { - /* Only set swap if it's still ambiguous */ - if (ambig) { - shouldswap = 1; - } - } - - /* - * A comparison has been done, so it's - * no longer ambiguous - */ - ambig = 0; - } - } - /* - * If the comparison was unambiguous, either shift - * 'ax_ipos' to 'ax_i1' or stop looking for an insertion - * point - */ - if (!ambig) { - if (shouldswap) { - ax_ipos = ax_i1; - } - else { - break; - } - } - } - - /* Insert perm[ax_i0] into the right place */ - if (ax_ipos != ax_i0) { - for (ax_i1 = ax_i0; ax_i1 > ax_ipos; --ax_i1) { - perm[ax_i1] = perm[ax_i1-1]; - } - perm[ax_ipos] = ax_j0; - permuted = 1; - } - } - - /* Apply the computed permutation to the AXISDATA array */ - if (permuted == 1) { - npy_intp i, size = sizeof_axisdata/NPY_SIZEOF_INTP; - NpyIter_AxisData *ad_i; - - /* Use the index as a flag, set each to 1 */ - ad_i = axisdata; - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(ad_i, 1)) { - NAD_INDEX(ad_i) = 1; - } - /* Apply the permutation by following the cycles */ - for (idim = 0; idim < ndim; ++idim) { - ad_i = NIT_INDEX_AXISDATA(axisdata, idim); - - /* If this axis hasn't been touched yet, process it */ - if (NAD_INDEX(ad_i) == 1) { - npy_int8 pidim = perm[idim]; - npy_intp tmp; - NpyIter_AxisData *ad_p, *ad_q; - - if (pidim != idim) { - /* Follow the cycle, copying the data */ - for (i = 0; i < size; ++i) { - pidim = perm[idim]; - ad_q = ad_i; - tmp = *((npy_intp*)ad_q + i); - while (pidim != idim) { - ad_p = NIT_INDEX_AXISDATA(axisdata, pidim); - *((npy_intp*)ad_q + i) = *((npy_intp*)ad_p + i); - - ad_q = ad_p; - pidim = perm[(int)pidim]; - } - *((npy_intp*)ad_q + i) = tmp; - } - /* Follow the cycle again, marking it as done */ - pidim = perm[idim]; - - while (pidim != idim) { - NAD_INDEX(NIT_INDEX_AXISDATA(axisdata, pidim)) = 0; - pidim = perm[(int)pidim]; - } - } - NAD_INDEX(ad_i) = 0; - } - } - /* Clear the identity perm flag */ - NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM; - } -} - -static void +NPY_NO_EXPORT void npyiter_coalesce_axes(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); @@ -4432,748 +1949,13 @@ npyiter_coalesce_axes(NpyIter *iter) } /* - * Allocates a temporary array which can be used to replace op - * in the 
iteration. Its dtype will be op_dtype. - * - * The result array has a memory ordering which matches the iterator, - * which may or may not match that of op. The parameter 'shape' may be - * NULL, in which case it is filled in from the iterator's shape. - * - * This function must be called before any axes are coalesced. - */ -static PyArrayObject * -npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, - npy_uint32 flags, char *op_itflags, - int op_ndim, npy_intp *shape, - PyArray_Descr *op_dtype, int *op_axes) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int idim, ndim = NIT_NDIM(iter); - int nop = NIT_NOP(iter); - - npy_int8 *perm = NIT_PERM(iter); - npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS], - stride = op_dtype->elsize; - char reversestride[NPY_MAXDIMS], anyreverse = 0; - NpyIter_AxisData *axisdata; - npy_intp sizeof_axisdata; - npy_intp i; - - PyArrayObject *ret; - - /* If it's a scalar, don't need to check the axes */ - if (op_ndim == 0) { - Py_INCREF(op_dtype); - ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, 0, - NULL, NULL, NULL, 0, NULL); - - /* Double-check that the subtype didn't mess with the dimensions */ - if (PyArray_NDIM(ret) != 0) { - PyErr_SetString(PyExc_RuntimeError, - "Iterator automatic output has an array subtype " - "which changed the dimensions of the output"); - Py_DECREF(ret); - return NULL; - } - - return ret; - } - - axisdata = NIT_AXISDATA(iter); - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); - - memset(reversestride, 0, NPY_MAXDIMS); - /* Initialize the strides to invalid values */ - for (i = 0; i < NPY_MAXDIMS; ++i) { - strides[i] = NPY_MAX_INTP; - } - - if (op_axes != NULL) { - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p; - - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = op_axes[ndim+p]; - } - else { - i = op_axes[ndim-p-1]; - } - - if (i >= 0) { - NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d " - "for iterator dimension %d to %d\n", (int)i, - (int)idim, (int)stride); - strides[i] = stride; - if (p < 0) { - reversestride[i] = 1; - anyreverse = 1; - } - else { - reversestride[i] = 0; - } - if (shape == NULL) { - new_shape[i] = NAD_SHAPE(axisdata); - stride *= new_shape[i]; - if (i >= ndim) { - PyErr_SetString(PyExc_ValueError, - "automatically allocated output array " - "specified with an inconsistent axis mapping"); - return NULL; - } - } - else { - stride *= shape[i]; - } - } - else { - if (shape == NULL) { - /* - * If deleting this axis produces a reduction, but - * reduction wasn't enabled, throw an error - */ - if (NAD_SHAPE(axisdata) != 1) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "reduction is not enabled"); - return NULL; - } - if (!((*op_itflags)&NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "is flagged as write-only, not read-write"); - return NULL; - } - - NPY_IT_DBG_PRINT("Iterator: Indicating that a " - "reduction is occurring\n"); - /* Indicate that a reduction is occurring */ - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - (*op_itflags) |= NPY_OP_ITFLAG_REDUCE; - } - } - } - } - } - else { - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - npy_int8 p; - - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = op_ndim + p; - } - else { - i = op_ndim - p - 1; - } - - if (i >= 0) { - NPY_IT_DBG_PRINT3("Iterator: Setting allocated 
stride %d " - "for iterator dimension %d to %d\n", (int)i, - (int)idim, (int)stride); - strides[i] = stride; - if (p < 0) { - reversestride[i] = 1; - anyreverse = 1; - } - else { - reversestride[i] = 0; - } - if (shape == NULL) { - new_shape[i] = NAD_SHAPE(axisdata); - stride *= new_shape[i]; - } - else { - stride *= shape[i]; - } - } - } - } - - /* - * If custom axes were specified, some dimensions may not have been used. - * Add the REDUCE itflag if this creates a reduction situation. - */ - if (shape == NULL) { - /* Ensure there are no dimension gaps in op_axes, and find op_ndim */ - op_ndim = ndim; - if (op_axes != NULL) { - for (i = 0; i < ndim; ++i) { - if (strides[i] == NPY_MAX_INTP) { - if (op_ndim == ndim) { - op_ndim = i; - } - } - /* - * If there's a gap in the array's dimensions, it's an error. - * For example, op_axes of [0,2] for the automatically - * allocated output. - */ - else if (op_ndim != ndim) { - PyErr_SetString(PyExc_ValueError, - "automatically allocated output array " - "specified with an inconsistent axis mapping"); - return NULL; - } - } - } - } - else { - for (i = 0; i < op_ndim; ++i) { - if (strides[i] == NPY_MAX_INTP) { - npy_intp factor, new_strides[NPY_MAXDIMS], - itemsize; - - /* Fill in the missing strides in C order */ - factor = 1; - itemsize = op_dtype->elsize; - for (i = op_ndim-1; i >= 0; --i) { - if (strides[i] == NPY_MAX_INTP) { - new_strides[i] = factor * itemsize; - factor *= shape[i]; - } - } - - /* - * Copy the missing strides, and multiply the existing strides - * by the calculated factor. This way, the missing strides - * are tighter together in memory, which is good for nested - * loops. - */ - for (i = 0; i < op_ndim; ++i) { - if (strides[i] == NPY_MAX_INTP) { - strides[i] = new_strides[i]; - } - else { - strides[i] *= factor; - } - } - - break; - } - } - } - - /* If shape was NULL, set it to the shape we calculated */ - if (shape == NULL) { - shape = new_shape; - } - - /* Allocate the temporary array */ - Py_INCREF(op_dtype); - ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, op_ndim, - shape, strides, NULL, 0, NULL); - if (ret == NULL) { - return NULL; - } - - /* If there are any reversed axes, create a view that reverses them */ - if (anyreverse) { - char *dataptr = PyArray_DATA(ret); - PyArrayObject *newret; - - for (idim = 0; idim < op_ndim; ++idim) { - if (reversestride[idim]) { - dataptr += strides[idim]*(shape[idim]-1); - strides[idim] = -strides[idim]; - } - } - Py_INCREF(op_dtype); - newret = (PyArrayObject *)PyArray_NewFromDescr(subtype, - op_dtype, op_ndim, - shape, strides, dataptr, - NPY_ARRAY_WRITEABLE, NULL); - if (newret == NULL) { - Py_DECREF(ret); - return NULL; - } - newret->base = (PyObject *)ret; - ret = newret; - } - - /* Make sure all the flags are good */ - PyArray_UpdateFlags(ret, NPY_ARRAY_UPDATE_ALL); - - /* Double-check that the subtype didn't mess with the dimensions */ - if (subtype != &PyArray_Type) { - if (PyArray_NDIM(ret) != op_ndim || - !PyArray_CompareLists(shape, PyArray_DIMS(ret), op_ndim)) { - PyErr_SetString(PyExc_RuntimeError, - "Iterator automatic output has an array subtype " - "which changed the dimensions of the output"); - Py_DECREF(ret); - return NULL; - } - } - - return ret; -} - -static int -npyiter_allocate_arrays(NpyIter *iter, - npy_uint32 flags, - PyArray_Descr **op_dtype, PyTypeObject *subtype, - npy_uint32 *op_flags, char *op_itflags, - int **op_axes, int output_scalars) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - int idim, ndim = NIT_NDIM(iter); - int iop, nop = 
NIT_NOP(iter); - - NpyIter_BufferData *bufferdata = NULL; - PyArrayObject **op = NIT_OPERANDS(iter); - - if (itflags&NPY_ITFLAG_BUFFER) { - bufferdata = NIT_BUFFERDATA(iter); - } - - - for (iop = 0; iop < nop; ++iop) { - /* NULL means an output the iterator should allocate */ - if (op[iop] == NULL) { - PyArrayObject *out; - PyTypeObject *op_subtype; - int ondim = output_scalars ? 0 : ndim; - - /* Check whether the subtype was disabled */ - op_subtype = (op_flags[iop]&NPY_ITER_NO_SUBTYPE) ? - &PyArray_Type : subtype; - - /* Allocate the output array */ - out = npyiter_new_temp_array(iter, op_subtype, - flags, &op_itflags[iop], - ondim, - NULL, - op_dtype[iop], - op_axes ? op_axes[iop] : NULL); - if (out == NULL) { - return 0; - } - - op[iop] = out; - - /* - * Now we need to replace the pointers and strides with values - * from the new array. - */ - npyiter_replace_axisdata(iter, iop, op[iop], ondim, - PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL); - - /* New arrays are aligned and need no cast */ - op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; - op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST; - } - /* - * If casting is required, the operand is read-only, and - * it's an array scalar, make a copy whether or not the - * copy flag is enabled. - */ - else if ((op_itflags[iop]&(NPY_OP_ITFLAG_CAST| - NPY_OP_ITFLAG_READ| - NPY_OP_ITFLAG_WRITE)) == (NPY_OP_ITFLAG_CAST| - NPY_OP_ITFLAG_READ) && - PyArray_NDIM(op[iop]) == 0) { - PyArrayObject *temp; - Py_INCREF(op_dtype[iop]); - temp = (PyArrayObject *)PyArray_NewFromDescr( - &PyArray_Type, op_dtype[iop], - 0, NULL, NULL, NULL, 0, NULL); - if (temp == NULL) { - return 0; - } - if (PyArray_CopyInto(temp, op[iop]) != 0) { - Py_DECREF(temp); - return 0; - } - Py_DECREF(op[iop]); - op[iop] = temp; - - /* - * Now we need to replace the pointers and strides with values - * from the temporary array. - */ - npyiter_replace_axisdata(iter, iop, op[iop], 0, - PyArray_DATA(op[iop]), NULL); - - /* - * New arrays are aligned need no cast, and in the case - * of scalars, always have stride 0 so never need buffering - */ - op_itflags[iop] |= (NPY_OP_ITFLAG_ALIGNED| - NPY_OP_ITFLAG_BUFNEVER); - op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST; - if (itflags&NPY_ITFLAG_BUFFER) { - NBF_STRIDES(bufferdata)[iop] = 0; - } - } - /* If casting is required and permitted */ - else if ((op_itflags[iop]&NPY_OP_ITFLAG_CAST) && - (op_flags[iop]&(NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) { - PyArrayObject *temp; - int ondim = PyArray_NDIM(op[iop]); - - /* Allocate the temporary array, if possible */ - temp = npyiter_new_temp_array(iter, &PyArray_Type, - flags, &op_itflags[iop], - ondim, - PyArray_DIMS(op[iop]), - op_dtype[iop], - op_axes ? op_axes[iop] : NULL); - if (temp == NULL) { - return 0; - } - - /* If the data will be read, copy it into temp */ - if (op_itflags[iop]&NPY_OP_ITFLAG_READ) { - if (PyArray_CopyInto(temp, op[iop]) != 0) { - Py_DECREF(temp); - return 0; - } - } - /* If the data will be written to, set UPDATEIFCOPY */ - if (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) { - PyArray_FLAGS(temp) |= NPY_ARRAY_UPDATEIFCOPY; - PyArray_FLAGS(op[iop]) &= ~NPY_ARRAY_WRITEABLE; - Py_INCREF(op[iop]); - temp->base = (PyObject *)op[iop]; - } - - Py_DECREF(op[iop]); - op[iop] = temp; - - /* - * Now we need to replace the pointers and strides with values - * from the temporary array. - */ - npyiter_replace_axisdata(iter, iop, op[iop], ondim, - PyArray_DATA(op[iop]), op_axes ? 
op_axes[iop] : NULL); - - /* The temporary copy is aligned and needs no cast */ - op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; - op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST; - } - else { - /* - * Buffering must be enabled for casting/conversion if copy - * wasn't specified. - */ - if ((op_itflags[iop]&NPY_OP_ITFLAG_CAST) && - !(itflags&NPY_ITFLAG_BUFFER)) { - PyErr_SetString(PyExc_TypeError, - "Iterator operand required copying or buffering, " - "but neither copying nor buffering was enabled"); - return 0; - } - - /* - * If the operand is aligned, any buffering can use aligned - * optimizations. - */ - if (PyArray_ISALIGNED(op[iop])) { - op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; - } - } - - /* Here we can finally check for contiguous iteration */ - if (op_flags[iop]&NPY_ITER_CONTIG) { - NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - npy_intp stride = NAD_STRIDES(axisdata)[iop]; - - if (stride != op_dtype[iop]->elsize) { - NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST " - "because of NPY_ITER_CONTIG\n"); - op_itflags[iop] |= NPY_OP_ITFLAG_CAST; - if (!(itflags&NPY_ITFLAG_BUFFER)) { - PyErr_SetString(PyExc_TypeError, - "Iterator operand required buffering, " - "to be contiguous as requested, but " - "buffering is not enabled"); - return 0; - } - } - } - - /* - * If no alignment, byte swap, or casting is needed, and - * the inner stride of this operand works for the whole - * array, we can set NPY_OP_ITFLAG_BUFNEVER. - */ - if ((itflags&NPY_ITFLAG_BUFFER) && !(op_itflags[iop]&NPY_OP_ITFLAG_CAST)) { - NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - if (ndim == 1) { - op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER; - NBF_STRIDES(bufferdata)[iop] = NAD_STRIDES(axisdata)[iop]; - } - else if (PyArray_NDIM(op[iop]) > 0) { - npy_intp stride, shape, innerstride = 0, innershape; - npy_intp sizeof_axisdata = - NIT_AXISDATA_SIZEOF(itflags, ndim, nop); - /* Find stride of the first non-empty shape */ - for (idim = 0; idim < ndim; ++idim) { - innershape = NAD_SHAPE(axisdata); - if (innershape != 1) { - innerstride = NAD_STRIDES(axisdata)[iop]; - break; - } - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - ++idim; - NIT_ADVANCE_AXISDATA(axisdata, 1); - /* Check that everything could have coalesced together */ - for (; idim < ndim; ++idim) { - stride = NAD_STRIDES(axisdata)[iop]; - shape = NAD_SHAPE(axisdata); - if (shape != 1) { - /* - * If N times the inner stride doesn't equal this - * stride, the multi-dimensionality is needed. - */ - if (innerstride*innershape != stride) { - break; - } - else { - innershape *= shape; - } - } - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - /* - * If we looped all the way to the end, one stride works. - * Set that stride, because it may not belong to the first - * dimension. - */ - if (idim == ndim) { - op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER; - NBF_STRIDES(bufferdata)[iop] = innerstride; - } - } - } - } - - return 1; -} - -/* - * The __array_priority__ attribute of the inputs determines - * the subtype of any output arrays. This function finds the - * subtype of the input array with highest priority. 
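/*
 * A minimal sketch of that rule, assuming two hypothetical readable
 * operands (the real helper below also skips write-only and NULL
 * operands, and starts from NPY_PRIORITY as the baseline):
 */
static PyTypeObject *
example_pick_output_subtype(PyArrayObject *a, PyArrayObject *b)
{
    double pa = PyArray_GetPriority((PyObject *)a, 0.0);
    double pb = PyArray_GetPriority((PyObject *)b, 0.0);

    /* The higher __array_priority__ wins the allocated-output type */
    return (pa >= pb) ? Py_TYPE(a) : Py_TYPE(b);
}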
- */ -static void -npyiter_get_priority_subtype(int nop, PyArrayObject **op, - char *op_itflags, - double *subtype_priority, - PyTypeObject **subtype) -{ - int iop; - - for (iop = 0; iop < nop; ++iop) { - if (op[iop] != NULL && op_itflags[iop]&NPY_OP_ITFLAG_READ) { - double priority = PyArray_GetPriority((PyObject *)op[iop], 0.0); - if (priority > *subtype_priority) { - *subtype_priority = priority; - *subtype = Py_TYPE(op[iop]); - } - } - } -} - -/* - * Calculates a dtype that all the types can be promoted to, using the - * ufunc rules. If only_inputs is 1, it leaves any operands that - * are not read from out of the calculation. - */ -static PyArray_Descr * -npyiter_get_common_dtype(int nop, PyArrayObject **op, - char *op_itflags, PyArray_Descr **op_dtype, - PyArray_Descr **op_request_dtypes, - int only_inputs, int output_scalars) -{ - int iop; - npy_intp narrs = 0, ndtypes = 0; - PyArrayObject *arrs[NPY_MAXARGS]; - PyArray_Descr *dtypes[NPY_MAXARGS]; - PyArray_Descr *ret; - - NPY_IT_DBG_PRINT("Iterator: Getting a common data type from operands\n"); - - for (iop = 0; iop < nop; ++iop) { - if (op_dtype[iop] != NULL && - (!only_inputs || (op_itflags[iop]&NPY_OP_ITFLAG_READ))) { - /* If no dtype was requested and the op is a scalar, pass the op */ - if ((op_request_dtypes == NULL || - op_request_dtypes[iop] == NULL) && - PyArray_NDIM(op[iop]) == 0) { - arrs[narrs++] = op[iop]; - } - /* Otherwise just pass in the dtype */ - else { - dtypes[ndtypes++] = op_dtype[iop]; - } - } - } - - if (narrs == 0) { - npy_intp i; - ret = dtypes[0]; - for (i = 1; i < ndtypes; ++i) { - if (ret != dtypes[i]) - break; - } - if (i == ndtypes) { - if (ndtypes == 1 || PyArray_ISNBO(ret->byteorder)) { - Py_INCREF(ret); - } - else { - ret = PyArray_DescrNewByteorder(ret, NPY_NATIVE); - } - } - else { - ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); - } - } - else { - ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); - } - - return ret; -} - -static int -npyiter_allocate_transfer_functions(NpyIter *iter) -{ - npy_uint32 itflags = NIT_ITFLAGS(iter); - /*int ndim = NIT_NDIM(iter);*/ - int iop = 0, nop = NIT_NOP(iter); - - npy_intp i; - char *op_itflags = NIT_OPITFLAGS(iter); - NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); - NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - PyArrayObject **op = NIT_OPERANDS(iter); - PyArray_Descr **op_dtype = NIT_DTYPES(iter); - npy_intp *strides = NAD_STRIDES(axisdata), op_stride; - PyArray_StridedTransferFn **readtransferfn = NBF_READTRANSFERFN(bufferdata), - **writetransferfn = NBF_WRITETRANSFERFN(bufferdata); - NpyAuxData **readtransferdata = NBF_READTRANSFERDATA(bufferdata), - **writetransferdata = NBF_WRITETRANSFERDATA(bufferdata); - - PyArray_StridedTransferFn *stransfer = NULL; - NpyAuxData *transferdata = NULL; - int needs_api = 0; - - for (iop = 0; iop < nop; ++iop) { - char flags = op_itflags[iop]; - /* - * Reduction operands may be buffered with a different stride, - * so we must pass NPY_MAX_INTP to the transfer function factory. - */ - op_stride = (flags&NPY_OP_ITFLAG_REDUCE) ? 
NPY_MAX_INTP : - strides[iop]; - - /* - * If we have determined that a buffer may be needed, - * allocate the appropriate transfer functions - */ - if (!(flags&NPY_OP_ITFLAG_BUFNEVER)) { - if (flags&NPY_OP_ITFLAG_READ) { - int move_references = 0; - if (PyArray_GetDTypeTransferFunction( - (flags&NPY_OP_ITFLAG_ALIGNED) != 0, - op_stride, - op_dtype[iop]->elsize, - PyArray_DESCR(op[iop]), - op_dtype[iop], - move_references, - &stransfer, - &transferdata, - &needs_api) != NPY_SUCCEED) { - goto fail; - } - readtransferfn[iop] = stransfer; - readtransferdata[iop] = transferdata; - } - else { - readtransferfn[iop] = NULL; - } - if (flags&NPY_OP_ITFLAG_WRITE) { - int move_references = 1; - if (PyArray_GetDTypeTransferFunction( - (flags&NPY_OP_ITFLAG_ALIGNED) != 0, - op_dtype[iop]->elsize, - op_stride, - op_dtype[iop], - PyArray_DESCR(op[iop]), - move_references, - &stransfer, - &transferdata, - &needs_api) != NPY_SUCCEED) { - goto fail; - } - writetransferfn[iop] = stransfer; - writetransferdata[iop] = transferdata; - } - /* If no write back but there are references make a decref fn */ - else if (PyDataType_REFCHK(op_dtype[iop])) { - /* - * By passing NULL to dst_type and setting move_references - * to 1, we get back a function that just decrements the - * src references. - */ - if (PyArray_GetDTypeTransferFunction( - (flags&NPY_OP_ITFLAG_ALIGNED) != 0, - op_dtype[iop]->elsize, 0, - op_dtype[iop], NULL, - 1, - &stransfer, - &transferdata, - &needs_api) != NPY_SUCCEED) { - goto fail; - } - writetransferfn[iop] = stransfer; - writetransferdata[iop] = transferdata; - } - else { - writetransferfn[iop] = NULL; - } - } - else { - readtransferfn[iop] = NULL; - writetransferfn[iop] = NULL; - } - } - - /* If any of the dtype transfer functions needed the API, flag it */ - if (needs_api) { - NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI; - } - - return 1; - -fail: - for (i = 0; i < iop; ++i) { - if (readtransferdata[iop] != NULL) { - NPY_AUXDATA_FREE(readtransferdata[iop]); - readtransferdata[iop] = NULL; - } - if (writetransferdata[iop] != NULL) { - NPY_AUXDATA_FREE(writetransferdata[iop]); - writetransferdata[iop] = NULL; - } - } - return 0; -} - -/* * * If errmsg is non-NULL, it should point to a variable which will * receive the error message, and no Python exception will be set. * This is so that the function can be called from code not holding * the GIL. */ -static int +NPY_NO_EXPORT int npyiter_allocate_buffers(NpyIter *iter, char **errmsg) { /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/ @@ -5227,7 +2009,7 @@ fail: * iterindex, updating the pointers as well. This function does * no error checking. */ -static void +NPY_NO_EXPORT void npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex) { npy_uint32 itflags = NIT_ITFLAGS(iter); @@ -5310,7 +2092,7 @@ npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex) * their data needs to be written back to the arrays. The multi-index * must be positioned for the beginning of the buffer. */ -static void +NPY_NO_EXPORT void npyiter_copy_from_buffers(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); @@ -5476,7 +2258,7 @@ npyiter_copy_from_buffers(NpyIter *iter) * for the start of a buffer. It decides which operands need a buffer, * and copies the data into the buffers. 
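/*
 * A minimal sketch of the per-chunk cycle these two routines
 * implement, written as a hypothetical driver loop (the variables
 * iterindex, iterend, and prev_dataptrs are assumed here, and the
 * real iternext functions interleave these steps differently):
 */
while (iterindex < iterend) {
    /* Load the next chunk of operand data into the buffers */
    npyiter_copy_to_buffers(iter, prev_dataptrs);
    /* ... the caller's inner loop runs over the buffer pointers ... */
    /* Flush anything written in the buffers back to the operands */
    npyiter_copy_from_buffers(iter);
    /* Advance iterindex by the buffered transfer size and repeat */
}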
 */
-static void
+NPY_NO_EXPORT void
 npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
 {
     npy_uint32 itflags = NIT_ITFLAGS(iter);
@@ -6405,3 +3187,5 @@ NpyIter_DebugPrint(NpyIter *iter)
 
     PyGILState_Release(gilstate);
 }
+
+#undef NPY_ITERATOR_IMPLEMENTATION_CODE
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
new file mode 100644
index 000000000..bc5d9e9dc
--- /dev/null
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -0,0 +1,2956 @@
+/*
+ * This file implements the construction, copying, and destruction
+ * aspects of NumPy's nditer.
+ *
+ * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
+ * The University of British Columbia
+ *
+ * Copyright (c) 2011 Enthought, Inc
+ *
+ * See LICENSE.txt for the license.
+ */
+
+/* Indicate that this .c file is allowed to include the header */
+#define NPY_ITERATOR_IMPLEMENTATION_CODE
+#include "nditer_impl.h"
+
+/* Internal helper functions private to this file */
+static int
+npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags);
+static int
+npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
+                      npy_intp *itershape);
+static int
+npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
+                       int oa_ndim);
+static int
+npyiter_check_per_op_flags(npy_uint32 flags, char *op_itflags);
+static int
+npyiter_prepare_one_operand(PyArrayObject **op,
+                            char **op_dataptr,
+                            PyArray_Descr *op_request_dtype,
+                            PyArray_Descr** op_dtype,
+                            npy_uint32 flags,
+                            npy_uint32 op_flags, char *op_itflags);
+static int
+npyiter_prepare_operands(int nop, PyArrayObject **op_in,
+                         PyArrayObject **op,
+                         char **op_dataptr,
+                         PyArray_Descr **op_request_dtypes,
+                         PyArray_Descr **op_dtype,
+                         npy_uint32 flags,
+                         npy_uint32 *op_flags, char *op_itflags);
+static int
+npyiter_check_casting(int nop, PyArrayObject **op,
+                      PyArray_Descr **op_dtype,
+                      NPY_CASTING casting,
+                      char *op_itflags);
+static int
+npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags,
+                      char **op_dataptr,
+                      npy_uint32 *op_flags, int **op_axes,
+                      npy_intp *itershape,
+                      int output_scalars);
+static void
+npyiter_replace_axisdata(NpyIter *iter, int iop,
+                         PyArrayObject *op,
+                         int op_ndim, char *op_dataptr,
+                         int *op_axes);
+static void
+npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags);
+static void
+npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order);
+static void
+npyiter_flip_negative_strides(NpyIter *iter);
+static void
+npyiter_reverse_axis_ordering(NpyIter *iter);
+static void
+npyiter_find_best_axis_ordering(NpyIter *iter);
+static PyArray_Descr *
+npyiter_get_common_dtype(int nop, PyArrayObject **op,
+                         char *op_itflags, PyArray_Descr **op_dtype,
+                         PyArray_Descr **op_request_dtypes,
+                         int only_inputs, int output_scalars);
+static PyArrayObject *
+npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
+                       npy_uint32 flags, char *op_itflags,
+                       int op_ndim, npy_intp *shape,
+                       PyArray_Descr *op_dtype, int *op_axes);
+static int
+npyiter_allocate_arrays(NpyIter *iter,
+                        npy_uint32 flags,
+                        PyArray_Descr **op_dtype, PyTypeObject *subtype,
+                        npy_uint32 *op_flags, char *op_itflags,
+                        int **op_axes, int output_scalars);
+static void
+npyiter_get_priority_subtype(int nop, PyArrayObject **op,
+                             char *op_itflags,
+                             double *subtype_priority, PyTypeObject **subtype);
+static int
+npyiter_allocate_transfer_functions(NpyIter *iter);
+
+
+/*NUMPY_API
+ * Allocate a new iterator for multiple array objects, and advanced
+ * options for controlling the broadcasting, shape, and buffer size.
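/*
 * A minimal call sketch for this constructor, assuming a hypothetical
 * readable input `a` and one output to be allocated, with default
 * broadcasting (oa_ndim == 0, no op_axes or itershape) and the
 * default buffer size (0):
 */
PyArrayObject *ops[2] = {a, NULL};
npy_uint32 op_flags[2] = {NPY_ITER_READONLY,
                          NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE};
NpyIter *it = NpyIter_AdvancedNew(2, ops, 0,
        NPY_KEEPORDER, NPY_SAME_KIND_CASTING,
        op_flags, NULL, 0, NULL, NULL, 0);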
+ */ +NPY_NO_EXPORT NpyIter * +NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, + NPY_ORDER order, NPY_CASTING casting, + npy_uint32 *op_flags, + PyArray_Descr **op_request_dtypes, + int oa_ndim, int **op_axes, npy_intp *itershape, + npy_intp buffersize) +{ + npy_uint32 itflags = NPY_ITFLAG_IDENTPERM; + int idim, ndim; + int iop; + + /* The iterator being constructed */ + NpyIter *iter; + + /* Per-operand values */ + PyArrayObject **op; + PyArray_Descr **op_dtype; + char *op_itflags; + char **op_dataptr; + + npy_int8 *perm; + NpyIter_BufferData *bufferdata = NULL; + int any_allocate = 0, any_missing_dtypes = 0, + output_scalars = 0, need_subtype = 0; + + /* The subtype for automatically allocated outputs */ + double subtype_priority = NPY_PRIORITY; + PyTypeObject *subtype = &PyArray_Type; + +#if NPY_IT_CONSTRUCTION_TIMING + npy_intp c_temp, + c_start, + c_check_op_axes, + c_check_global_flags, + c_calculate_ndim, + c_malloc, + c_prepare_operands, + c_fill_axisdata, + c_compute_index_strides, + c_apply_forced_iteration_order, + c_find_best_axis_ordering, + c_get_priority_subtype, + c_find_output_common_dtype, + c_check_casting, + c_allocate_arrays, + c_coalesce_axes, + c_prepare_buffers; +#endif + + NPY_IT_TIME_POINT(c_start); + + if (nop > NPY_MAXARGS) { + PyErr_Format(PyExc_ValueError, + "Cannot construct an iterator with more than %d operands " + "(%d were requested)", (int)NPY_MAXARGS, (int)nop); + return NULL; + } + + /* Error check 'oa_ndim' and 'op_axes', which must be used together */ + if (!npyiter_check_op_axes(nop, oa_ndim, op_axes, itershape)) { + return NULL; + } + + NPY_IT_TIME_POINT(c_check_op_axes); + + /* Check the global iterator flags */ + if (!npyiter_check_global_flags(flags, &itflags)) { + return NULL; + } + + NPY_IT_TIME_POINT(c_check_global_flags); + + /* Calculate how many dimensions the iterator should have */ + ndim = npyiter_calculate_ndim(nop, op_in, oa_ndim); + + /* If 'ndim' is zero, any outputs should be scalars */ + if (ndim == 0) { + output_scalars = 1; + ndim = 1; + } + + NPY_IT_TIME_POINT(c_calculate_ndim); + + /* Allocate memory for the iterator */ + iter = (NpyIter*) + PyArray_malloc(NIT_SIZEOF_ITERATOR(itflags, ndim, nop)); + + NPY_IT_TIME_POINT(c_malloc); + + /* Fill in the basic data */ + NIT_ITFLAGS(iter) = itflags; + NIT_NDIM(iter) = ndim; + NIT_NOP(iter) = nop; + NIT_ITERINDEX(iter) = 0; + memset(NIT_BASEOFFSETS(iter), 0, (nop+1)*NPY_SIZEOF_INTP); + + op = NIT_OPERANDS(iter); + op_dtype = NIT_DTYPES(iter); + op_itflags = NIT_OPITFLAGS(iter); + op_dataptr = NIT_RESETDATAPTR(iter); + + /* Prepare all the operands */ + if (!npyiter_prepare_operands(nop, op_in, op, op_dataptr, + op_request_dtypes, op_dtype, + flags, + op_flags, op_itflags)) { + PyArray_free(iter); + return NULL; + } + /* Set resetindex to zero as well (it's just after the resetdataptr) */ + op_dataptr[nop] = 0; + + NPY_IT_TIME_POINT(c_prepare_operands); + + /* + * Initialize buffer data (must set the buffers and transferdata + * to NULL before we might deallocate the iterator). 
+ */ + if (itflags&NPY_ITFLAG_BUFFER) { + bufferdata = NIT_BUFFERDATA(iter); + NBF_SIZE(bufferdata) = 0; + memset(NBF_BUFFERS(bufferdata), 0, nop*NPY_SIZEOF_INTP); + memset(NBF_READTRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP); + memset(NBF_WRITETRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP); + } + + /* Fill in the AXISDATA arrays and set the ITERSIZE field */ + if (!npyiter_fill_axisdata(iter, flags, op_itflags, op_dataptr, + op_flags, op_axes, itershape, + output_scalars)) { + NpyIter_Deallocate(iter); + return NULL; + } + + NPY_IT_TIME_POINT(c_fill_axisdata); + + if (itflags&NPY_ITFLAG_BUFFER) { + /* + * If buffering is enabled and no buffersize was given, use a default + * chosen to be big enough to get some amortization benefits, but + * small enough to be cache-friendly. + */ + if (buffersize <= 0) { + buffersize = NPY_BUFSIZE; + } + /* No point in a buffer bigger than the iteration size */ + if (buffersize > NIT_ITERSIZE(iter)) { + buffersize = NIT_ITERSIZE(iter); + } + NBF_BUFFERSIZE(bufferdata) = buffersize; + } + + /* + * If an index was requested, compute the strides for it. + * Note that we must do this before changing the order of the + * axes + */ + npyiter_compute_index_strides(iter, flags); + + NPY_IT_TIME_POINT(c_compute_index_strides); + + /* Initialize the perm to the identity */ + perm = NIT_PERM(iter); + for(idim = 0; idim < ndim; ++idim) { + perm[idim] = (npy_int8)idim; + } + + /* + * If an iteration order is being forced, apply it. + */ + npyiter_apply_forced_iteration_order(iter, order); + itflags = NIT_ITFLAGS(iter); + + NPY_IT_TIME_POINT(c_apply_forced_iteration_order); + + /* Set some flags for allocated outputs */ + for (iop = 0; iop < nop; ++iop) { + if (op[iop] == NULL) { + /* Flag this so later we can avoid flipping axes */ + any_allocate = 1; + /* If a subtype may be used, indicate so */ + if (!(op_flags[iop]&NPY_ITER_NO_SUBTYPE)) { + need_subtype = 1; + } + /* + * If the data type wasn't provided, will need to + * calculate it. + */ + if (op_dtype[iop] == NULL) { + any_missing_dtypes = 1; + } + } + } + + /* + * If the ordering was not forced, reorder the axes + * and flip negative strides to find the best one. + */ + if (!(itflags&NPY_ITFLAG_FORCEDORDER)) { + if (ndim > 1) { + npyiter_find_best_axis_ordering(iter); + } + /* + * If there's an output being allocated, we must not negate + * any strides. + */ + if (!any_allocate && !(flags&NPY_ITER_DONT_NEGATE_STRIDES)) { + npyiter_flip_negative_strides(iter); + } + itflags = NIT_ITFLAGS(iter); + } + + NPY_IT_TIME_POINT(c_find_best_axis_ordering); + + if (need_subtype) { + npyiter_get_priority_subtype(nop, op, op_itflags, + &subtype_priority, &subtype); + } + + NPY_IT_TIME_POINT(c_get_priority_subtype); + + /* + * If an automatically allocated output didn't have a specified + * dtype, we need to figure it out now, before allocating the outputs. 
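/*
 * A minimal sketch of the promotion this step falls back on,
 * assuming two hypothetical operand descrs and no scalar inputs:
 * with no arrays passed, the common type is plain dtype promotion.
 */
PyArray_Descr *dtypes[2];
PyArray_Descr *common;

dtypes[0] = PyArray_DescrFromType(NPY_INT32);
dtypes[1] = PyArray_DescrFromType(NPY_DOUBLE);
common = PyArray_ResultType(0, NULL, 2, dtypes);
/* common is float64, which can hold every int32 and float64 value;
   the caller owns the new reference to common */
Py_DECREF(dtypes[0]);
Py_DECREF(dtypes[1]);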
+ */ + if (any_missing_dtypes || (flags&NPY_ITER_COMMON_DTYPE)) { + PyArray_Descr *dtype; + int only_inputs = !(flags&NPY_ITER_COMMON_DTYPE); + + op = NIT_OPERANDS(iter); + op_dtype = NIT_DTYPES(iter); + + dtype = npyiter_get_common_dtype(nop, op, + op_itflags, op_dtype, + op_request_dtypes, + only_inputs, + output_scalars); + if (dtype == NULL) { + NpyIter_Deallocate(iter); + return NULL; + } + if (flags&NPY_ITER_COMMON_DTYPE) { + NPY_IT_DBG_PRINT("Iterator: Replacing all data types\n"); + /* Replace all the data types */ + for (iop = 0; iop < nop; ++iop) { + if (op_dtype[iop] != dtype) { + Py_XDECREF(op_dtype[iop]); + Py_INCREF(dtype); + op_dtype[iop] = dtype; + } + } + } + else { + NPY_IT_DBG_PRINT("Iterator: Setting unset output data types\n"); + /* Replace the NULL data types */ + for (iop = 0; iop < nop; ++iop) { + if (op_dtype[iop] == NULL) { + Py_INCREF(dtype); + op_dtype[iop] = dtype; + } + } + } + Py_DECREF(dtype); + } + + NPY_IT_TIME_POINT(c_find_output_common_dtype); + + /* + * All of the data types have been settled, so it's time + * to check that data type conversions are following the + * casting rules. + */ + if (!npyiter_check_casting(nop, op, op_dtype, casting, op_itflags)) { + NpyIter_Deallocate(iter); + return NULL; + } + + NPY_IT_TIME_POINT(c_check_casting); + + /* + * At this point, the iteration order has been finalized. so + * any allocation of ops that were NULL, or any temporary + * copying due to casting/byte order/alignment can be + * done now using a memory layout matching the iterator. + */ + if (!npyiter_allocate_arrays(iter, flags, op_dtype, subtype, op_flags, + op_itflags, op_axes, output_scalars)) { + NpyIter_Deallocate(iter); + return NULL; + } + + NPY_IT_TIME_POINT(c_allocate_arrays); + + /* + * Finally, if a multi-index wasn't requested, + * it may be possible to coalesce some axes together. + */ + if (ndim > 1 && !(itflags&NPY_ITFLAG_HASMULTIINDEX)) { + npyiter_coalesce_axes(iter); + /* + * The operation may have changed the layout, so we have to + * get the internal pointers again. + */ + itflags = NIT_ITFLAGS(iter); + ndim = NIT_NDIM(iter); + op = NIT_OPERANDS(iter); + op_dtype = NIT_DTYPES(iter); + op_itflags = NIT_OPITFLAGS(iter); + op_dataptr = NIT_RESETDATAPTR(iter); + } + + NPY_IT_TIME_POINT(c_coalesce_axes); + + /* + * Now that the axes are finished, check whether we can apply + * the single iteration optimization to the iternext function. + */ + if (!(itflags&NPY_ITFLAG_BUFFER)) { + NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); + if (itflags&NPY_ITFLAG_EXLOOP) { + if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) { + NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION; + } + } + else if (NIT_ITERSIZE(iter) == 1) { + NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION; + } + } + + /* + * If REFS_OK was specified, check whether there are any + * reference arrays and flag it if so. 
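/*
 * A minimal sketch of the reference check, for a hypothetical descr
 * `dt` (the test below is broader, also catching NPY_ITEM_IS_POINTER
 * and NPY_NEEDS_PYAPI):
 */
if (PyDataType_REFCHK(dt)) {
    /* Items hold Python references (e.g. NPY_OBJECT), so buffering
       and iteration must run with the GIL held; callers can query
       this afterwards with NpyIter_IterationNeedsAPI(iter). */
}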
+ */ + if (flags&NPY_ITER_REFS_OK) { + for (iop = 0; iop < nop; ++iop) { + PyArray_Descr *rdt = op_dtype[iop]; + if ((rdt->flags&(NPY_ITEM_REFCOUNT| + NPY_ITEM_IS_POINTER| + NPY_NEEDS_PYAPI)) != 0) { + /* Iteration needs API access */ + NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI; + } + } + } + + /* If buffering is set without delayed allocation */ + if (itflags&NPY_ITFLAG_BUFFER) { + if (!npyiter_allocate_transfer_functions(iter)) { + NpyIter_Deallocate(iter); + return NULL; + } + if (itflags&NPY_ITFLAG_DELAYBUF) { + bufferdata = NIT_BUFFERDATA(iter); + /* Make the data pointers NULL */ + memset(NBF_PTRS(bufferdata), 0, nop*NPY_SIZEOF_INTP); + } + else { + /* Allocate the buffers */ + if (!npyiter_allocate_buffers(iter, NULL)) { + NpyIter_Deallocate(iter); + return NULL; + } + + /* Prepare the next buffers and set iterend/size */ + npyiter_copy_to_buffers(iter, NULL); + } + } + + NPY_IT_TIME_POINT(c_prepare_buffers); + +#if NPY_IT_CONSTRUCTION_TIMING + printf("\nIterator construction timing:\n"); + NPY_IT_PRINT_TIME_START(c_start); + NPY_IT_PRINT_TIME_VAR(c_check_op_axes); + NPY_IT_PRINT_TIME_VAR(c_check_global_flags); + NPY_IT_PRINT_TIME_VAR(c_calculate_ndim); + NPY_IT_PRINT_TIME_VAR(c_malloc); + NPY_IT_PRINT_TIME_VAR(c_prepare_operands); + NPY_IT_PRINT_TIME_VAR(c_fill_axisdata); + NPY_IT_PRINT_TIME_VAR(c_compute_index_strides); + NPY_IT_PRINT_TIME_VAR(c_apply_forced_iteration_order); + NPY_IT_PRINT_TIME_VAR(c_find_best_axis_ordering); + NPY_IT_PRINT_TIME_VAR(c_get_priority_subtype); + NPY_IT_PRINT_TIME_VAR(c_find_output_common_dtype); + NPY_IT_PRINT_TIME_VAR(c_check_casting); + NPY_IT_PRINT_TIME_VAR(c_allocate_arrays); + NPY_IT_PRINT_TIME_VAR(c_coalesce_axes); + NPY_IT_PRINT_TIME_VAR(c_prepare_buffers); + printf("\n"); +#endif + + return iter; +} + +/*NUMPY_API + * Allocate a new iterator for more than one array object, using + * standard NumPy broadcasting rules and the default buffer size. + */ +NPY_NO_EXPORT NpyIter * +NpyIter_MultiNew(int nop, PyArrayObject **op_in, npy_uint32 flags, + NPY_ORDER order, NPY_CASTING casting, + npy_uint32 *op_flags, + PyArray_Descr **op_request_dtypes) +{ + return NpyIter_AdvancedNew(nop, op_in, flags, order, casting, + op_flags, op_request_dtypes, + 0, NULL, NULL, 0); +} + +/*NUMPY_API + * Allocate a new iterator for one array object. 
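/*
 * A minimal usage sketch for this constructor, assuming `arr` is a
 * readable PyArrayObject of a numeric dtype: sum the elements as
 * doubles, letting buffering handle any cast or byte-order fixup
 * behind iternext.
 */
static double
example_sum_as_double(PyArrayObject *arr)
{
    double sum = 0.0;
    PyArray_Descr *dtype = PyArray_DescrFromType(NPY_DOUBLE);
    NpyIter *it;
    NpyIter_IterNextFunc *iternext;
    char **dataptr;
    npy_intp *strideptr, *sizeptr;

    it = NpyIter_New(arr,
            NPY_ITER_READONLY | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_BUFFERED,
            NPY_KEEPORDER, NPY_SAME_KIND_CASTING, dtype);
    Py_DECREF(dtype);
    if (it == NULL) {
        return -1.0;    /* error is already set; -1.0 is a sketch sentinel */
    }
    iternext = NpyIter_GetIterNext(it, NULL);
    if (iternext == NULL) {
        NpyIter_Deallocate(it);
        return -1.0;
    }
    dataptr = NpyIter_GetDataPtrArray(it);
    strideptr = NpyIter_GetInnerStrideArray(it);
    sizeptr = NpyIter_GetInnerLoopSizePtr(it);

    do {
        /* EXTERNAL_LOOP hands back a whole inner dimension at a time */
        char *data = *dataptr;
        npy_intp stride = *strideptr, count = *sizeptr;

        while (count--) {
            sum += *(double *)data;
            data += stride;
        }
    } while (iternext(it));

    NpyIter_Deallocate(it);
    return sum;
}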
+ */ +NPY_NO_EXPORT NpyIter * +NpyIter_New(PyArrayObject *op, npy_uint32 flags, + NPY_ORDER order, NPY_CASTING casting, + PyArray_Descr* dtype) +{ + /* Split the flags into separate global and op flags */ + npy_uint32 op_flags = flags&NPY_ITER_PER_OP_FLAGS; + flags &= NPY_ITER_GLOBAL_FLAGS; + + return NpyIter_AdvancedNew(1, &op, flags, order, casting, + &op_flags, &dtype, + 0, NULL, NULL, 0); +} + +/*NUMPY_API + * Makes a copy of the iterator + */ +NPY_NO_EXPORT NpyIter * +NpyIter_Copy(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int ndim = NIT_NDIM(iter); + int iop, nop = NIT_NOP(iter); + int out_of_memory = 0; + + npy_intp size; + NpyIter *newiter; + PyArrayObject **objects; + PyArray_Descr **dtypes; + + /* Allocate memory for the new iterator */ + size = NIT_SIZEOF_ITERATOR(itflags, ndim, nop); + newiter = (NpyIter*)PyArray_malloc(size); + + /* Copy the raw values to the new iterator */ + memcpy(newiter, iter, size); + + /* Take ownership of references to the operands and dtypes */ + objects = NIT_OPERANDS(newiter); + dtypes = NIT_DTYPES(newiter); + for (iop = 0; iop < nop; ++iop) { + Py_INCREF(objects[iop]); + Py_INCREF(dtypes[iop]); + } + + /* Allocate buffers and make copies of the transfer data if necessary */ + if (itflags&NPY_ITFLAG_BUFFER) { + NpyIter_BufferData *bufferdata; + npy_intp buffersize, itemsize; + char **buffers; + NpyAuxData **readtransferdata, **writetransferdata; + + bufferdata = NIT_BUFFERDATA(newiter); + buffers = NBF_BUFFERS(bufferdata); + readtransferdata = NBF_READTRANSFERDATA(bufferdata); + writetransferdata = NBF_WRITETRANSFERDATA(bufferdata); + buffersize = NBF_BUFFERSIZE(bufferdata); + + for (iop = 0; iop < nop; ++iop) { + if (buffers[iop] != NULL) { + if (out_of_memory) { + buffers[iop] = NULL; + } + else { + itemsize = dtypes[iop]->elsize; + buffers[iop] = PyArray_malloc(itemsize*buffersize); + if (buffers[iop] == NULL) { + out_of_memory = 1; + } + } + } + + if (readtransferdata[iop] != NULL) { + if (out_of_memory) { + readtransferdata[iop] = NULL; + } + else { + readtransferdata[iop] = + NPY_AUXDATA_CLONE(readtransferdata[iop]); + if (readtransferdata[iop] == NULL) { + out_of_memory = 1; + } + } + } + + if (writetransferdata[iop] != NULL) { + if (out_of_memory) { + writetransferdata[iop] = NULL; + } + else { + writetransferdata[iop] = + NPY_AUXDATA_CLONE(writetransferdata[iop]); + if (writetransferdata[iop] == NULL) { + out_of_memory = 1; + } + } + } + } + + /* Initialize the buffers to the current iterindex */ + if (!out_of_memory && NBF_SIZE(bufferdata) > 0) { + npyiter_goto_iterindex(newiter, NIT_ITERINDEX(newiter)); + + /* Prepare the next buffers and set iterend/size */ + npyiter_copy_to_buffers(newiter, NULL); + } + } + + if (out_of_memory) { + NpyIter_Deallocate(newiter); + PyErr_NoMemory(); + return NULL; + } + + return newiter; +} + +/*NUMPY_API + * Deallocate an iterator + */ +NPY_NO_EXPORT int +NpyIter_Deallocate(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + /*int ndim = NIT_NDIM(iter);*/ + int iop, nop = NIT_NOP(iter); + + PyArray_Descr **dtype = NIT_DTYPES(iter); + PyArrayObject **object = NIT_OPERANDS(iter); + + /* Deallocate any buffers and buffering data */ + if (itflags&NPY_ITFLAG_BUFFER) { + NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); + char **buffers; + NpyAuxData **transferdata; + + /* buffers */ + buffers = NBF_BUFFERS(bufferdata); + for(iop = 0; iop < nop; ++iop, ++buffers) { + if (*buffers) { + PyArray_free(*buffers); + } + } + /* read bufferdata */ + transferdata = 
NBF_READTRANSFERDATA(bufferdata); + for(iop = 0; iop < nop; ++iop, ++transferdata) { + if (*transferdata) { + NPY_AUXDATA_FREE(*transferdata); + } + } + /* write bufferdata */ + transferdata = NBF_WRITETRANSFERDATA(bufferdata); + for(iop = 0; iop < nop; ++iop, ++transferdata) { + if (*transferdata) { + NPY_AUXDATA_FREE(*transferdata); + } + } + } + + /* Deallocate all the dtypes and objects that were iterated */ + for(iop = 0; iop < nop; ++iop, ++dtype, ++object) { + Py_XDECREF(*dtype); + Py_XDECREF(*object); + } + + /* Deallocate the iterator memory */ + PyArray_free(iter); + + return NPY_SUCCEED; +} + +/* Checks 'flags' for (C|F)_ORDER_INDEX, MULTI_INDEX, and EXTERNAL_LOOP, + * setting the appropriate internal flags in 'itflags'. + * + * Returns 1 on success, 0 on error. + */ +static int +npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags) +{ + if ((flags&NPY_ITER_PER_OP_FLAGS) != 0) { + PyErr_SetString(PyExc_ValueError, + "A per-operand flag was passed as a global flag " + "to the iterator constructor"); + return 0; + } + + /* Check for an index */ + if (flags&(NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) { + if ((flags&(NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) == + (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) { + PyErr_SetString(PyExc_ValueError, + "Iterator flags C_INDEX and " + "F_INDEX cannot both be specified"); + return 0; + } + (*itflags) |= NPY_ITFLAG_HASINDEX; + } + /* Check if a multi-index was requested */ + if (flags&NPY_ITER_MULTI_INDEX) { + /* + * This flag primarily disables dimension manipulations that + * would produce an incorrect multi-index. + */ + (*itflags) |= NPY_ITFLAG_HASMULTIINDEX; + } + /* Check if the caller wants to handle inner iteration */ + if (flags&NPY_ITER_EXTERNAL_LOOP) { + if ((*itflags)&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASMULTIINDEX)) { + PyErr_SetString(PyExc_ValueError, + "Iterator flag EXTERNAL_LOOP cannot be used " + "if an index or multi-index is being tracked"); + return 0; + } + (*itflags) |= NPY_ITFLAG_EXLOOP; + } + /* Ranged */ + if (flags&NPY_ITER_RANGED) { + (*itflags) |= NPY_ITFLAG_RANGE; + if ((flags&NPY_ITER_EXTERNAL_LOOP) && + !(flags&NPY_ITER_BUFFERED)) { + PyErr_SetString(PyExc_ValueError, + "Iterator flag RANGED cannot be used with " + "the flag EXTERNAL_LOOP unless " + "BUFFERED is also enabled"); + return 0; + } + } + /* Buffering */ + if (flags&NPY_ITER_BUFFERED) { + (*itflags) |= NPY_ITFLAG_BUFFER; + if (flags&NPY_ITER_GROWINNER) { + (*itflags) |= NPY_ITFLAG_GROWINNER; + } + if (flags&NPY_ITER_DELAY_BUFALLOC) { + (*itflags) |= NPY_ITFLAG_DELAYBUF; + } + } + + return 1; +} + +static int +npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes, + npy_intp *itershape) +{ + char axes_dupcheck[NPY_MAXDIMS]; + int iop, idim; + + if (oa_ndim == 0 && (op_axes != NULL || itershape != NULL)) { + PyErr_Format(PyExc_ValueError, + "If 'op_axes' or 'itershape' is not NULL in the" + "iterator constructor, 'oa_ndim' must be greater than zero"); + return 0; + } + else if (oa_ndim > 0) { + if (oa_ndim > NPY_MAXDIMS) { + PyErr_Format(PyExc_ValueError, + "Cannot construct an iterator with more than %d dimensions " + "(%d were requested for op_axes)", + (int)NPY_MAXDIMS, oa_ndim); + return 0; + } + else if (op_axes == NULL) { + PyErr_Format(PyExc_ValueError, + "If 'oa_ndim' is greater than zero in the iterator " + "constructor, then op_axes cannot be NULL"); + return 0; + } + + /* Check that there are no duplicates in op_axes */ + for (iop = 0; iop < nop; ++iop) { + int *axes = op_axes[iop]; + if (axes != NULL) { + memset(axes_dupcheck, 0, 
NPY_MAXDIMS); + for (idim = 0; idim < oa_ndim; ++idim) { + npy_intp i = axes[idim]; + if (i >= 0) { + if (i >= NPY_MAXDIMS) { + PyErr_Format(PyExc_ValueError, + "The 'op_axes' provided to the iterator " + "constructor for operand %d " + "contained invalid " + "values %d", (int)iop, (int)i); + return 0; + } else if(axes_dupcheck[i] == 1) { + PyErr_Format(PyExc_ValueError, + "The 'op_axes' provided to the iterator " + "constructor for operand %d " + "contained duplicate " + "value %d", (int)iop, (int)i); + return 0; + } + else { + axes_dupcheck[i] = 1; + } + } + } + } + } + } + + return 1; +} + +static int +npyiter_calculate_ndim(int nop, PyArrayObject **op_in, + int oa_ndim) +{ + /* If 'op_axes' is being used, force 'ndim' */ + if (oa_ndim > 0 ) { + return oa_ndim; + } + /* Otherwise it's the maximum 'ndim' from the operands */ + else { + int ndim = 0, iop; + + for (iop = 0; iop < nop; ++iop) { + if (op_in[iop] != NULL) { + int ondim = PyArray_NDIM(op_in[iop]); + if (ondim > ndim) { + ndim = ondim; + } + } + + } + + return ndim; + } +} + +/* + * Checks the per-operand input flags, and fills in op_itflags. + * + * Returns 1 on success, 0 on failure. + */ +static int +npyiter_check_per_op_flags(npy_uint32 op_flags, char *op_itflags) +{ + if ((op_flags&NPY_ITER_GLOBAL_FLAGS) != 0) { + PyErr_SetString(PyExc_ValueError, + "A global iterator flag was passed as a per-operand flag " + "to the iterator constructor"); + return 0; + } + + /* Check the read/write flags */ + if (op_flags&NPY_ITER_READONLY) { + /* The read/write flags are mutually exclusive */ + if (op_flags&(NPY_ITER_READWRITE|NPY_ITER_WRITEONLY)) { + PyErr_SetString(PyExc_ValueError, + "Only one of the iterator flags READWRITE, " + "READONLY, and WRITEONLY may be " + "specified for an operand"); + return 0; + } + + *op_itflags = NPY_OP_ITFLAG_READ; + } + else if (op_flags&NPY_ITER_READWRITE) { + /* The read/write flags are mutually exclusive */ + if (op_flags&NPY_ITER_WRITEONLY) { + PyErr_SetString(PyExc_ValueError, + "Only one of the iterator flags READWRITE, " + "READONLY, and WRITEONLY may be " + "specified for an operand"); + return 0; + } + + *op_itflags = NPY_OP_ITFLAG_READ|NPY_OP_ITFLAG_WRITE; + } + else if(op_flags&NPY_ITER_WRITEONLY) { + *op_itflags = NPY_OP_ITFLAG_WRITE; + } + else { + PyErr_SetString(PyExc_ValueError, + "None of the iterator flags READWRITE, " + "READONLY, or WRITEONLY were " + "specified for an operand"); + return 0; + } + + /* Check the flags for temporary copies */ + if (((*op_itflags)&NPY_OP_ITFLAG_WRITE) && + (op_flags&(NPY_ITER_COPY| + NPY_ITER_UPDATEIFCOPY)) == NPY_ITER_COPY) { + PyErr_SetString(PyExc_ValueError, + "If an iterator operand is writeable, must use " + "the flag UPDATEIFCOPY instead of " + "COPY"); + return 0; + } + + return 1; +} + +/* + * Prepares a a constructor operand. Assumes a reference to 'op' + * is owned, and that 'op' may be replaced. Fills in 'op_dtype' + * and 'ndim'. + * + * Returns 1 on success, 0 on failure. 
+ */ +static int +npyiter_prepare_one_operand(PyArrayObject **op, + char **op_dataptr, + PyArray_Descr *op_request_dtype, + PyArray_Descr **op_dtype, + npy_uint32 flags, + npy_uint32 op_flags, char *op_itflags) +{ + /* NULL operands must be automatically allocated outputs */ + if (*op == NULL) { + /* ALLOCATE should be enabled */ + if (!(op_flags&NPY_ITER_ALLOCATE)) { + PyErr_SetString(PyExc_ValueError, + "Iterator operand was NULL, but automatic allocation as an " + "output wasn't requested"); + return 0; + } + /* Writing should be enabled */ + if (!((*op_itflags)&NPY_OP_ITFLAG_WRITE)) { + PyErr_SetString(PyExc_ValueError, + "Automatic allocation was requested for an iterator " + "operand, but it wasn't flagged for writing"); + return 0; + } + /* + * Reading should be disabled if buffering is enabled without + * also enabling NPY_ITER_DELAY_BUFALLOC. In all other cases, + * the caller may initialize the allocated operand to a value + * before beginning iteration. + */ + if (((flags&(NPY_ITER_BUFFERED| + NPY_ITER_DELAY_BUFALLOC)) == NPY_ITER_BUFFERED) && + ((*op_itflags)&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "Automatic allocation was requested for an iterator " + "operand, and it was flagged as readable, but buffering " + "without delayed allocation was enabled"); + return 0; + } + *op_dataptr = NULL; + /* If a requested dtype was provided, use it, otherwise NULL */ + Py_XINCREF(op_request_dtype); + *op_dtype = op_request_dtype; + + return 1; + } + + if (PyArray_Check(*op)) { + if (((*op_itflags)&NPY_OP_ITFLAG_WRITE) && + (!PyArray_CHKFLAGS(*op, NPY_ARRAY_WRITEABLE))) { + PyErr_SetString(PyExc_ValueError, + "Iterator operand was a non-writeable array, but was " + "flagged as writeable"); + return 0; + } + if (!(flags&NPY_ITER_ZEROSIZE_OK) && PyArray_SIZE(*op) == 0) { + PyErr_SetString(PyExc_ValueError, + "Iteration of zero-sized operands is not enabled"); + return 0; + } + *op_dataptr = PyArray_BYTES(*op); + /* PyArray_DESCR does not give us a reference */ + *op_dtype = PyArray_DESCR(*op); + if (*op_dtype == NULL) { + PyErr_SetString(PyExc_ValueError, + "Iterator input array object has no dtype descr"); + return 0; + } + Py_INCREF(*op_dtype); + /* + * If references weren't specifically allowed, make sure there + * are no references in the inputs or requested dtypes. + */ + if (!(flags&NPY_ITER_REFS_OK)) { + PyArray_Descr *dt = PyArray_DESCR(*op); + if (((dt->flags&(NPY_ITEM_REFCOUNT| + NPY_ITEM_IS_POINTER)) != 0) || + (dt != *op_dtype && + (((*op_dtype)->flags&(NPY_ITEM_REFCOUNT| + NPY_ITEM_IS_POINTER))) != 0)) { + PyErr_SetString(PyExc_TypeError, + "Iterator operand or requested dtype holds " + "references, but the REFS_OK flag was not enabled"); + return 0; + } + } + /* + * Checking whether casts are valid is done later, once the + * final data types have been selected. For now, just store the + * requested type.
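+ * Those checks happen later in npyiter_check_casting, once every + * operand's final data type is known.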
+ */ + if (op_request_dtype != NULL) { + /* We just have a borrowed reference to op_request_dtype */ + Py_INCREF(op_request_dtype); + /* If the requested dtype is flexible, adapt it */ + PyArray_AdaptFlexibleDType((PyObject *)(*op), PyArray_DESCR(*op), + &op_request_dtype); + if (op_request_dtype == NULL) { + return 0; + } + + /* Store the requested dtype */ + Py_DECREF(*op_dtype); + *op_dtype = op_request_dtype; + } + + /* Check if the operand is in the byte order requested */ + if (op_flags&NPY_ITER_NBO) { + /* Check byte order */ + if (!PyArray_ISNBO((*op_dtype)->byteorder)) { + PyArray_Descr *nbo_dtype; + + /* Replace with a new descr which is in native byte order */ + nbo_dtype = PyArray_DescrNewByteorder(*op_dtype, NPY_NATIVE); + Py_DECREF(*op_dtype); + *op_dtype = nbo_dtype; + + NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST " + "because of NPY_ITER_NBO\n"); + /* Indicate that byte order or alignment needs fixing */ + *op_itflags |= NPY_OP_ITFLAG_CAST; + } + } + /* Check if the operand is aligned */ + if (op_flags&NPY_ITER_ALIGNED) { + /* Check alignment */ + if (!PyArray_ISALIGNED(*op)) { + NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST " + "because of NPY_ITER_ALIGNED\n"); + *op_itflags |= NPY_OP_ITFLAG_CAST; + } + } + /* + * The check for NPY_ITER_CONTIG can only be done later, + * once the final iteration order is settled. + */ + } + else { + PyErr_SetString(PyExc_ValueError, + "Iterator inputs must be ndarrays"); + return 0; + } + + return 1; +} + +/* + * Process all the operands, copying new references so further processing + * can replace the arrays if copying is necessary. + */ +static int +npyiter_prepare_operands(int nop, PyArrayObject **op_in, + PyArrayObject **op, + char **op_dataptr, + PyArray_Descr **op_request_dtypes, + PyArray_Descr **op_dtype, + npy_uint32 flags, + npy_uint32 *op_flags, char *op_itflags) +{ + int iop, i; + + for (iop = 0; iop < nop; ++iop) { + op[iop] = op_in[iop]; + Py_XINCREF(op[iop]); + op_dtype[iop] = NULL; + + /* Check the readonly/writeonly flags, and fill in op_itflags */ + if (!npyiter_check_per_op_flags(op_flags[iop], &op_itflags[iop])) { + for (i = 0; i <= iop; ++i) { + Py_XDECREF(op[i]); + Py_XDECREF(op_dtype[i]); + } + return 0; + } + + /* + * Prepare the operand. This produces an op_dtype[iop] reference + * on success. + */ + if (!npyiter_prepare_one_operand(&op[iop], + &op_dataptr[iop], + op_request_dtypes ? 
op_request_dtypes[iop] : NULL, + &op_dtype[iop], + flags, + op_flags[iop], &op_itflags[iop])) { + for (i = 0; i <= iop; ++i) { + Py_XDECREF(op[i]); + Py_XDECREF(op_dtype[i]); + } + return 0; + } + } + + + /* If all the operands were NULL, it's an error */ + if (op[0] == NULL) { + int all_null = 1; + for (iop = 1; iop < nop; ++iop) { + if (op[iop] != NULL) { + all_null = 0; + break; + } + } + if (all_null) { + for (i = 0; i < nop; ++i) { + Py_XDECREF(op[i]); + Py_XDECREF(op_dtype[i]); + } + PyErr_SetString(PyExc_ValueError, + "At least one iterator input must be non-NULL"); + return 0; + } + } + + return 1; +} + +static const char * +npyiter_casting_to_string(NPY_CASTING casting) +{ + switch (casting) { + case NPY_NO_CASTING: + return "'no'"; + case NPY_EQUIV_CASTING: + return "'equiv'"; + case NPY_SAFE_CASTING: + return "'safe'"; + case NPY_SAME_KIND_CASTING: + return "'same_kind'"; + case NPY_UNSAFE_CASTING: + return "'unsafe'"; + default: + return "<unknown>"; + } +} + +static PyObject * +npyiter_shape_string(npy_intp n, npy_intp *vals, char *ending) +{ + npy_intp i; + PyObject *ret, *tmp; + + /* + * Negative dimension indicates "newaxis", which can + * be discarded for printing if its a leading dimension. + * Find the first non-"newaxis" dimension. + */ + i = 0; + while (i < n && vals[i] < 0) { + ++i; + } + + if (i == n) { + return PyUString_FromFormat("()%s", ending); + } + else { + ret = PyUString_FromFormat("(%" NPY_INTP_FMT, vals[i++]); + if (ret == NULL) { + return NULL; + } + } + + for (; i < n; ++i) { + if (vals[i] < 0) { + tmp = PyUString_FromString(",newaxis"); + } + else { + tmp = PyUString_FromFormat(",%" NPY_INTP_FMT, vals[i]); + } + if (tmp == NULL) { + Py_DECREF(ret); + return NULL; + } + + PyUString_ConcatAndDel(&ret, tmp); + if (ret == NULL) { + return NULL; + } + } + + tmp = PyUString_FromFormat(")%s", ending); + PyUString_ConcatAndDel(&ret, tmp); + return ret; +} + +static int +npyiter_check_casting(int nop, PyArrayObject **op, + PyArray_Descr **op_dtype, + NPY_CASTING casting, + char *op_itflags) +{ + int iop; + + for(iop = 0; iop < nop; ++iop) { + NPY_IT_DBG_PRINT1("Iterator: Checking casting for operand %d\n", + (int)iop); +#if NPY_IT_DBG_TRACING + printf("op: "); + if (op[iop] != NULL) { + PyObject_Print((PyObject *)PyArray_DESCR(op[iop]), stdout, 0); + } + else { + printf("<null>"); + } + printf(", iter: "); + PyObject_Print((PyObject *)op_dtype[iop], stdout, 0); + printf("\n"); +#endif + /* If the types aren't equivalent, a cast is necessary */ + if (op[iop] != NULL && !PyArray_EquivTypes(PyArray_DESCR(op[iop]), + op_dtype[iop])) { + /* Check read (op -> temp) casting */ + if ((op_itflags[iop]&NPY_OP_ITFLAG_READ) && + !PyArray_CanCastArrayTo(op[iop], + op_dtype[iop], + casting)) { + PyObject *errmsg; + errmsg = PyUString_FromFormat( + "Iterator operand %d dtype could not be cast from ", + (int)iop); + PyUString_ConcatAndDel(&errmsg, + PyObject_Repr((PyObject *)PyArray_DESCR(op[iop]))); + PyUString_ConcatAndDel(&errmsg, + PyUString_FromString(" to ")); + PyUString_ConcatAndDel(&errmsg, + PyObject_Repr((PyObject *)op_dtype[iop])); + PyUString_ConcatAndDel(&errmsg, + PyUString_FromFormat(" according to the rule %s", + npyiter_casting_to_string(casting))); + PyErr_SetObject(PyExc_TypeError, errmsg); + return 0; + } + /* Check write (temp -> op) casting */ + if ((op_itflags[iop]&NPY_OP_ITFLAG_WRITE) && + !PyArray_CanCastTypeTo(op_dtype[iop], + PyArray_DESCR(op[iop]), + casting)) { + PyObject *errmsg; + errmsg = PyUString_FromString( + "Iterator requested dtype could 
not be cast from "); + PyUString_ConcatAndDel(&errmsg, + PyObject_Repr((PyObject *)op_dtype[iop])); + PyUString_ConcatAndDel(&errmsg, + PyUString_FromString(" to ")); + PyUString_ConcatAndDel(&errmsg, + PyObject_Repr((PyObject *)PyArray_DESCR(op[iop]))); + PyUString_ConcatAndDel(&errmsg, + PyUString_FromFormat(", the operand %d dtype, " + "according to the rule %s", + (int)iop, + npyiter_casting_to_string(casting))); + PyErr_SetObject(PyExc_TypeError, errmsg); + return 0; + } + + NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST " + "because the types aren't equivalent\n"); + /* Indicate that this operand needs casting */ + op_itflags[iop] |= NPY_OP_ITFLAG_CAST; + } + } + + return 1; +} + +/* + * Fills in the AXISDATA for the 'nop' operands, broadcasting + * the dimensionas as necessary. Also fills + * in the ITERSIZE data member. + * + * If op_axes is not NULL, it should point to an array of ndim-sized + * arrays, one for each op. + * + * Returns 1 on success, 0 on failure. + */ +static int +npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags, + char **op_dataptr, + npy_uint32 *op_flags, int **op_axes, + npy_intp *itershape, + int output_scalars) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int idim, ndim = NIT_NDIM(iter); + int iop, nop = NIT_NOP(iter); + + int ondim; + NpyIter_AxisData *axisdata; + npy_intp sizeof_axisdata; + PyArrayObject **op = NIT_OPERANDS(iter), *op_cur; + npy_intp broadcast_shape[NPY_MAXDIMS]; + + /* First broadcast the shapes together */ + if (itershape == NULL) { + for (idim = 0; idim < ndim; ++idim) { + broadcast_shape[idim] = 1; + } + } + else { + for (idim = 0; idim < ndim; ++idim) { + broadcast_shape[idim] = itershape[idim]; + /* Negative shape entries are deduced from the operands */ + if (broadcast_shape[idim] < 0) { + broadcast_shape[idim] = 1; + } + } + } + for (iop = 0; iop < nop; ++iop) { + op_cur = op[iop]; + if (op_cur != NULL) { + npy_intp *shape = PyArray_DIMS(op_cur); + ondim = PyArray_NDIM(op_cur); + + if (op_axes == NULL || op_axes[iop] == NULL) { + /* + * Possible if op_axes are being used, but + * op_axes[iop] is NULL + */ + if (ondim > ndim) { + PyErr_SetString(PyExc_ValueError, + "input operand has more dimensions than allowed " + "by the axis remapping"); + return 0; + } + for (idim = 0; idim < ondim; ++idim) { + npy_intp bshape = broadcast_shape[idim+ndim-ondim], + op_shape = shape[idim]; + if (bshape == 1) { + broadcast_shape[idim+ndim-ondim] = op_shape; + } + else if (bshape != op_shape && op_shape != 1) { + goto broadcast_error; + } + } + } + else { + int *axes = op_axes[iop]; + for (idim = 0; idim < ndim; ++idim) { + int i = axes[idim]; + if (i >= 0) { + if (i < ondim) { + npy_intp bshape = broadcast_shape[idim], + op_shape = shape[i]; + if (bshape == 1) { + broadcast_shape[idim] = op_shape; + } + else if (bshape != op_shape && op_shape != 1) { + goto broadcast_error; + } + } + else { + PyErr_Format(PyExc_ValueError, + "Iterator input op_axes[%d][%d] (==%d) " + "is not a valid axis of op[%d], which " + "has %d dimensions ", + (int)iop, (int)(ndim-idim-1), (int)i, + (int)iop, (int)ondim); + return 0; + } + } + } + } + } + } + /* + * If a shape was provided with a 1 entry, make sure that entry didn't + * get expanded by broadcasting. 
+ */ + if (itershape != NULL) { + for (idim = 0; idim < ndim; ++idim) { + if (itershape[idim] == 1 && broadcast_shape[idim] != 1) { + goto broadcast_error; + } + } + } + + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); + + /* Now process the operands, filling in the axisdata */ + for (idim = 0; idim < ndim; ++idim) { + npy_intp bshape = broadcast_shape[ndim-idim-1]; + npy_intp *strides = NAD_STRIDES(axisdata); + + NAD_SHAPE(axisdata) = bshape; + NAD_INDEX(axisdata) = 0; + memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*nop); + + for (iop = 0; iop < nop; ++iop) { + op_cur = op[iop]; + + if (op_axes == NULL || op_axes[iop] == NULL) { + if (op_cur == NULL) { + strides[iop] = 0; + } + else { + ondim = PyArray_NDIM(op_cur); + if (bshape == 1) { + strides[iop] = 0; + if (idim >= ondim && !output_scalars && + (op_flags[iop]&NPY_ITER_NO_BROADCAST)) { + goto operand_different_than_broadcast; + } + } + else if (idim >= ondim || + PyArray_DIM(op_cur, ondim-idim-1) == 1) { + strides[iop] = 0; + if (op_flags[iop]&NPY_ITER_NO_BROADCAST) { + goto operand_different_than_broadcast; + } + /* If it's writeable, this means a reduction */ + if (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "reduction is not enabled"); + return 0; + } + if (!(op_itflags[iop]&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "is flagged as write-only, not " + "read-write"); + return 0; + } + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE; + } + } + else { + strides[iop] = PyArray_STRIDE(op_cur, ondim-idim-1); + } + } + } + else { + int *axes = op_axes[iop]; + int i = axes[ndim-idim-1]; + if (i >= 0) { + if (bshape == 1 || op_cur == NULL) { + strides[iop] = 0; + } + else if (PyArray_DIM(op_cur, i) == 1) { + strides[iop] = 0; + if (op_flags[iop]&NPY_ITER_NO_BROADCAST) { + goto operand_different_than_broadcast; + } + /* If it's writeable, this means a reduction */ + if (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "reduction is not enabled"); + return 0; + } + if (!(op_itflags[iop]&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "is flagged as write-only, not " + "read-write"); + return 0; + } + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE; + } + } + else { + strides[iop] = PyArray_STRIDE(op_cur, i); + } + } + else if (bshape == 1) { + strides[iop] = 0; + } + else { + strides[iop] = 0; + /* If it's writeable, this means a reduction */ + if (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "reduction is not enabled"); + return 0; + } + if (!(op_itflags[iop]&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "is flagged as write-only, not " + "read-write"); + return 0; + } + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE; + } + } + } + } + + NIT_ADVANCE_AXISDATA(axisdata, 1); + } + + /* Now fill in the ITERSIZE member */ + NIT_ITERSIZE(iter) = broadcast_shape[0]; + for (idim = 1; idim < ndim; ++idim) { + NIT_ITERSIZE(iter) *= broadcast_shape[idim]; + } + /* The range defaults 
to everything */ + NIT_ITERSTART(iter) = 0; + NIT_ITEREND(iter) = NIT_ITERSIZE(iter); + + return 1; + +broadcast_error: { + PyObject *errmsg, *tmp; + npy_intp remdims[NPY_MAXDIMS]; + char *tmpstr; + + if (op_axes == NULL) { + errmsg = PyUString_FromString("operands could not be broadcast " + "together with shapes "); + if (errmsg == NULL) { + return 0; + } + for (iop = 0; iop < nop; ++iop) { + if (op[iop] != NULL) { + tmp = npyiter_shape_string(PyArray_NDIM(op[iop]), + PyArray_DIMS(op[iop]), + " "); + if (tmp == NULL) { + Py_DECREF(errmsg); + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + } + } + if (itershape != NULL) { + tmp = PyUString_FromString("and requested shape "); + if (tmp == NULL) { + Py_DECREF(errmsg); + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + + tmp = npyiter_shape_string(ndim, itershape, ""); + if (tmp == NULL) { + Py_DECREF(errmsg); + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + + } + PyErr_SetObject(PyExc_ValueError, errmsg); + } + else { + errmsg = PyUString_FromString("operands could not be broadcast " + "together with remapped shapes " + "[original->remapped]: "); + for (iop = 0; iop < nop; ++iop) { + if (op[iop] != NULL) { + int *axes = op_axes[iop]; + + tmpstr = (axes == NULL) ? " " : "->"; + tmp = npyiter_shape_string(PyArray_NDIM(op[iop]), + PyArray_DIMS(op[iop]), + tmpstr); + if (tmp == NULL) { + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + + if (axes != NULL) { + for (idim = 0; idim < ndim; ++idim) { + npy_intp i = axes[idim]; + + if (i >= 0 && i < PyArray_NDIM(op[iop])) { + remdims[idim] = PyArray_DIM(op[iop], i); + } + else { + remdims[idim] = -1; + } + } + tmp = npyiter_shape_string(ndim, remdims, " "); + if (tmp == NULL) { + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + } + } + } + if (itershape != NULL) { + tmp = PyUString_FromString("and requested shape "); + if (tmp == NULL) { + Py_DECREF(errmsg); + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + + tmp = npyiter_shape_string(ndim, itershape, ""); + if (tmp == NULL) { + Py_DECREF(errmsg); + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + + } + PyErr_SetObject(PyExc_ValueError, errmsg); + } + + return 0; + } + +operand_different_than_broadcast: { + npy_intp remdims[NPY_MAXDIMS]; + PyObject *errmsg, *tmp; + + /* Start of error message */ + if (op_flags[iop]&NPY_ITER_READONLY) { + errmsg = PyUString_FromString("non-broadcastable operand " + "with shape "); + } + else { + errmsg = PyUString_FromString("non-broadcastable output " + "operand with shape "); + } + if (errmsg == NULL) { + return 0; + } + + /* Operand shape */ + tmp = npyiter_shape_string(PyArray_NDIM(op[iop]), + PyArray_DIMS(op[iop]), ""); + if (tmp == NULL) { + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + /* Remapped operand shape */ + if (op_axes != NULL && op_axes[iop] != NULL) { + int *axes = op_axes[iop]; + + for (idim = 0; idim < ndim; ++idim) { + npy_intp i = axes[ndim-idim-1]; + + if (i >= 0 && i < PyArray_NDIM(op[iop])) { + remdims[idim] = PyArray_DIM(op[iop], i); + } + else { + remdims[idim] = -1; + } + } + + tmp = PyUString_FromString(" [remapped to "); + if (tmp == NULL) { + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == 
NULL) { + return 0; + } + + tmp = npyiter_shape_string(ndim, remdims, "]"); + if (tmp == NULL) { + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + } + + tmp = PyUString_FromString(" doesn't match the broadcast shape "); + if (tmp == NULL) { + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + + /* Broadcast shape */ + tmp = npyiter_shape_string(ndim, broadcast_shape, ""); + if (tmp == NULL) { + return 0; + } + PyUString_ConcatAndDel(&errmsg, tmp); + if (errmsg == NULL) { + return 0; + } + + PyErr_SetObject(PyExc_ValueError, errmsg); + + return 0; + } +} + +/* + * Replaces the AXISDATA for the iop'th operand, broadcasting + * the dimensions as necessary. Assumes the replacement array is + * exactly the same shape as the original array used when + * npy_fill_axisdata was called. + * + * If op_axes is not NULL, it should point to an ndim-sized + * array. + */ +static void +npyiter_replace_axisdata(NpyIter *iter, int iop, + PyArrayObject *op, + int op_ndim, char *op_dataptr, + int *op_axes) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int idim, ndim = NIT_NDIM(iter); + int nop = NIT_NOP(iter); + + NpyIter_AxisData *axisdata0, *axisdata; + npy_intp sizeof_axisdata; + npy_int8 *perm; + npy_intp baseoffset = 0; + + perm = NIT_PERM(iter); + axisdata0 = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); + + /* + * Replace just the strides which were non-zero, and compute + * the base data address. + */ + axisdata = axisdata0; + + if (op_axes != NULL) { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + npy_int8 p; + int i; + npy_intp shape; + + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_axes[ndim+p]; + } + else { + i = op_axes[ndim-p-1]; + } + + if (0 <= i && i < op_ndim) { + shape = PyArray_DIM(op, i); + if (shape != 1) { + npy_intp stride = PyArray_STRIDE(op, i); + if (p < 0) { + /* If the perm entry is negative, flip the axis */ + NAD_STRIDES(axisdata)[iop] = -stride; + baseoffset += stride*(shape-1); + } + else { + NAD_STRIDES(axisdata)[iop] = stride; + } + } + } + } + } + else { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + npy_int8 p; + int i; + npy_intp shape; + + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_ndim+p; + } + else { + i = op_ndim-p-1; + } + + if (i >= 0) { + shape = PyArray_DIM(op, i); + if (shape != 1) { + npy_intp stride = PyArray_STRIDE(op, i); + if (p < 0) { + /* If the perm entry is negative, flip the axis */ + NAD_STRIDES(axisdata)[iop] = -stride; + baseoffset += stride*(shape-1); + } + else { + NAD_STRIDES(axisdata)[iop] = stride; + } + } + } + } + } + + op_dataptr += baseoffset; + + /* Now the base data pointer is calculated, set it everywhere it's needed */ + NIT_RESETDATAPTR(iter)[iop] = op_dataptr; + NIT_BASEOFFSETS(iter)[iop] = baseoffset; + axisdata = axisdata0; + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + NAD_PTRS(axisdata)[iop] = op_dataptr; + } +} + +/* + * Computes the iterator's index strides and initializes the index values + * to zero. + * + * This must be called before the axes (i.e. the AXISDATA array) may + * be reordered. 
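+ * + * For example, a C-order index over a (3, 4) iteration space gets an + * index stride of 1 on the last (fastest-varying) axis and 4 on the + * first, so the index tracks the flat offset i*4 + j.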
+ */ +static void +npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int idim, ndim = NIT_NDIM(iter); + int nop = NIT_NOP(iter); + + npy_intp indexstride; + NpyIter_AxisData *axisdata; + npy_intp sizeof_axisdata; + + /* + * If there is only one element being iterated, we just have + * to touch the first AXISDATA because nothing will ever be + * incremented. + */ + if (NIT_ITERSIZE(iter) == 1) { + if (itflags&NPY_ITFLAG_HASINDEX) { + axisdata = NIT_AXISDATA(iter); + NAD_PTRS(axisdata)[nop] = 0; + } + return; + } + + if (flags&NPY_ITER_C_INDEX) { + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); + axisdata = NIT_AXISDATA(iter); + indexstride = 1; + for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + npy_intp shape = NAD_SHAPE(axisdata); + + if (shape == 1) { + NAD_STRIDES(axisdata)[nop] = 0; + } + else { + NAD_STRIDES(axisdata)[nop] = indexstride; + } + NAD_PTRS(axisdata)[nop] = 0; + indexstride *= shape; + } + } + else if (flags&NPY_ITER_F_INDEX) { + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); + axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1); + indexstride = 1; + for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, -1)) { + npy_intp shape = NAD_SHAPE(axisdata); + + if (shape == 1) { + NAD_STRIDES(axisdata)[nop] = 0; + } + else { + NAD_STRIDES(axisdata)[nop] = indexstride; + } + NAD_PTRS(axisdata)[nop] = 0; + indexstride *= shape; + } + } +} + +/* + * If the order is NPY_KEEPORDER, lets the iterator find the best + * iteration order, otherwise forces it. Indicates in the itflags + * whether the iteration order was forced. + */ +static void +npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order) +{ + /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/ + int ndim = NIT_NDIM(iter); + int iop, nop = NIT_NOP(iter); + + switch (order) { + case NPY_CORDER: + NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER; + break; + case NPY_FORTRANORDER: + NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER; + /* Only need to actually do something if there is more than 1 dim */ + if (ndim > 1) { + npyiter_reverse_axis_ordering(iter); + } + break; + case NPY_ANYORDER: + NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER; + /* Only need to actually do something if there is more than 1 dim */ + if (ndim > 1) { + PyArrayObject **op = NIT_OPERANDS(iter); + int forder = 1; + + /* Check that all the array inputs are fortran order */ + for (iop = 0; iop < nop; ++iop, ++op) { + if (*op && !PyArray_CHKFLAGS(*op, NPY_ARRAY_F_CONTIGUOUS)) { + forder = 0; + break; + } + } + + if (forder) { + npyiter_reverse_axis_ordering(iter); + } + } + break; + case NPY_KEEPORDER: + /* Don't set the forced order flag here... */ + break; + } +} + +/* + * This function negates any strides in the iterator + * which are negative. When iterating more than one + * object, it only flips strides when they are all + * negative or zero.
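+ * For example, per-operand strides of (-8, -4, 0) on some axis are + * flipped to (8, 4, 0) and the base pointers are moved to the other end + * of that axis, while a mixed-sign combination like (-8, 4) is left + * untouched.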
+ */ +static void +npyiter_flip_negative_strides(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int idim, ndim = NIT_NDIM(iter); + int iop, nop = NIT_NOP(iter); + + npy_intp istrides, nstrides = NAD_NSTRIDES(); + NpyIter_AxisData *axisdata, *axisdata0; + npy_intp *baseoffsets; + npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); + int any_flipped = 0; + + axisdata0 = axisdata = NIT_AXISDATA(iter); + baseoffsets = NIT_BASEOFFSETS(iter); + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + npy_intp *strides = NAD_STRIDES(axisdata); + int any_negative = 0; + + /* + * Check the signs of all the strides, excluding + * the index stride at the end. + */ + for (iop = 0; iop < nop; ++iop) { + if (strides[iop] < 0) { + any_negative = 1; + } + else if (strides[iop] != 0) { + break; + } + } + /* + * If at least one stride is negative and none are positive, + * flip all the strides for this dimension. + */ + if (any_negative && iop == nop) { + npy_intp shapem1 = NAD_SHAPE(axisdata) - 1; + + for (istrides = 0; istrides < nstrides; ++istrides) { + npy_intp stride = strides[istrides]; + + /* Adjust the base pointers to start at the end */ + baseoffsets[istrides] += shapem1 * stride; + /* Flip the stride */ + strides[istrides] = -stride; + } + /* + * Make the perm entry negative so get_multi_index + * knows it's flipped + */ + NIT_PERM(iter)[idim] = -1-NIT_PERM(iter)[idim]; + + any_flipped = 1; + } + } + + /* + * If any strides were flipped, the base pointers were adjusted + * in the first AXISDATA, and need to be copied to all the rest + */ + if (any_flipped) { + char **resetdataptr = NIT_RESETDATAPTR(iter); + + for (istrides = 0; istrides < nstrides; ++istrides) { + resetdataptr[istrides] += baseoffsets[istrides]; + } + axisdata = axisdata0; + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char **ptrs = NAD_PTRS(axisdata); + for (istrides = 0; istrides < nstrides; ++istrides) { + ptrs[istrides] = resetdataptr[istrides]; + } + } + /* + * Indicate that some of the perm entries are negative, + * and that it's not (strictly speaking) the identity perm. + */ + NIT_ITFLAGS(iter) = (NIT_ITFLAGS(iter)|NPY_ITFLAG_NEGPERM) & + ~NPY_ITFLAG_IDENTPERM; + } +} + +static void +npyiter_reverse_axis_ordering(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int ndim = NIT_NDIM(iter); + int nop = NIT_NOP(iter); + + npy_intp i, temp, size; + npy_intp *first, *last; + npy_int8 *perm; + + size = NIT_AXISDATA_SIZEOF(itflags, ndim, nop)/NPY_SIZEOF_INTP; + first = (npy_intp*)NIT_AXISDATA(iter); + last = first + (ndim-1)*size; + + /* This loop reverses the order of the AXISDATA array */ + while (first < last) { + for (i = 0; i < size; ++i) { + temp = first[i]; + first[i] = last[i]; + last[i] = temp; + } + first += size; + last -= size; + } + + /* Store the perm we applied */ + perm = NIT_PERM(iter); + for(i = ndim-1; i >= 0; --i, ++perm) { + *perm = (npy_int8)i; + } + + NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM; +} + +static npy_intp +intp_abs(npy_intp x) +{ + return (x < 0) ?
-x : x; +} + +static void +npyiter_find_best_axis_ordering(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int idim, ndim = NIT_NDIM(iter); + int iop, nop = NIT_NOP(iter); + + npy_intp ax_i0, ax_i1, ax_ipos; + npy_int8 ax_j0, ax_j1; + npy_int8 *perm; + NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); + npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); + int permuted = 0; + + perm = NIT_PERM(iter); + + /* + * Do a custom stable insertion sort. Note that because + * the AXISDATA has been reversed from C order, this + * is sorting from smallest stride to biggest stride. + */ + for (ax_i0 = 1; ax_i0 < ndim; ++ax_i0) { + npy_intp *strides0; + + /* 'ax_ipos' is where perm[ax_i0] will get inserted */ + ax_ipos = ax_i0; + ax_j0 = perm[ax_i0]; + + strides0 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j0)); + for (ax_i1 = ax_i0-1; ax_i1 >= 0; --ax_i1) { + int ambig = 1, shouldswap = 0; + npy_intp *strides1; + + ax_j1 = perm[ax_i1]; + + strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j1)); + + for (iop = 0; iop < nop; ++iop) { + if (strides0[iop] != 0 && strides1[iop] != 0) { + if (intp_abs(strides1[iop]) <= + intp_abs(strides0[iop])) { + /* + * Set swap even if it's not ambiguous already, + * because in the case of conflicts between + * different operands, C-order wins. + */ + shouldswap = 0; + } + else { + /* Only set swap if it's still ambiguous */ + if (ambig) { + shouldswap = 1; + } + } + + /* + * A comparison has been done, so it's + * no longer ambiguous + */ + ambig = 0; + } + } + /* + * If the comparison was unambiguous, either shift + * 'ax_ipos' to 'ax_i1' or stop looking for an insertion + * point + */ + if (!ambig) { + if (shouldswap) { + ax_ipos = ax_i1; + } + else { + break; + } + } + } + + /* Insert perm[ax_i0] into the right place */ + if (ax_ipos != ax_i0) { + for (ax_i1 = ax_i0; ax_i1 > ax_ipos; --ax_i1) { + perm[ax_i1] = perm[ax_i1-1]; + } + perm[ax_ipos] = ax_j0; + permuted = 1; + } + } + + /* Apply the computed permutation to the AXISDATA array */ + if (permuted == 1) { + npy_intp i, size = sizeof_axisdata/NPY_SIZEOF_INTP; + NpyIter_AxisData *ad_i; + + /* Use the index as a flag, set each to 1 */ + ad_i = axisdata; + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(ad_i, 1)) { + NAD_INDEX(ad_i) = 1; + } + /* Apply the permutation by following the cycles */ + for (idim = 0; idim < ndim; ++idim) { + ad_i = NIT_INDEX_AXISDATA(axisdata, idim); + + /* If this axis hasn't been touched yet, process it */ + if (NAD_INDEX(ad_i) == 1) { + npy_int8 pidim = perm[idim]; + npy_intp tmp; + NpyIter_AxisData *ad_p, *ad_q; + + if (pidim != idim) { + /* Follow the cycle, copying the data */ + for (i = 0; i < size; ++i) { + pidim = perm[idim]; + ad_q = ad_i; + tmp = *((npy_intp*)ad_q + i); + while (pidim != idim) { + ad_p = NIT_INDEX_AXISDATA(axisdata, pidim); + *((npy_intp*)ad_q + i) = *((npy_intp*)ad_p + i); + + ad_q = ad_p; + pidim = perm[(int)pidim]; + } + *((npy_intp*)ad_q + i) = tmp; + } + /* Follow the cycle again, marking it as done */ + pidim = perm[idim]; + + while (pidim != idim) { + NAD_INDEX(NIT_INDEX_AXISDATA(axisdata, pidim)) = 0; + pidim = perm[(int)pidim]; + } + } + NAD_INDEX(ad_i) = 0; + } + } + /* Clear the identity perm flag */ + NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM; + } +} + +/* + * Calculates a dtype that all the types can be promoted to, using the + * ufunc rules. If only_inputs is 1, it leaves any operands that + * are not read from out of the calculation. 
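+ * + * For example, an int32 operand combined with a float64 operand promotes + * to float64, matching the type a binary ufunc would choose for the pair.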
+ */ +static PyArray_Descr * +npyiter_get_common_dtype(int nop, PyArrayObject **op, + char *op_itflags, PyArray_Descr **op_dtype, + PyArray_Descr **op_request_dtypes, + int only_inputs, int output_scalars) +{ + int iop; + npy_intp narrs = 0, ndtypes = 0; + PyArrayObject *arrs[NPY_MAXARGS]; + PyArray_Descr *dtypes[NPY_MAXARGS]; + PyArray_Descr *ret; + + NPY_IT_DBG_PRINT("Iterator: Getting a common data type from operands\n"); + + for (iop = 0; iop < nop; ++iop) { + if (op_dtype[iop] != NULL && + (!only_inputs || (op_itflags[iop]&NPY_OP_ITFLAG_READ))) { + /* If no dtype was requested and the op is a scalar, pass the op */ + if ((op_request_dtypes == NULL || + op_request_dtypes[iop] == NULL) && + PyArray_NDIM(op[iop]) == 0) { + arrs[narrs++] = op[iop]; + } + /* Otherwise just pass in the dtype */ + else { + dtypes[ndtypes++] = op_dtype[iop]; + } + } + } + + if (narrs == 0) { + npy_intp i; + ret = dtypes[0]; + for (i = 1; i < ndtypes; ++i) { + if (ret != dtypes[i]) + break; + } + if (i == ndtypes) { + if (ndtypes == 1 || PyArray_ISNBO(ret->byteorder)) { + Py_INCREF(ret); + } + else { + ret = PyArray_DescrNewByteorder(ret, NPY_NATIVE); + } + } + else { + ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + } + } + else { + ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + } + + return ret; +} + +/* + * Allocates a temporary array which can be used to replace op + * in the iteration. Its dtype will be op_dtype. + * + * The result array has a memory ordering which matches the iterator, + * which may or may not match that of op. The parameter 'shape' may be + * NULL, in which case it is filled in from the iterator's shape. + * + * This function must be called before any axes are coalesced. + */ +static PyArrayObject * +npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, + npy_uint32 flags, char *op_itflags, + int op_ndim, npy_intp *shape, + PyArray_Descr *op_dtype, int *op_axes) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int idim, ndim = NIT_NDIM(iter); + int nop = NIT_NOP(iter); + + npy_int8 *perm = NIT_PERM(iter); + npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS], + stride = op_dtype->elsize; + char reversestride[NPY_MAXDIMS], anyreverse = 0; + NpyIter_AxisData *axisdata; + npy_intp sizeof_axisdata; + npy_intp i; + + PyArrayObject *ret; + + /* If it's a scalar, don't need to check the axes */ + if (op_ndim == 0) { + Py_INCREF(op_dtype); + ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, 0, + NULL, NULL, NULL, 0, NULL); + + /* Double-check that the subtype didn't mess with the dimensions */ + if (PyArray_NDIM(ret) != 0) { + PyErr_SetString(PyExc_RuntimeError, + "Iterator automatic output has an array subtype " + "which changed the dimensions of the output"); + Py_DECREF(ret); + return NULL; + } + + return ret; + } + + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); + + memset(reversestride, 0, NPY_MAXDIMS); + /* Initialize the strides to invalid values */ + for (i = 0; i < NPY_MAXDIMS; ++i) { + strides[i] = NPY_MAX_INTP; + } + + if (op_axes != NULL) { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + npy_int8 p; + + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_axes[ndim+p]; + } + else { + i = op_axes[ndim-p-1]; + } + + if (i >= 0) { + NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d " + "for iterator dimension %d to %d\n", (int)i, + (int)idim, (int)stride); + strides[i] = stride; + if (p < 0) { + reversestride[i] = 1; + anyreverse 
= 1; + } + else { + reversestride[i] = 0; + } + if (shape == NULL) { + new_shape[i] = NAD_SHAPE(axisdata); + stride *= new_shape[i]; + if (i >= ndim) { + PyErr_SetString(PyExc_ValueError, + "automatically allocated output array " + "specified with an inconsistent axis mapping"); + return NULL; + } + } + else { + stride *= shape[i]; + } + } + else { + if (shape == NULL) { + /* + * If deleting this axis produces a reduction, but + * reduction wasn't enabled, throw an error + */ + if (NAD_SHAPE(axisdata) != 1) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output requires a reduction, but " + "reduction is not enabled"); + return NULL; + } + if (!((*op_itflags)&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output requires a reduction, but " + "is flagged as write-only, not read-write"); + return NULL; + } + + NPY_IT_DBG_PRINT("Iterator: Indicating that a " + "reduction is occurring\n"); + /* Indicate that a reduction is occurring */ + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + (*op_itflags) |= NPY_OP_ITFLAG_REDUCE; + } + } + } + } + } + else { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + npy_int8 p; + + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_ndim + p; + } + else { + i = op_ndim - p - 1; + } + + if (i >= 0) { + NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d " + "for iterator dimension %d to %d\n", (int)i, + (int)idim, (int)stride); + strides[i] = stride; + if (p < 0) { + reversestride[i] = 1; + anyreverse = 1; + } + else { + reversestride[i] = 0; + } + if (shape == NULL) { + new_shape[i] = NAD_SHAPE(axisdata); + stride *= new_shape[i]; + } + else { + stride *= shape[i]; + } + } + } + } + + /* + * If custom axes were specified, some dimensions may not have been used. + * Add the REDUCE itflag if this creates a reduction situation. + */ + if (shape == NULL) { + /* Ensure there are no dimension gaps in op_axes, and find op_ndim */ + op_ndim = ndim; + if (op_axes != NULL) { + for (i = 0; i < ndim; ++i) { + if (strides[i] == NPY_MAX_INTP) { + if (op_ndim == ndim) { + op_ndim = i; + } + } + /* + * If there's a gap in the array's dimensions, it's an error. + * For example, op_axes of [0,2] for the automatically + * allocated output. + */ + else if (op_ndim != ndim) { + PyErr_SetString(PyExc_ValueError, + "automatically allocated output array " + "specified with an inconsistent axis mapping"); + return NULL; + } + } + } + } + else { + for (i = 0; i < op_ndim; ++i) { + if (strides[i] == NPY_MAX_INTP) { + npy_intp factor, new_strides[NPY_MAXDIMS], + itemsize; + + /* Fill in the missing strides in C order */ + factor = 1; + itemsize = op_dtype->elsize; + for (i = op_ndim-1; i >= 0; --i) { + if (strides[i] == NPY_MAX_INTP) { + new_strides[i] = factor * itemsize; + factor *= shape[i]; + } + } + + /* + * Copy the missing strides, and multiply the existing strides + * by the calculated factor. This way, the missing strides + * are tighter together in memory, which is good for nested + * loops. 
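+ * + * The net effect is that any axes missing from 'op_axes' become the most + * tightly packed (innermost) part of the allocated array, with the + * iterator-chosen strides scaled up around them.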
+ */ + for (i = 0; i < op_ndim; ++i) { + if (strides[i] == NPY_MAX_INTP) { + strides[i] = new_strides[i]; + } + else { + strides[i] *= factor; + } + } + + break; + } + } + } + + /* If shape was NULL, set it to the shape we calculated */ + if (shape == NULL) { + shape = new_shape; + } + + /* Allocate the temporary array */ + Py_INCREF(op_dtype); + ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, op_ndim, + shape, strides, NULL, 0, NULL); + if (ret == NULL) { + return NULL; + } + + /* If there are any reversed axes, create a view that reverses them */ + if (anyreverse) { + char *dataptr = PyArray_DATA(ret); + PyArrayObject *newret; + + for (idim = 0; idim < op_ndim; ++idim) { + if (reversestride[idim]) { + dataptr += strides[idim]*(shape[idim]-1); + strides[idim] = -strides[idim]; + } + } + Py_INCREF(op_dtype); + newret = (PyArrayObject *)PyArray_NewFromDescr(subtype, + op_dtype, op_ndim, + shape, strides, dataptr, + NPY_ARRAY_WRITEABLE, NULL); + if (newret == NULL) { + Py_DECREF(ret); + return NULL; + } + newret->base = (PyObject *)ret; + ret = newret; + } + + /* Make sure all the flags are good */ + PyArray_UpdateFlags(ret, NPY_ARRAY_UPDATE_ALL); + + /* Double-check that the subtype didn't mess with the dimensions */ + if (subtype != &PyArray_Type) { + if (PyArray_NDIM(ret) != op_ndim || + !PyArray_CompareLists(shape, PyArray_DIMS(ret), op_ndim)) { + PyErr_SetString(PyExc_RuntimeError, + "Iterator automatic output has an array subtype " + "which changed the dimensions of the output"); + Py_DECREF(ret); + return NULL; + } + } + + return ret; +} + +static int +npyiter_allocate_arrays(NpyIter *iter, + npy_uint32 flags, + PyArray_Descr **op_dtype, PyTypeObject *subtype, + npy_uint32 *op_flags, char *op_itflags, + int **op_axes, int output_scalars) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int idim, ndim = NIT_NDIM(iter); + int iop, nop = NIT_NOP(iter); + + NpyIter_BufferData *bufferdata = NULL; + PyArrayObject **op = NIT_OPERANDS(iter); + + if (itflags&NPY_ITFLAG_BUFFER) { + bufferdata = NIT_BUFFERDATA(iter); + } + + + for (iop = 0; iop < nop; ++iop) { + /* NULL means an output the iterator should allocate */ + if (op[iop] == NULL) { + PyArrayObject *out; + PyTypeObject *op_subtype; + int ondim = output_scalars ? 0 : ndim; + + /* Check whether the subtype was disabled */ + op_subtype = (op_flags[iop]&NPY_ITER_NO_SUBTYPE) ? + &PyArray_Type : subtype; + + /* Allocate the output array */ + out = npyiter_new_temp_array(iter, op_subtype, + flags, &op_itflags[iop], + ondim, + NULL, + op_dtype[iop], + op_axes ? op_axes[iop] : NULL); + if (out == NULL) { + return 0; + } + + op[iop] = out; + + /* + * Now we need to replace the pointers and strides with values + * from the new array. + */ + npyiter_replace_axisdata(iter, iop, op[iop], ondim, + PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL); + + /* New arrays are aligned and need no cast */ + op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; + op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST; + } + /* + * If casting is required, the operand is read-only, and + * it's an array scalar, make a copy whether or not the + * copy flag is enabled. 
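+ * (A 0-d operand holds a single element, so one up-front copy is cheaper + * than routing every access through the buffering machinery.)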
+ */ + else if ((op_itflags[iop]&(NPY_OP_ITFLAG_CAST| + NPY_OP_ITFLAG_READ| + NPY_OP_ITFLAG_WRITE)) == (NPY_OP_ITFLAG_CAST| + NPY_OP_ITFLAG_READ) && + PyArray_NDIM(op[iop]) == 0) { + PyArrayObject *temp; + Py_INCREF(op_dtype[iop]); + temp = (PyArrayObject *)PyArray_NewFromDescr( + &PyArray_Type, op_dtype[iop], + 0, NULL, NULL, NULL, 0, NULL); + if (temp == NULL) { + return 0; + } + if (PyArray_CopyInto(temp, op[iop]) != 0) { + Py_DECREF(temp); + return 0; + } + Py_DECREF(op[iop]); + op[iop] = temp; + + /* + * Now we need to replace the pointers and strides with values + * from the temporary array. + */ + npyiter_replace_axisdata(iter, iop, op[iop], 0, + PyArray_DATA(op[iop]), NULL); + + /* + * New arrays are aligned need no cast, and in the case + * of scalars, always have stride 0 so never need buffering + */ + op_itflags[iop] |= (NPY_OP_ITFLAG_ALIGNED| + NPY_OP_ITFLAG_BUFNEVER); + op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST; + if (itflags&NPY_ITFLAG_BUFFER) { + NBF_STRIDES(bufferdata)[iop] = 0; + } + } + /* If casting is required and permitted */ + else if ((op_itflags[iop]&NPY_OP_ITFLAG_CAST) && + (op_flags[iop]&(NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) { + PyArrayObject *temp; + int ondim = PyArray_NDIM(op[iop]); + + /* Allocate the temporary array, if possible */ + temp = npyiter_new_temp_array(iter, &PyArray_Type, + flags, &op_itflags[iop], + ondim, + PyArray_DIMS(op[iop]), + op_dtype[iop], + op_axes ? op_axes[iop] : NULL); + if (temp == NULL) { + return 0; + } + + /* If the data will be read, copy it into temp */ + if (op_itflags[iop]&NPY_OP_ITFLAG_READ) { + if (PyArray_CopyInto(temp, op[iop]) != 0) { + Py_DECREF(temp); + return 0; + } + } + /* If the data will be written to, set UPDATEIFCOPY */ + if (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) { + PyArray_FLAGS(temp) |= NPY_ARRAY_UPDATEIFCOPY; + PyArray_FLAGS(op[iop]) &= ~NPY_ARRAY_WRITEABLE; + Py_INCREF(op[iop]); + temp->base = (PyObject *)op[iop]; + } + + Py_DECREF(op[iop]); + op[iop] = temp; + + /* + * Now we need to replace the pointers and strides with values + * from the temporary array. + */ + npyiter_replace_axisdata(iter, iop, op[iop], ondim, + PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL); + + /* The temporary copy is aligned and needs no cast */ + op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; + op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST; + } + else { + /* + * Buffering must be enabled for casting/conversion if copy + * wasn't specified. + */ + if ((op_itflags[iop]&NPY_OP_ITFLAG_CAST) && + !(itflags&NPY_ITFLAG_BUFFER)) { + PyErr_SetString(PyExc_TypeError, + "Iterator operand required copying or buffering, " + "but neither copying nor buffering was enabled"); + return 0; + } + + /* + * If the operand is aligned, any buffering can use aligned + * optimizations. 
+ */ + if (PyArray_ISALIGNED(op[iop])) { + op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; + } + } + + /* Here we can finally check for contiguous iteration */ + if (op_flags[iop]&NPY_ITER_CONTIG) { + NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); + npy_intp stride = NAD_STRIDES(axisdata)[iop]; + + if (stride != op_dtype[iop]->elsize) { + NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST " + "because of NPY_ITER_CONTIG\n"); + op_itflags[iop] |= NPY_OP_ITFLAG_CAST; + if (!(itflags&NPY_ITFLAG_BUFFER)) { + PyErr_SetString(PyExc_TypeError, + "Iterator operand required buffering, " + "to be contiguous as requested, but " + "buffering is not enabled"); + return 0; + } + } + } + + /* + * If no alignment, byte swap, or casting is needed, and + * the inner stride of this operand works for the whole + * array, we can set NPY_OP_ITFLAG_BUFNEVER. + */ + if ((itflags&NPY_ITFLAG_BUFFER) && !(op_itflags[iop]&NPY_OP_ITFLAG_CAST)) { + NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); + if (ndim == 1) { + op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER; + NBF_STRIDES(bufferdata)[iop] = NAD_STRIDES(axisdata)[iop]; + } + else if (PyArray_NDIM(op[iop]) > 0) { + npy_intp stride, shape, innerstride = 0, innershape; + npy_intp sizeof_axisdata = + NIT_AXISDATA_SIZEOF(itflags, ndim, nop); + /* Find stride of the first non-empty shape */ + for (idim = 0; idim < ndim; ++idim) { + innershape = NAD_SHAPE(axisdata); + if (innershape != 1) { + innerstride = NAD_STRIDES(axisdata)[iop]; + break; + } + NIT_ADVANCE_AXISDATA(axisdata, 1); + } + ++idim; + NIT_ADVANCE_AXISDATA(axisdata, 1); + /* Check that everything could have coalesced together */ + for (; idim < ndim; ++idim) { + stride = NAD_STRIDES(axisdata)[iop]; + shape = NAD_SHAPE(axisdata); + if (shape != 1) { + /* + * If N times the inner stride doesn't equal this + * stride, the multi-dimensionality is needed. + */ + if (innerstride*innershape != stride) { + break; + } + else { + innershape *= shape; + } + } + NIT_ADVANCE_AXISDATA(axisdata, 1); + } + /* + * If we looped all the way to the end, one stride works. + * Set that stride, because it may not belong to the first + * dimension. + */ + if (idim == ndim) { + op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER; + NBF_STRIDES(bufferdata)[iop] = innerstride; + } + } + } + } + + return 1; +} + +/* + * The __array_priority__ attribute of the inputs determines + * the subtype of any output arrays. This function finds the + * subtype of the input array with highest priority. 
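+ * + * For example, np.matrix carries __array_priority__ = 10.0, so mixing a + * matrix input with a plain ndarray (priority 0.0) causes any allocated + * outputs to use the matrix subtype.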
+ */ +static void +npyiter_get_priority_subtype(int nop, PyArrayObject **op, + char *op_itflags, + double *subtype_priority, + PyTypeObject **subtype) +{ + int iop; + + for (iop = 0; iop < nop; ++iop) { + if (op[iop] != NULL && op_itflags[iop]&NPY_OP_ITFLAG_READ) { + double priority = PyArray_GetPriority((PyObject *)op[iop], 0.0); + if (priority > *subtype_priority) { + *subtype_priority = priority; + *subtype = Py_TYPE(op[iop]); + } + } + } +} + +static int +npyiter_allocate_transfer_functions(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + /*int ndim = NIT_NDIM(iter);*/ + int iop = 0, nop = NIT_NOP(iter); + + npy_intp i; + char *op_itflags = NIT_OPITFLAGS(iter); + NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); + NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); + PyArrayObject **op = NIT_OPERANDS(iter); + PyArray_Descr **op_dtype = NIT_DTYPES(iter); + npy_intp *strides = NAD_STRIDES(axisdata), op_stride; + PyArray_StridedTransferFn **readtransferfn = NBF_READTRANSFERFN(bufferdata), + **writetransferfn = NBF_WRITETRANSFERFN(bufferdata); + NpyAuxData **readtransferdata = NBF_READTRANSFERDATA(bufferdata), + **writetransferdata = NBF_WRITETRANSFERDATA(bufferdata); + + PyArray_StridedTransferFn *stransfer = NULL; + NpyAuxData *transferdata = NULL; + int needs_api = 0; + + for (iop = 0; iop < nop; ++iop) { + char flags = op_itflags[iop]; + /* + * Reduction operands may be buffered with a different stride, + * so we must pass NPY_MAX_INTP to the transfer function factory. + */ + op_stride = (flags&NPY_OP_ITFLAG_REDUCE) ? NPY_MAX_INTP : + strides[iop]; + + /* + * If we have determined that a buffer may be needed, + * allocate the appropriate transfer functions + */ + if (!(flags&NPY_OP_ITFLAG_BUFNEVER)) { + if (flags&NPY_OP_ITFLAG_READ) { + int move_references = 0; + if (PyArray_GetDTypeTransferFunction( + (flags&NPY_OP_ITFLAG_ALIGNED) != 0, + op_stride, + op_dtype[iop]->elsize, + PyArray_DESCR(op[iop]), + op_dtype[iop], + move_references, + &stransfer, + &transferdata, + &needs_api) != NPY_SUCCEED) { + goto fail; + } + readtransferfn[iop] = stransfer; + readtransferdata[iop] = transferdata; + } + else { + readtransferfn[iop] = NULL; + } + if (flags&NPY_OP_ITFLAG_WRITE) { + int move_references = 1; + if (PyArray_GetDTypeTransferFunction( + (flags&NPY_OP_ITFLAG_ALIGNED) != 0, + op_dtype[iop]->elsize, + op_stride, + op_dtype[iop], + PyArray_DESCR(op[iop]), + move_references, + &stransfer, + &transferdata, + &needs_api) != NPY_SUCCEED) { + goto fail; + } + writetransferfn[iop] = stransfer; + writetransferdata[iop] = transferdata; + } + /* If no write back but there are references make a decref fn */ + else if (PyDataType_REFCHK(op_dtype[iop])) { + /* + * By passing NULL to dst_type and setting move_references + * to 1, we get back a function that just decrements the + * src references. 
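+ * The iterator applies it when buffered data is discarded, so references + * held by buffered copies of object-dtype elements are released.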
+ */ + if (PyArray_GetDTypeTransferFunction( + (flags&NPY_OP_ITFLAG_ALIGNED) != 0, + op_dtype[iop]->elsize, 0, + op_dtype[iop], NULL, + 1, + &stransfer, + &transferdata, + &needs_api) != NPY_SUCCEED) { + goto fail; + } + writetransferfn[iop] = stransfer; + writetransferdata[iop] = transferdata; + } + else { + writetransferfn[iop] = NULL; + } + } + else { + readtransferfn[iop] = NULL; + writetransferfn[iop] = NULL; + } + } + + /* If any of the dtype transfer functions needed the API, flag it */ + if (needs_api) { + NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI; + } + + return 1; + +fail: + for (i = 0; i < iop; ++i) { + if (readtransferdata[iop] != NULL) { + NPY_AUXDATA_FREE(readtransferdata[iop]); + readtransferdata[iop] = NULL; + } + if (writetransferdata[iop] != NULL) { + NPY_AUXDATA_FREE(writetransferdata[iop]); + writetransferdata[iop] = NULL; + } + } + return 0; +} + + + +#undef NPY_ITERATOR_IMPLEMENTATION_CODE diff --git a/numpy/core/src/multiarray/nditer_impl.h b/numpy/core/src/multiarray/nditer_impl.h new file mode 100644 index 000000000..f79ac3415 --- /dev/null +++ b/numpy/core/src/multiarray/nditer_impl.h @@ -0,0 +1,301 @@ +/* + * This is a PRIVATE INTERNAL NumPy header, intended to be used *ONLY* + * by the iterator implementation code. All other internal NumPy code + * should use the exposed iterator API. + */ +#ifndef NPY_ITERATOR_IMPLEMENTATION_CODE +#error "This header is intended for use ONLY by iterator implementation code." +#endif + +#ifndef _NPY_PRIVATE__NDITER_IMPL_H_ +#define _NPY_PRIVATE__NDITER_IMPL_H_ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" + +#define NPY_NO_DEPRECATED_API +#define _MULTIARRAYMODULE +#include <numpy/ndarrayobject.h> +#include <numpy/npy_3kcompat.h> +#include "convert_datatype.h" + +#include "lowlevel_strided_loops.h" + +/********** ITERATOR CONSTRUCTION TIMING **************/ +#define NPY_IT_CONSTRUCTION_TIMING 0 + +#if NPY_IT_CONSTRUCTION_TIMING +#define NPY_IT_TIME_POINT(var) { \ + unsigned int hi, lo; \ + __asm__ __volatile__ ( \ + "rdtsc" \ + : "=d" (hi), "=a" (lo)); \ + var = (((unsigned long long)hi) << 32) | lo; \ + } +#define NPY_IT_PRINT_TIME_START(var) { \ + printf("%30s: start\n", #var); \ + c_temp = var; \ + } +#define NPY_IT_PRINT_TIME_VAR(var) { \ + printf("%30s: %6.0f clocks\n", #var, \ + ((double)(var-c_temp))); \ + c_temp = var; \ + } +#else +#define NPY_IT_TIME_POINT(var) +#endif + +/******************************************************/ + +/********** PRINTF DEBUG TRACING **************/ +#define NPY_IT_DBG_TRACING 0 + +#if NPY_IT_DBG_TRACING +#define NPY_IT_DBG_PRINT(s) printf("%s", s) +#define NPY_IT_DBG_PRINT1(s, p1) printf(s, p1) +#define NPY_IT_DBG_PRINT2(s, p1, p2) printf(s, p1, p2) +#define NPY_IT_DBG_PRINT3(s, p1, p2, p3) printf(s, p1, p2, p3) +#else +#define NPY_IT_DBG_PRINT(s) +#define NPY_IT_DBG_PRINT1(s, p1) +#define NPY_IT_DBG_PRINT2(s, p1, p2) +#define NPY_IT_DBG_PRINT3(s, p1, p2, p3) +#endif +/**********************************************/ + +/* Rounds up a number of bytes to be divisible by sizeof intp */ +#if NPY_SIZEOF_INTP == 4 +#define NPY_INTP_ALIGNED(size) ((size + 0x3)&(-0x4)) +#else +#define NPY_INTP_ALIGNED(size) ((size + 0x7)&(-0x8)) +#endif + +/* Internal iterator flags */ + +/* The perm is the identity */ +#define NPY_ITFLAG_IDENTPERM 0x0001 +/* The perm has negative entries (indicating flipped axes) */ +#define NPY_ITFLAG_NEGPERM 0x0002 +/* The iterator is tracking an index */ +#define NPY_ITFLAG_HASINDEX 0x0004 +/* The iterator is tracking a multi-index */ +#define 
NPY_ITFLAG_HASMULTIINDEX 0x0008 +/* The iteration order was forced on construction */ +#define NPY_ITFLAG_FORCEDORDER 0x0010 +/* The inner loop is handled outside the iterator */ +#define NPY_ITFLAG_EXLOOP 0x0020 +/* The iterator is ranged */ +#define NPY_ITFLAG_RANGE 0x0040 +/* The iterator is buffered */ +#define NPY_ITFLAG_BUFFER 0x0080 +/* The iterator should grow the buffered inner loop when possible */ +#define NPY_ITFLAG_GROWINNER 0x0100 +/* There is just one iteration, can specialize iternext for that */ +#define NPY_ITFLAG_ONEITERATION 0x0200 +/* Delay buffer allocation until first Reset* call */ +#define NPY_ITFLAG_DELAYBUF 0x0400 +/* Iteration needs API access during iternext */ +#define NPY_ITFLAG_NEEDSAPI 0x0800 +/* Iteration includes one or more operands being reduced */ +#define NPY_ITFLAG_REDUCE 0x1000 +/* Reduce iteration doesn't need to recalculate reduce loops next time */ +#define NPY_ITFLAG_REUSE_REDUCE_LOOPS 0x2000 + +/* Internal iterator per-operand iterator flags */ + +/* The operand will be written to */ +#define NPY_OP_ITFLAG_WRITE 0x01 +/* The operand will be read from */ +#define NPY_OP_ITFLAG_READ 0x02 +/* The operand needs type conversion/byte swapping/alignment */ +#define NPY_OP_ITFLAG_CAST 0x04 +/* The operand never needs buffering */ +#define NPY_OP_ITFLAG_BUFNEVER 0x08 +/* The operand is aligned */ +#define NPY_OP_ITFLAG_ALIGNED 0x10 +/* The operand is being reduced */ +#define NPY_OP_ITFLAG_REDUCE 0x20 + +/* + * The data layout of the iterator is fully specified by + * a triple (itflags, ndim, nop). These three variables + * are expected to exist in all functions calling these macros, + * either as true variables initialized to the correct values + * from the iterator, or as constants in the case of specialized + * functions such as the various iternext functions. + */ + +struct NpyIter_InternalOnly { + /* Initial fixed position data */ + npy_uint32 itflags; + npy_uint16 ndim, nop; + npy_intp itersize, iterstart, iterend; + /* iterindex is only used if RANGED or BUFFERED is set */ + npy_intp iterindex; + /* The rest is variable */ + char iter_flexdata; +}; + +typedef struct NpyIter_AD NpyIter_AxisData; +typedef struct NpyIter_BD NpyIter_BufferData; + +/* Byte sizes of the iterator members */ +#define NIT_PERM_SIZEOF(itflags, ndim, nop) \ + NPY_INTP_ALIGNED(NPY_MAXDIMS) +#define NIT_DTYPES_SIZEOF(itflags, ndim, nop) \ + ((NPY_SIZEOF_INTP)*(nop)) +#define NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) \ + ((NPY_SIZEOF_INTP)*(nop+1)) +#define NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) \ + ((NPY_SIZEOF_INTP)*(nop+1)) +#define NIT_OPERANDS_SIZEOF(itflags, ndim, nop) \ + ((NPY_SIZEOF_INTP)*(nop)) +#define NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) \ + (NPY_INTP_ALIGNED(nop)) +#define NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) \ + ((itflags&NPY_ITFLAG_BUFFER) ? 
((NPY_SIZEOF_INTP)*(6 + 9*nop)) : 0) + +/* Byte offsets of the iterator members starting from iter->iter_flexdata */ +#define NIT_PERM_OFFSET() \ + (0) +#define NIT_DTYPES_OFFSET(itflags, ndim, nop) \ + (NIT_PERM_OFFSET() + \ + NIT_PERM_SIZEOF(itflags, ndim, nop)) +#define NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) \ + (NIT_DTYPES_OFFSET(itflags, ndim, nop) + \ + NIT_DTYPES_SIZEOF(itflags, ndim, nop)) +#define NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) \ + (NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) + \ + NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop)) +#define NIT_OPERANDS_OFFSET(itflags, ndim, nop) \ + (NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) + \ + NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop)) +#define NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) \ + (NIT_OPERANDS_OFFSET(itflags, ndim, nop) + \ + NIT_OPERANDS_SIZEOF(itflags, ndim, nop)) +#define NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) \ + (NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) + \ + NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop)) +#define NIT_AXISDATA_OFFSET(itflags, ndim, nop) \ + (NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) + \ + NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop)) + +/* Internal-only ITERATOR DATA MEMBER ACCESS */ +#define NIT_ITFLAGS(iter) \ + ((iter)->itflags) +#define NIT_NDIM(iter) \ + ((iter)->ndim) +#define NIT_NOP(iter) \ + ((iter)->nop) +#define NIT_ITERSIZE(iter) \ + (iter->itersize) +#define NIT_ITERSTART(iter) \ + (iter->iterstart) +#define NIT_ITEREND(iter) \ + (iter->iterend) +#define NIT_ITERINDEX(iter) \ + (iter->iterindex) +#define NIT_PERM(iter) ((npy_int8 *)( \ + &(iter)->iter_flexdata + NIT_PERM_OFFSET())) +#define NIT_DTYPES(iter) ((PyArray_Descr **)( \ + &(iter)->iter_flexdata + NIT_DTYPES_OFFSET(itflags, ndim, nop))) +#define NIT_RESETDATAPTR(iter) ((char **)( \ + &(iter)->iter_flexdata + NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop))) +#define NIT_BASEOFFSETS(iter) ((npy_intp *)( \ + &(iter)->iter_flexdata + NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop))) +#define NIT_OPERANDS(iter) ((PyArrayObject **)( \ + &(iter)->iter_flexdata + NIT_OPERANDS_OFFSET(itflags, ndim, nop))) +#define NIT_OPITFLAGS(iter) ( \ + &(iter)->iter_flexdata + NIT_OPITFLAGS_OFFSET(itflags, ndim, nop)) +#define NIT_BUFFERDATA(iter) ((NpyIter_BufferData *)( \ + &(iter)->iter_flexdata + NIT_BUFFERDATA_OFFSET(itflags, ndim, nop))) +#define NIT_AXISDATA(iter) ((NpyIter_AxisData *)( \ + &(iter)->iter_flexdata + NIT_AXISDATA_OFFSET(itflags, ndim, nop))) + +/* Internal-only BUFFERDATA MEMBER ACCESS */ +struct NpyIter_BD { + npy_intp buffersize, size, bufiterend, + reduce_pos, reduce_outersize, reduce_outerdim; + npy_intp bd_flexdata; +}; +#define NBF_BUFFERSIZE(bufferdata) ((bufferdata)->buffersize) +#define NBF_SIZE(bufferdata) ((bufferdata)->size) +#define NBF_BUFITEREND(bufferdata) ((bufferdata)->bufiterend) +#define NBF_REDUCE_POS(bufferdata) ((bufferdata)->reduce_pos) +#define NBF_REDUCE_OUTERSIZE(bufferdata) ((bufferdata)->reduce_outersize) +#define NBF_REDUCE_OUTERDIM(bufferdata) ((bufferdata)->reduce_outerdim) +#define NBF_STRIDES(bufferdata) ( \ + &(bufferdata)->bd_flexdata + 0) +#define NBF_PTRS(bufferdata) ((char **) \ + (&(bufferdata)->bd_flexdata + 1*(nop))) +#define NBF_REDUCE_OUTERSTRIDES(bufferdata) ( \ + (&(bufferdata)->bd_flexdata + 2*(nop))) +#define NBF_REDUCE_OUTERPTRS(bufferdata) ((char **) \ + (&(bufferdata)->bd_flexdata + 3*(nop))) +#define NBF_READTRANSFERFN(bufferdata) ((PyArray_StridedTransferFn **) \ + (&(bufferdata)->bd_flexdata + 4*(nop))) +#define NBF_READTRANSFERDATA(bufferdata) ((NpyAuxData **) \ + 
(&(bufferdata)->bd_flexdata + 5*(nop))) +#define NBF_WRITETRANSFERFN(bufferdata) ((PyArray_StridedTransferFn **) \ + (&(bufferdata)->bd_flexdata + 6*(nop))) +#define NBF_WRITETRANSFERDATA(bufferdata) ((NpyAuxData **) \ + (&(bufferdata)->bd_flexdata + 7*(nop))) +#define NBF_BUFFERS(bufferdata) ((char **) \ + (&(bufferdata)->bd_flexdata + 8*(nop))) + +/* Internal-only AXISDATA MEMBER ACCESS. */ +struct NpyIter_AD { + npy_intp shape, index; + npy_intp ad_flexdata; +}; +#define NAD_SHAPE(axisdata) ((axisdata)->shape) +#define NAD_INDEX(axisdata) ((axisdata)->index) +#define NAD_STRIDES(axisdata) ( \ + &(axisdata)->ad_flexdata + 0) +#define NAD_PTRS(axisdata) ((char **) \ + &(axisdata)->ad_flexdata + 1*(nop+1)) + +#define NAD_NSTRIDES() \ + ((nop) + ((itflags&NPY_ITFLAG_HASINDEX) ? 1 : 0)) + +/* Size of one AXISDATA struct within the iterator */ +#define NIT_AXISDATA_SIZEOF(itflags, ndim, nop) (( \ + /* intp shape */ \ + 1 + \ + /* intp index */ \ + 1 + \ + /* intp stride[nop+1] AND char* ptr[nop+1] */ \ + 2*((nop)+1) \ + )*NPY_SIZEOF_INTP ) + +/* + * Macro to advance an AXISDATA pointer by a specified count. + * Requires that sizeof_axisdata be previously initialized + * to NIT_AXISDATA_SIZEOF(itflags, ndim, nop). + */ +#define NIT_INDEX_AXISDATA(axisdata, index) ((NpyIter_AxisData *) \ + (((char *)(axisdata)) + (index)*sizeof_axisdata)) +#define NIT_ADVANCE_AXISDATA(axisdata, count) \ + axisdata = NIT_INDEX_AXISDATA(axisdata, count) + +/* Size of the whole iterator */ +#define NIT_SIZEOF_ITERATOR(itflags, ndim, nop) ( \ + sizeof(struct NpyIter_InternalOnly) + \ + NIT_AXISDATA_OFFSET(itflags, ndim, nop) + \ + NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*(ndim)) + +/* Internal helper functions shared between implementation files */ +NPY_NO_EXPORT void +npyiter_coalesce_axes(NpyIter *iter); +NPY_NO_EXPORT int +npyiter_allocate_buffers(NpyIter *iter, char **errmsg); +NPY_NO_EXPORT void +npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex); +NPY_NO_EXPORT void +npyiter_copy_from_buffers(NpyIter *iter); +NPY_NO_EXPORT void +npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs); + + +#endif |