author | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-31 19:40:25 -0800
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-02-03 14:48:23 -0800
commit | 85993f895e7676f477f80e940a2ee925b3a0c19c (patch)
tree | 7fe30fe031de99838eb9086e3cf4711546cf1fd3
parent | fcc6cc73ddcb1fc85446ba9256ac24ecdda6c6d8 (diff)
download | numpy-85993f895e7676f477f80e940a2ee925b3a0c19c.tar.gz
ENH: iter: Add timing code, rewrite some sections to be faster/more clear
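The timing instrumentation added below is compiled out unless NPY_IT_CONSTRUCTION_TIMING is set to 1; each NPY_IT_TIME_POINT reads the CPU's time-stamp counter with rdtsc. A minimal, self-contained sketch of that pattern (x86/x86-64 with GCC or Clang inline asm only; the loop body is a hypothetical stand-in for the work being timed):

```c
#include <stdio.h>

/* Read the x86 time-stamp counter, as NPY_IT_TIME_POINT does. */
static unsigned long long
read_tsc(void)
{
    unsigned int hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=d" (hi), "=a" (lo));
    return (((unsigned long long)hi) << 32) | lo;
}

int
main(void)
{
    unsigned long long c_start, c_end;
    volatile double x = 0.0;
    int i;

    c_start = read_tsc();
    for (i = 0; i < 1000000; ++i) {
        x += i * 0.5;      /* hypothetical stand-in for iterator construction */
    }
    c_end = read_tsc();

    /* Same report format the new NPY_IT_PRINT_TIME_VAR macro uses */
    printf("%30s: %6.0f clocks\n", "timed region",
           (double)(c_end - c_start));
    return 0;
}
```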
-rw-r--r-- | numpy/core/src/multiarray/new_iterator.c.src | 889
1 file changed, 488 insertions, 401 deletions
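Among the rewritten sections, the @@ -3740 hunk in npyiter_replace_axisdata below splits the op_axes and no-op_axes cases apart; in both, a negative perm entry means the axis is iterated in reverse, which is done by negating the stride and shifting the base pointer to the axis's last element (baseoffset += stride*(shape-1)). A standalone sketch of that reversed-axis trick (illustration only, not the NumPy API):

```c
#include <stdio.h>

int
main(void)
{
    double data[5] = {1, 2, 3, 4, 5};
    long shape = 5;
    long stride = (long)sizeof(double);    /* forward stride, in bytes */
    char *base = (char *)data;
    long i;

    /* Flip the axis: point the base at the last element, negate the stride */
    char *flipped_base = base + stride * (shape - 1);
    long flipped_stride = -stride;

    for (i = 0; i < shape; ++i) {
        printf("%g ", *(double *)(flipped_base + i * flipped_stride));
    }
    printf("\n");                          /* prints: 5 4 3 2 1 */
    return 0;
}
```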
diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index 49d8ad8b5..caf8afb5a 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -18,6 +18,32 @@ #include "lowlevel_strided_loops.h" +/********** ITERATOR CONSTRUCTION TIMING **************/ +#define NPY_IT_CONSTRUCTION_TIMING 0 + +#if NPY_IT_CONSTRUCTION_TIMING +#define NPY_IT_TIME_POINT(var) { \ + unsigned int hi, lo; \ + __asm__ __volatile__ ( \ + "rdtsc" \ + : "=d" (hi), "=a" (lo)); \ + var = (((unsigned long long)hi) << 32) | lo; \ + } +#define NPY_IT_PRINT_TIME_START(var) { \ + printf("%30s: start\n", #var); \ + c_temp = var; \ + } +#define NPY_IT_PRINT_TIME_VAR(var) { \ + printf("%30s: %6.0f clocks\n", #var, \ + ((double)(var-c_temp))); \ + c_temp = var; \ + } +#else +#define NPY_IT_TIME_POINT(var) +#endif + +/******************************************************/ + /********** PRINTF DEBUG TRACING **************/ #define NPY_IT_DBG_TRACING 0 @@ -371,6 +397,28 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, double subtype_priority = NPY_PRIORITY; PyTypeObject *subtype = &PyArray_Type; +#if NPY_IT_CONSTRUCTION_TIMING + npy_intp c_temp, + c_start, + c_check_op_axes, + c_check_global_flags, + c_calculate_ndim, + c_malloc, + c_prepare_operands, + c_fill_axisdata, + c_compute_index_strides, + c_apply_forced_iteration_order, + c_find_best_axis_ordering, + c_get_priority_subtype, + c_find_output_common_dtype, + c_check_casting, + c_allocate_arrays, + c_coalesce_axes, + c_prepare_buffers; +#endif + + NPY_IT_TIME_POINT(c_start); + if (niter > NPY_MAXARGS) { PyErr_Format(PyExc_ValueError, "Cannot construct an iterator with more than %d operands " @@ -383,11 +431,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_check_op_axes); + /* Check the global iterator flags */ if (!npyiter_check_global_flags(flags, &itflags)) { return NULL; } + NPY_IT_TIME_POINT(c_check_global_flags); + /* Calculate how many dimensions the iterator should have */ ndim = npyiter_calculate_ndim(niter, op_in, oa_ndim); @@ -397,10 +449,14 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, ndim = 1; } + NPY_IT_TIME_POINT(c_calculate_ndim); + /* Allocate memory for the iterator */ iter = (NpyIter*) PyArray_malloc(NIT_SIZEOF_ITERATOR(itflags, ndim, niter)); + NPY_IT_TIME_POINT(c_malloc); + /* Fill in the basic data */ NIT_ITFLAGS(iter) = itflags; NIT_NDIM(iter) = ndim; @@ -424,6 +480,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, /* Set resetindex to zero as well (it's just after the resetdataptr) */ op_dataptr[niter] = 0; + NPY_IT_TIME_POINT(c_prepare_operands); + /* * Initialize buffer data (must set the buffers and transferdata * to NULL before we might deallocate the iterator). 
@@ -443,6 +501,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_fill_axisdata); + if (itflags&NPY_ITFLAG_BUFFER) { /* * If buffering is enabled and no buffersize was given, use a default @@ -466,6 +526,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, */ npyiter_compute_index_strides(iter, flags); + NPY_IT_TIME_POINT(c_compute_index_strides); + /* Initialize the perm to the identity */ perm = NIT_PERM(iter); for(idim = 0; idim < ndim; ++idim) { @@ -478,6 +540,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, npyiter_apply_forced_iteration_order(iter, order); itflags = NIT_ITFLAGS(iter); + NPY_IT_TIME_POINT(c_apply_forced_iteration_order); + /* Set some flags for allocated outputs */ for (iiter = 0; iiter < niter; ++iiter) { if (op[iiter] == NULL) { @@ -515,11 +579,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, itflags = NIT_ITFLAGS(iter); } + NPY_IT_TIME_POINT(c_find_best_axis_ordering); + if (need_subtype) { npyiter_get_priority_subtype(niter, op, op_itflags, &subtype_priority, &subtype); } + NPY_IT_TIME_POINT(c_get_priority_subtype); + /* * If an automatically allocated output didn't have a specified * dtype, we need to figure it out now, before allocating the outputs. @@ -544,9 +612,11 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, NPY_IT_DBG_PRINTF("Iterator: Replacing all data types\n"); /* Replace all the data types */ for (iiter = 0; iiter < niter; ++iiter) { - Py_XDECREF(op_dtype[iiter]); - Py_INCREF(dtype); - op_dtype[iiter] = dtype; + if (op_dtype[iiter] != dtype) { + Py_XDECREF(op_dtype[iiter]); + Py_INCREF(dtype); + op_dtype[iiter] = dtype; + } } } else { @@ -562,6 +632,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, Py_DECREF(dtype); } + NPY_IT_TIME_POINT(c_find_output_common_dtype); + /* * All of the data types have been settled, so it's time * to check that data type conversions are following the @@ -572,6 +644,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_check_casting); + /* * At this point, the iteration order has been finalized. so * any allocation of ops that were NULL, or any temporary @@ -584,6 +658,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_allocate_arrays); + /* * Finally, if coords weren't requested, * it may be possible to coalesce some axes together. @@ -602,6 +678,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, op_dataptr = NIT_RESETDATAPTR(iter); } + NPY_IT_TIME_POINT(c_coalesce_axes); + /* * Now that the axes are finished, check whether we can apply * the single iteration optimization to the iternext function. 
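The largest hunk below (@@ -3242, npyiter_fill_axisdata) replaces the old per-operand broadcasting passes with a single up-front computation of the combined broadcast shape, which the axisdata is then filled from. A minimal standalone sketch of that broadcasting rule (hypothetical helper and names, not the NumPy API):

```c
#include <stdio.h>

#define MAXDIMS 32

/*
 * Operand shapes are right-aligned against the iterator shape and
 * combined, with a length-1 dimension matching anything.
 * Returns 0 on a broadcast error, 1 on success.
 */
static int
broadcast_shapes(int nop, const int *ndims, int shapes[][MAXDIMS],
                 int ndim, int *broadcast_shape)
{
    int iop, idim;

    for (idim = 0; idim < ndim; ++idim) {
        broadcast_shape[idim] = 1;
    }
    for (iop = 0; iop < nop; ++iop) {
        int ondim = ndims[iop];
        for (idim = 0; idim < ondim; ++idim) {
            /* Right-align the operand's shape against the iterator shape */
            int bshape = broadcast_shape[idim + ndim - ondim];
            int op_shape = shapes[iop][idim];
            if (bshape == 1) {
                broadcast_shape[idim + ndim - ondim] = op_shape;
            }
            else if (bshape != op_shape && op_shape != 1) {
                return 0;   /* shapes cannot be broadcast together */
            }
        }
    }
    return 1;
}

int
main(void)
{
    int shapes[2][MAXDIMS] = {{3, 1, 5}, {4, 5}};
    int ndims[2] = {3, 2};
    int out[MAXDIMS];

    if (broadcast_shapes(2, ndims, shapes, 3, out)) {
        printf("broadcast shape: (%d, %d, %d)\n", out[0], out[1], out[2]);
    }
    return 0;
}
```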
@@ -657,6 +735,29 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, } } + NPY_IT_TIME_POINT(c_prepare_buffers); + +#if NPY_IT_CONSTRUCTION_TIMING + printf("\nIterator construction timing:\n"); + NPY_IT_PRINT_TIME_START(c_start); + NPY_IT_PRINT_TIME_VAR(c_check_op_axes); + NPY_IT_PRINT_TIME_VAR(c_check_global_flags); + NPY_IT_PRINT_TIME_VAR(c_calculate_ndim); + NPY_IT_PRINT_TIME_VAR(c_malloc); + NPY_IT_PRINT_TIME_VAR(c_prepare_operands); + NPY_IT_PRINT_TIME_VAR(c_fill_axisdata); + NPY_IT_PRINT_TIME_VAR(c_compute_index_strides); + NPY_IT_PRINT_TIME_VAR(c_apply_forced_iteration_order); + NPY_IT_PRINT_TIME_VAR(c_find_best_axis_ordering); + NPY_IT_PRINT_TIME_VAR(c_get_priority_subtype); + NPY_IT_PRINT_TIME_VAR(c_find_output_common_dtype); + NPY_IT_PRINT_TIME_VAR(c_check_casting); + NPY_IT_PRINT_TIME_VAR(c_allocate_arrays); + NPY_IT_PRINT_TIME_VAR(c_coalesce_axes); + NPY_IT_PRINT_TIME_VAR(c_prepare_buffers); + printf("\n"); +#endif + return iter; } @@ -3242,297 +3343,200 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags, npy_intp iiter, niter = NIT_NITER(iter); npy_intp ondim; - char *odataptr; - NpyIter_AxisData *axisdata0, *axisdata; + NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; - PyArrayObject **op = NIT_OPERANDS(iter); - - axisdata0 = NIT_AXISDATA(iter); - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - - /* Process the first operand */ - if (op_axes == NULL || op_axes[0] == NULL) { - /* Default broadcasting rules if op_axes is not specified */ - axisdata = axisdata0; - ondim = (op[0] == NULL) ? 0 : PyArray_NDIM(op[0]); - odataptr = op_dataptr[0]; - /* Possible if op_axes are being used, but op_axes[0] is NULL */ - if (ondim > ndim) { - PyErr_SetString(PyExc_ValueError, - "Iterator input has more dimensions than allowed " - "by the 'op_axes' specified"); - return 0; - } - for (idim = 0; idim < ondim; ++idim) { - npy_intp shape; - - /* op[0] != NULL, because we set ondim to 0 in that case */ - shape = PyArray_DIM(op[0], ondim-idim-1); - - NAD_SHAPE(axisdata) = shape; - NAD_COORD(axisdata) = 0; - if (shape == 1) { - NAD_STRIDES(axisdata)[0] = 0; - } - else { - NAD_STRIDES(axisdata)[0] = PyArray_STRIDE(op[0], ondim-idim-1); - } - NAD_PTRS(axisdata)[0] = odataptr; - - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - for (idim = ondim; idim < ndim; ++idim) { - NAD_SHAPE(axisdata) = 1; - NAD_COORD(axisdata) = 0; - NAD_STRIDES(axisdata)[0] = 0; - NAD_PTRS(axisdata)[0] = odataptr; + PyArrayObject **op = NIT_OPERANDS(iter), *op_cur; + npy_intp broadcast_shape[NPY_MAXDIMS]; - NIT_ADVANCE_AXISDATA(axisdata, 1); - } + /* First broadcast the shapes together */ + for (idim = 0; idim < ndim; ++idim) { + broadcast_shape[idim] = 1; } - else { - npy_intp *axes = op_axes[0]; - - /* Use op_axes to choose the axes */ - axisdata = axisdata0; - ondim = (op[0] == NULL) ? 
ndim : PyArray_NDIM(op[0]); - odataptr = op_dataptr[0]; - for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[ndim-idim-1]; - if (i < 0) { - NAD_SHAPE(axisdata) = 1; - NAD_COORD(axisdata) = 0; - NAD_STRIDES(axisdata)[0] = 0; - NAD_PTRS(axisdata)[0] = odataptr; - } - else if (i < ondim) { - npy_intp shape; - - if (op[0] != NULL) { - shape = PyArray_DIM(op[0], i); - } - else { - shape = 1; - } + for (iiter = 0; iiter < niter; ++iiter) { + op_cur = op[iiter]; + if (op_cur != NULL) { + npy_intp *shape = PyArray_DIMS(op_cur); + ondim = PyArray_NDIM(op_cur); - NAD_SHAPE(axisdata) = shape; - NAD_COORD(axisdata) = 0; - if (shape == 1) { - NAD_STRIDES(axisdata)[0] = 0; + if (op_axes == NULL || op_axes[iiter] == NULL) { + /* + * Possible if op_axes are being used, but + * op_axes[iiter] is NULL + */ + if (ondim > ndim) { + PyErr_SetString(PyExc_ValueError, + "input operand has more dimensions than allowed " + "by the axis remapping"); + return 0; } - else { - NAD_STRIDES(axisdata)[0] = PyArray_STRIDE(op[0], i); + for (idim = 0; idim < ondim; ++idim) { + npy_intp bshape = broadcast_shape[idim+ndim-ondim], + op_shape = shape[idim]; + if (bshape == 1) { + broadcast_shape[idim+ndim-ondim] = op_shape; + } + else if (bshape != op_shape && op_shape != 1) { + goto broadcast_error; + } } - NAD_PTRS(axisdata)[0] = odataptr; } else { - PyErr_Format(PyExc_ValueError, - "Iterator input op_axes[0][%d] (==%d) is not a valid " - "axis of op[0], which has %d dimensions", - (int)(ndim-idim-1), (int)i, (int)ondim); - return 0; + npy_intp *axes = op_axes[iiter]; + for (idim = 0; idim < ndim; ++idim) { + npy_intp i = axes[idim]; + if (i >= 0) { + if (i < ondim) { + npy_intp bshape = broadcast_shape[idim], + op_shape = shape[i]; + if (bshape == 1) { + broadcast_shape[idim] = op_shape; + } + else if (bshape != op_shape && op_shape != 1) { + goto broadcast_error; + } + } + else { + PyErr_Format(PyExc_ValueError, + "Iterator input op_axes[%d][%d] (==%d) " + "is not a valid axis of op[%d], which " + "has %d dimensions ", + (int)iiter, (int)(ndim-idim-1), (int)i, + (int)iiter, (int)ondim); + return 0; + } + } + } } - - NIT_ADVANCE_AXISDATA(axisdata, 1); } } - /* - * Process the rest of the operands, using the broadcasting rules - * to combine them. - */ - for (iiter = 1; iiter < niter; ++iiter) { - if (op_axes == NULL || op_axes[iiter] == NULL) { - axisdata = axisdata0; - ondim = (op[iiter] == NULL) ? 
0 : PyArray_NDIM(op[iiter]); - odataptr = op_dataptr[iiter]; - /* Possible if op_axes are being used, but op_axes[iiter] is NULL */ - if (ondim > ndim) { - PyErr_SetString(PyExc_ValueError, - "input operand has more dimensions than allowed " - "by the axis remapping"); - return 0; - } - for (idim = 0; idim < ondim; ++idim) { - npy_intp shape; - - /* op[iiter] != NULL, because we set ondim to 0 in that case */ - shape = PyArray_DIM(op[iiter], ondim-idim-1); + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - if (shape == 1) { - NAD_STRIDES(axisdata)[iiter] = 0; - } - else { - if (NAD_SHAPE(axisdata) == 1) { - NAD_SHAPE(axisdata) = shape; - } - else if (NAD_SHAPE(axisdata) != shape) { - goto broadcast_error; - } - NAD_STRIDES(axisdata)[iiter] = PyArray_STRIDE( - op[iiter], ondim-idim-1); - } - NAD_PTRS(axisdata)[iiter] = odataptr; + /* Now process the operands, filling in the axisdata */ + for (idim = 0; idim < ndim; ++idim) { + npy_intp bshape = broadcast_shape[ndim-idim-1]; + npy_intp *strides = NAD_STRIDES(axisdata); - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - for (idim = ondim; idim < ndim; ++idim) { - NAD_STRIDES(axisdata)[iiter] = 0; - NAD_PTRS(axisdata)[iiter] = odataptr; + NAD_SHAPE(axisdata) = bshape; + NAD_COORD(axisdata) = 0; + memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*niter); - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - } - else { - npy_intp *axes = op_axes[iiter]; + for (iiter = 0; iiter < niter; ++iiter) { + op_cur = op[iiter]; - /* Use op_axes to choose the axes */ - axisdata = axisdata0; - ondim = (op[iiter] == NULL) ? ndim : PyArray_NDIM(op[iiter]); - odataptr = op_dataptr[iiter]; - for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[ndim-idim-1]; - if (i < 0) { - NAD_STRIDES(axisdata)[iiter] = 0; - NAD_PTRS(axisdata)[iiter] = odataptr; + if (op_axes == NULL || op_axes[iiter] == NULL) { + if (op_cur == NULL) { + strides[iiter] = 0; } - else if (i < ondim) { - npy_intp shape; - - if (op[iiter] != NULL) { - shape = PyArray_DIM(op[iiter], i); - } - else { - shape = 1; - } - - if (shape == 1) { - NAD_STRIDES(axisdata)[iiter] = 0; + else { + ondim = PyArray_NDIM(op_cur); + if (bshape == 1) { + strides[iiter] = 0; + if (idim >= ondim && !output_scalars && + (op_flags[iiter]&NPY_ITER_NO_BROADCAST)) { + goto operand_different_than_broadcast; + } } - else { - if (NAD_SHAPE(axisdata) == 1) { - NAD_SHAPE(axisdata) = shape; + else if (idim >= ondim || + PyArray_DIM(op_cur, ondim-idim-1) == 1) { + strides[iiter] = 0; + if (op_flags[iiter]&NPY_ITER_NO_BROADCAST) { + goto operand_different_than_broadcast; } - else if (NAD_SHAPE(axisdata) != shape) { - goto broadcast_error; + /* If it's writeable, this means a reduction */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "reduction is not enabled"); + return 0; + } + if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "is flagged as write-only, not " + "read-write"); + return 0; + } + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; } - NAD_STRIDES(axisdata)[iiter] = - PyArray_STRIDE(op[iiter], i); } - NAD_PTRS(axisdata)[iiter] = odataptr; - } - else { - PyErr_Format(PyExc_ValueError, - "Iterator input op_axes[%d][%d] (==%d) is not a " - "valid axis of op[%d], which has %d dimensions ", - (int)iiter, (int)(ndim-idim-1), (int)i, - (int)iiter, 
(int)ondim); - return 0; + else { + strides[iiter] = PyArray_STRIDE(op_cur, ondim-idim-1); + } } - - NIT_ADVANCE_AXISDATA(axisdata, 1); } - } - } - - /* Go through and check for operands for broadcasting and reduction */ - for (iiter = 0; iiter < niter; ++iiter) { - if (op[iiter] != NULL) { - /* - * If broadcasting is disallowed for this operand, - * unless scalars are output, which means all operands are scalar - * and no broadcasting errors could occur - */ - if ((op_flags[iiter]&NPY_ITER_NO_BROADCAST) && !output_scalars) { - npy_intp *axes; - - axes = op_axes ? op_axes[iiter] : NULL; - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim) { - npy_intp i; - if (axes) { - i = axes[ndim-idim-1]; - } - else { - i = ndim-idim-1; + else { + npy_intp *axes = op_axes[iiter]; + npy_intp i = axes[ndim-idim-1]; + if (i >= 0) { + if (bshape == 1 || op_cur == NULL) { + strides[iiter] = 0; } - if (i >= 0 && i < PyArray_NDIM(op[iiter])) { - if (PyArray_DIM(op[iiter], i) != NAD_SHAPE(axisdata)) { + else if (PyArray_DIM(op_cur, i) == 1) { + strides[iiter] = 0; + if (op_flags[iiter]&NPY_ITER_NO_BROADCAST) { goto operand_different_than_broadcast; } - } - /* - * Also disallow broadcasting by adding additional - * dimensions, unless that part of the broadcasting - * was done explicitly through op_axes. - */ - else if (!axes) { - goto operand_different_than_broadcast; - } - /* If its writeable, this may cause a reduction */ - else if ((op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) && - NAD_SHAPE(axisdata) > 1) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " - "reduction is not enabled"); - return 0; - } - if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " - "is flagged as write-only, not " - "read-write"); - return 0; + /* If it's writeable, this means a reduction */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "reduction is not enabled"); + return 0; + } + if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "is flagged as write-only, not " + "read-write"); + return 0; + } + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; } - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; } - - NIT_ADVANCE_AXISDATA(axisdata, 1); + else { + strides[iiter] = PyArray_STRIDE(op_cur, i); + } } - } - /* Check whether this operand includes any reduction */ - else if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim) { - /* - * If the stride is 0 and the shape is bigger than - * one, that's a reduction. 
- */ - if (NAD_SHAPE(axisdata) > 1 && - NAD_STRIDES(axisdata)[iiter] == 0) { + else if (bshape == 1) { + strides[iiter] = 0; + } + else { + strides[iiter] = 0; + /* If it's writeable, this means a reduction */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { if (!(flags&NPY_ITER_REDUCE_OK)) { PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " + "output operand requires a reduction, but " "reduction is not enabled"); return 0; } if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " + "output operand requires a reduction, but " "is flagged as write-only, not " "read-write"); return 0; } NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; - break; } - - NIT_ADVANCE_AXISDATA(axisdata, 1); } } } + + NIT_ADVANCE_AXISDATA(axisdata, 1); } /* Now fill in the ITERSIZE member */ - NIT_ITERSIZE(iter) = 1; - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim) { - NIT_ITERSIZE(iter) *= NAD_SHAPE(axisdata); - - NIT_ADVANCE_AXISDATA(axisdata, 1); + NIT_ITERSIZE(iter) = broadcast_shape[0]; + for (idim = 1; idim < ndim; ++idim) { + NIT_ITERSIZE(iter) *= broadcast_shape[idim]; } /* The range defaults to everything */ NIT_ITERSTART(iter) = 0; @@ -3740,38 +3744,62 @@ npyiter_replace_axisdata(NpyIter *iter, npy_intp iiter, */ axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - char p; - npy_intp i, shape; + if (op_axes != NULL) { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; + npy_intp i, shape; - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = ndim+p; - } - else { - i = ndim-p-1; - } + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_axes[ndim+p]; + } + else { + i = op_axes[ndim-p-1]; + } - /* Apply op_axes */ - if (op_axes != NULL) { - i = op_axes[i]; - } - else { - i -= (ndim - op_ndim); + if ((npy_uintp)i < (npy_uintp)op_ndim) { + shape = PyArray_DIM(op, i); + if (shape != 1) { + npy_intp stride = PyArray_STRIDE(op, i); + if (p < 0) { + /* If the perm entry is negative, flip the axis */ + NAD_STRIDES(axisdata)[iiter] = -stride; + baseoffset += stride*(shape-1); + } + else { + NAD_STRIDES(axisdata)[iiter] = stride; + } + } + } } + } + else { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; + npy_intp i, shape; - if (i >= 0 && i < op_ndim) { - shape = PyArray_DIM(op, i); - if (shape != 1) { - npy_intp stride = PyArray_STRIDE(op, i); - if (p < 0) { - /* If the perm entry is negative, flip the axis */ - NAD_STRIDES(axisdata)[iiter] = -stride; - baseoffset += stride*(shape-1); - } - else { - NAD_STRIDES(axisdata)[iiter] = stride; + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_ndim+p; + } + else { + i = op_ndim-p-1; + } + + if (i >= 0) { + shape = PyArray_DIM(op, i); + if (shape != 1) { + npy_intp stride = PyArray_STRIDE(op, i); + if (p < 0) { + /* If the perm entry is negative, flip the axis */ + NAD_STRIDES(axisdata)[iiter] = -stride; + baseoffset += stride*(shape-1); + } + else { + NAD_STRIDES(axisdata)[iiter] = stride; + } } } } @@ -4266,119 +4294,149 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS], stride = op_dtype->elsize; char reversestride[NPY_MAXDIMS], anyreverse = 0; - NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, 
ndim, niter); - npy_intp tmp_op_axes = -1; - npy_intp i, array_i[NPY_MAXDIMS]; + NpyIter_AxisData *axisdata; + npy_intp sizeof_axisdata; + npy_intp i; PyArrayObject *ret; - /* - * If it's an automatically allocated output, start by assuming - * the shape will have the same length as the iterator - */ - if (shape == NULL) { - /* - * If it's a scalar output, trigger scalar allocation below - * by making an op_axes of -1 - */ - if (op_ndim == 0 && ndim == 1 && op_axes == NULL) { - op_axes = &tmp_op_axes; + /* If it's a scalar, don't need to check the axes */ + if (op_ndim == 0) { + Py_INCREF(op_dtype); + ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, 0, + NULL, NULL, NULL, 0, NULL); + + /* Double-check that the subtype didn't mess with the dimensions */ + if (PyArray_NDIM(ret) != 0) { + PyErr_SetString(PyExc_RuntimeError, + "Iterator automatic output has an array subtype " + "which changed the dimensions of the output"); + Py_DECREF(ret); + return NULL; } - op_ndim = ndim; - } - /* Initially no strides have been set */ - for (idim = 0; idim < op_ndim; ++idim) { - strides[idim] = NPY_MAX_INTP; - reversestride[idim] = 0; + return ret; } - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - char p; - - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = ndim+p; - } - else { - i = ndim-p-1; - } + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - /* Apply op_axes */ - if (op_axes != NULL) { - i = op_axes[i]; - } - else { - i -= (ndim - op_ndim); - } + memset(reversestride, 0, NPY_MAXDIMS); + /* Initialize the strides to invalid values */ + for (i = 0; i < NPY_MAXDIMS; ++i) { + strides[i] = NPY_MAX_INTP; + } - array_i[idim] = i; + if (op_axes != NULL) { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; - if (i >= 0) { - NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d " - "for iterator dimension %d to %d\n", (int)i, - (int)idim, (int)stride); - strides[i] = stride; + /* Apply the perm to get the original axis */ + p = perm[idim]; if (p < 0) { - reversestride[i] = 1; - anyreverse = 1; - } - if (shape == NULL) { - new_shape[i] = NAD_SHAPE(axisdata); - stride *= new_shape[i]; + i = op_axes[ndim+p]; } else { - stride *= shape[i]; + i = op_axes[ndim-p-1]; } - } - } - /* - * If custom axes were specified, some dimensions may not have been used. - * Add the REDUCE itflag if this creates a reduction situation. 
- */ - if (shape == NULL) { - axisdata = NIT_AXISDATA(iter); - for (idim = 0; idim < op_ndim; ++idim) { - i = array_i[idim]; - NPY_IT_DBG_PRINTF("Iterator: Checking output " - "dimension %d (iterator dim %d) with stride %d\n", - (int)i, (int)idim, (int)strides[idim]); - if (i < 0) { - NPY_IT_DBG_PRINTF("Iterator: The axis wasn't used, " - "and its dimension is %d\n", - (int)NAD_SHAPE(axisdata)); - /* - * If deleting this axis produces a reduction, but - * reduction wasn't enabled, throw an error - */ - if (NAD_SHAPE(axisdata) != 1) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "reduction is not enabled"); - return NULL; - } - if (!((*op_itflags)&NPY_OP_ITFLAG_READ)) { + if (i >= 0) { + NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d " + "for iterator dimension %d to %d\n", (int)i, + (int)idim, (int)stride); + strides[i] = stride; + if (p < 0) { + reversestride[i] = 1; + anyreverse = 1; + } + else { + reversestride[i] = 0; + } + if (shape == NULL) { + new_shape[i] = NAD_SHAPE(axisdata); + stride *= new_shape[i]; + if (i >= ndim) { PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "is flagged as write-only, not read-write"); + "automatically allocated output array " + "specified with an inconsistent axis mapping"); return NULL; } + } + else { + stride *= shape[i]; + } + } + else { + if (shape == NULL) { + /* + * If deleting this axis produces a reduction, but + * reduction wasn't enabled, throw an error + */ + if (NAD_SHAPE(axisdata) != 1) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output requires a reduction, but " + "reduction is not enabled"); + return NULL; + } + if (!((*op_itflags)&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output requires a reduction, but " + "is flagged as write-only, not read-write"); + return NULL; + } - NPY_IT_DBG_PRINTF("Iterator: Indicating that a reduction " - "is occurring\n"); - /* Indicate that a reduction is occurring */ - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - (*op_itflags) |= NPY_OP_ITFLAG_REDUCE; + NPY_IT_DBG_PRINTF("Iterator: Indicating that a reduction " + "is occurring\n"); + /* Indicate that a reduction is occurring */ + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + (*op_itflags) |= NPY_OP_ITFLAG_REDUCE; + } } } + } + } + else { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; - NIT_ADVANCE_AXISDATA(axisdata, 1); + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_ndim+p; + } + else { + i = op_ndim-p-1; + } + + if (i >= 0) { + NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d " + "for iterator dimension %d to %d\n", (int)i, + (int)idim, (int)stride); + strides[i] = stride; + if (p < 0) { + reversestride[i] = 1; + anyreverse = 1; + } + else { + reversestride[i] = 0; + } + if (shape == NULL) { + new_shape[i] = NAD_SHAPE(axisdata); + stride *= new_shape[i]; + } + else { + stride *= shape[i]; + } + } } + } + /* + * If custom axes were specified, some dimensions may not have been used. + * Add the REDUCE itflag if this creates a reduction situation. 
+ */ + if (shape == NULL) { /* Ensure there are no dimension gaps in op_axes, and find op_ndim */ op_ndim = ndim; if (op_axes != NULL) { @@ -4395,7 +4453,7 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, */ else if (op_ndim != ndim) { PyErr_SetString(PyExc_ValueError, - "automatically allocated reduction output array " + "automatically allocated output array " "specified with an inconsistent axis mapping"); return NULL; } @@ -4682,49 +4740,54 @@ npyiter_allocate_arrays(NpyIter *iter, * the inner stride of this operand works for the whole * array, we can set NPY_OP_ITFLAG_BUFNEVER. */ - if (PyArray_NDIM(op[iiter]) > 0 && (itflags&NPY_ITFLAG_BUFFER) && - !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) { - npy_intp stride, shape, innerstride = 0, innershape; + if ((itflags&NPY_ITFLAG_BUFFER) && !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) { NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - npy_intp sizeof_axisdata = - NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - /* Find stride of the first non-empty shape */ - for (idim = 0; idim < ndim; ++idim) { - innershape = NAD_SHAPE(axisdata); - if (innershape != 1) { - innerstride = NAD_STRIDES(axisdata)[iiter]; - break; - } - NIT_ADVANCE_AXISDATA(axisdata, 1); + if (ndim == 1) { + op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; + NBF_STRIDES(bufferdata)[iiter] = NAD_STRIDES(axisdata)[iiter]; } - ++idim; - NIT_ADVANCE_AXISDATA(axisdata, 1); - /* Check that everything could have coalesced together */ - for (; idim < ndim; ++idim) { - stride = NAD_STRIDES(axisdata)[iiter]; - shape = NAD_SHAPE(axisdata); - if (shape != 1) { - /* - * If N times the inner stride doesn't equal this - * stride, the multi-dimensionality is needed. - */ - if (innerstride*innershape != stride) { + else if (PyArray_NDIM(op[iiter]) > 0) { + npy_intp stride, shape, innerstride = 0, innershape; + npy_intp sizeof_axisdata = + NIT_AXISDATA_SIZEOF(itflags, ndim, niter); + /* Find stride of the first non-empty shape */ + for (idim = 0; idim < ndim; ++idim) { + innershape = NAD_SHAPE(axisdata); + if (innershape != 1) { + innerstride = NAD_STRIDES(axisdata)[iiter]; break; } - else { - innershape *= shape; - } + NIT_ADVANCE_AXISDATA(axisdata, 1); } + ++idim; NIT_ADVANCE_AXISDATA(axisdata, 1); - } - /* - * If we looped all the way to the end, one stride works. - * Set that stride, because it may not belong to the first - * dimension. - */ - if (idim == ndim) { - op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; - NBF_STRIDES(bufferdata)[iiter] = innerstride; + /* Check that everything could have coalesced together */ + for (; idim < ndim; ++idim) { + stride = NAD_STRIDES(axisdata)[iiter]; + shape = NAD_SHAPE(axisdata); + if (shape != 1) { + /* + * If N times the inner stride doesn't equal this + * stride, the multi-dimensionality is needed. + */ + if (innerstride*innershape != stride) { + break; + } + else { + innershape *= shape; + } + } + NIT_ADVANCE_AXISDATA(axisdata, 1); + } + /* + * If we looped all the way to the end, one stride works. + * Set that stride, because it may not belong to the first + * dimension. 
+ */ + if (idim == ndim) { + op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; + NBF_STRIDES(bufferdata)[iiter] = innerstride; + } } } } @@ -4771,6 +4834,7 @@ npyiter_get_common_dtype(npy_intp niter, PyArrayObject **op, npy_intp narrs = 0, ndtypes = 0; PyArrayObject *arrs[NPY_MAXARGS]; PyArray_Descr *dtypes[NPY_MAXARGS]; + PyArray_Descr *ret; NPY_IT_DBG_PRINTF("Iterator: Getting a common data type from operands\n"); @@ -4790,7 +4854,30 @@ npyiter_get_common_dtype(npy_intp niter, PyArrayObject **op, } } - return PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + if (narrs == 0) { + npy_intp i; + ret = dtypes[0]; + for (i = 1; i < ndtypes; ++i) { + if (ret != dtypes[i]) + break; + } + if (i == ndtypes) { + if (ndtypes == 1 || PyArray_ISNBO(ret->byteorder)) { + Py_INCREF(ret); + } + else { + ret = PyArray_DescrNewByteorder(ret, NPY_NATIVE); + } + } + else { + ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + } + } + else { + ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + } + + return ret; } static int |
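The @@ -4682 hunk above keeps the test that decides NPY_OP_ITFLAG_BUFNEVER: an operand never needs buffering if a single stride can traverse it, i.e. every non-unit dimension's stride equals the accumulated inner stride times inner shape (plus a new fast path that sets the flag directly when ndim == 1). A standalone sketch of that coalescing check (illustration only, with shapes and strides listed innermost-first; not the NumPy API):

```c
#include <stdio.h>

/*
 * Walk from the innermost dimension outward; if every non-unit
 * dimension's stride equals innerstride * innershape so far, the whole
 * operand can be traversed with a single stride.
 */
static int
one_stride_works(int ndim, const long *shape, const long *strides,
                 long *out_stride)
{
    long innerstride = 0, innershape = 1;
    int idim;

    /* Find the stride of the first non-unit dimension */
    for (idim = 0; idim < ndim; ++idim) {
        if (shape[idim] != 1) {
            innerstride = strides[idim];
            innershape = shape[idim];
            break;
        }
    }
    /* Check that every further non-unit dimension coalesces with it */
    for (++idim; idim < ndim; ++idim) {
        if (shape[idim] != 1) {
            if (innerstride * innershape != strides[idim]) {
                return 0;   /* multi-dimensionality really is needed */
            }
            innershape *= shape[idim];
        }
    }
    *out_stride = innerstride;
    return 1;
}

int
main(void)
{
    /* A contiguous 3x4 array of 8-byte elements, innermost dimension first */
    long shape[2] = {4, 3}, strides[2] = {8, 32}, stride;

    if (one_stride_works(2, shape, strides, &stride)) {
        printf("single stride of %ld bytes works\n", stride);
    }
    return 0;
}
```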