author | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-31 19:40:25 -0800
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-02-03 14:48:23 -0800
commit | 85993f895e7676f477f80e940a2ee925b3a0c19c (patch)
tree | 7fe30fe031de99838eb9086e3cf4711546cf1fd3
parent | fcc6cc73ddcb1fc85446ba9256ac24ecdda6c6d8 (diff)
download | numpy-85993f895e7676f477f80e940a2ee925b3a0c19c.tar.gz
ENH: iter: Add timing code, rewrite some sections to be faster/more clear
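The timing instrumentation added below is compiled out unless NPY_IT_CONSTRUCTION_TIMING is set to 1; each NPY_IT_TIME_POINT reads the CPU's time-stamp counter with rdtsc. A minimal, self-contained sketch of that pattern (x86/x86-64 with GCC or Clang inline asm only; the loop body is a hypothetical stand-in for the work being timed):

```c
#include <stdio.h>

/* Read the x86 time-stamp counter, as NPY_IT_TIME_POINT does. */
static unsigned long long
read_tsc(void)
{
    unsigned int hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=d" (hi), "=a" (lo));
    return (((unsigned long long)hi) << 32) | lo;
}

int
main(void)
{
    unsigned long long c_start, c_end;
    volatile double x = 0.0;
    int i;

    c_start = read_tsc();
    for (i = 0; i < 1000000; ++i) {
        x += i * 0.5;      /* hypothetical stand-in for iterator construction */
    }
    c_end = read_tsc();

    /* Same report format the new NPY_IT_PRINT_TIME_VAR macro uses */
    printf("%30s: %6.0f clocks\n", "timed region",
           (double)(c_end - c_start));
    return 0;
}
```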
-rw-r--r-- | numpy/core/src/multiarray/new_iterator.c.src | 889
1 file changed, 488 insertions, 401 deletions
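Among the rewritten sections, the @@ -3740 hunk in npyiter_replace_axisdata below splits the op_axes and no-op_axes cases apart; in both, a negative perm entry means the axis is iterated in reverse, which is done by negating the stride and shifting the base pointer to the axis's last element (baseoffset += stride*(shape-1)). A standalone sketch of that reversed-axis trick (illustration only, not the NumPy API):

```c
#include <stdio.h>

int
main(void)
{
    double data[5] = {1, 2, 3, 4, 5};
    long shape = 5;
    long stride = (long)sizeof(double);    /* forward stride, in bytes */
    char *base = (char *)data;
    long i;

    /* Flip the axis: point the base at the last element, negate the stride */
    char *flipped_base = base + stride * (shape - 1);
    long flipped_stride = -stride;

    for (i = 0; i < shape; ++i) {
        printf("%g ", *(double *)(flipped_base + i * flipped_stride));
    }
    printf("\n");                          /* prints: 5 4 3 2 1 */
    return 0;
}
```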
diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index 49d8ad8b5..caf8afb5a 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -18,6 +18,32 @@ #include "lowlevel_strided_loops.h" +/********** ITERATOR CONSTRUCTION TIMING **************/ +#define NPY_IT_CONSTRUCTION_TIMING 0 + +#if NPY_IT_CONSTRUCTION_TIMING +#define NPY_IT_TIME_POINT(var) { \ + unsigned int hi, lo; \ + __asm__ __volatile__ ( \ + "rdtsc" \ + : "=d" (hi), "=a" (lo)); \ + var = (((unsigned long long)hi) << 32) | lo; \ + } +#define NPY_IT_PRINT_TIME_START(var) { \ + printf("%30s: start\n", #var); \ + c_temp = var; \ + } +#define NPY_IT_PRINT_TIME_VAR(var) { \ + printf("%30s: %6.0f clocks\n", #var, \ + ((double)(var-c_temp))); \ + c_temp = var; \ + } +#else +#define NPY_IT_TIME_POINT(var) +#endif + +/******************************************************/ + /********** PRINTF DEBUG TRACING **************/ #define NPY_IT_DBG_TRACING 0 @@ -371,6 +397,28 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, double subtype_priority = NPY_PRIORITY; PyTypeObject *subtype = &PyArray_Type; +#if NPY_IT_CONSTRUCTION_TIMING + npy_intp c_temp, + c_start, + c_check_op_axes, + c_check_global_flags, + c_calculate_ndim, + c_malloc, + c_prepare_operands, + c_fill_axisdata, + c_compute_index_strides, + c_apply_forced_iteration_order, + c_find_best_axis_ordering, + c_get_priority_subtype, + c_find_output_common_dtype, + c_check_casting, + c_allocate_arrays, + c_coalesce_axes, + c_prepare_buffers; +#endif + + NPY_IT_TIME_POINT(c_start); + if (niter > NPY_MAXARGS) { PyErr_Format(PyExc_ValueError, "Cannot construct an iterator with more than %d operands " @@ -383,11 +431,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_check_op_axes); + /* Check the global iterator flags */ if (!npyiter_check_global_flags(flags, &itflags)) { return NULL; } + NPY_IT_TIME_POINT(c_check_global_flags); + /* Calculate how many dimensions the iterator should have */ ndim = npyiter_calculate_ndim(niter, op_in, oa_ndim); @@ -397,10 +449,14 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, ndim = 1; } + NPY_IT_TIME_POINT(c_calculate_ndim); + /* Allocate memory for the iterator */ iter = (NpyIter*) PyArray_malloc(NIT_SIZEOF_ITERATOR(itflags, ndim, niter)); + NPY_IT_TIME_POINT(c_malloc); + /* Fill in the basic data */ NIT_ITFLAGS(iter) = itflags; NIT_NDIM(iter) = ndim; @@ -424,6 +480,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, /* Set resetindex to zero as well (it's just after the resetdataptr) */ op_dataptr[niter] = 0; + NPY_IT_TIME_POINT(c_prepare_operands); + /* * Initialize buffer data (must set the buffers and transferdata * to NULL before we might deallocate the iterator). 
@@ -443,6 +501,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_fill_axisdata); + if (itflags&NPY_ITFLAG_BUFFER) { /* * If buffering is enabled and no buffersize was given, use a default @@ -466,6 +526,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, */ npyiter_compute_index_strides(iter, flags); + NPY_IT_TIME_POINT(c_compute_index_strides); + /* Initialize the perm to the identity */ perm = NIT_PERM(iter); for(idim = 0; idim < ndim; ++idim) { @@ -478,6 +540,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, npyiter_apply_forced_iteration_order(iter, order); itflags = NIT_ITFLAGS(iter); + NPY_IT_TIME_POINT(c_apply_forced_iteration_order); + /* Set some flags for allocated outputs */ for (iiter = 0; iiter < niter; ++iiter) { if (op[iiter] == NULL) { @@ -515,11 +579,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, itflags = NIT_ITFLAGS(iter); } + NPY_IT_TIME_POINT(c_find_best_axis_ordering); + if (need_subtype) { npyiter_get_priority_subtype(niter, op, op_itflags, &subtype_priority, &subtype); } + NPY_IT_TIME_POINT(c_get_priority_subtype); + /* * If an automatically allocated output didn't have a specified * dtype, we need to figure it out now, before allocating the outputs. @@ -544,9 +612,11 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, NPY_IT_DBG_PRINTF("Iterator: Replacing all data types\n"); /* Replace all the data types */ for (iiter = 0; iiter < niter; ++iiter) { - Py_XDECREF(op_dtype[iiter]); - Py_INCREF(dtype); - op_dtype[iiter] = dtype; + if (op_dtype[iiter] != dtype) { + Py_XDECREF(op_dtype[iiter]); + Py_INCREF(dtype); + op_dtype[iiter] = dtype; + } } } else { @@ -562,6 +632,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, Py_DECREF(dtype); } + NPY_IT_TIME_POINT(c_find_output_common_dtype); + /* * All of the data types have been settled, so it's time * to check that data type conversions are following the @@ -572,6 +644,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_check_casting); + /* * At this point, the iteration order has been finalized. so * any allocation of ops that were NULL, or any temporary @@ -584,6 +658,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_allocate_arrays); + /* * Finally, if coords weren't requested, * it may be possible to coalesce some axes together. @@ -602,6 +678,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, op_dataptr = NIT_RESETDATAPTR(iter); } + NPY_IT_TIME_POINT(c_coalesce_axes); + /* * Now that the axes are finished, check whether we can apply * the single iteration optimization to the iternext function. 
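The largest hunk below (@@ -3242, npyiter_fill_axisdata) replaces the old per-operand broadcasting passes with a single up-front computation of the combined broadcast shape, which the axisdata is then filled from. A minimal standalone sketch of that broadcasting rule (hypothetical helper and names, not the NumPy API):

```c
#include <stdio.h>

#define MAXDIMS 32

/*
 * Operand shapes are right-aligned against the iterator shape and
 * combined, with a length-1 dimension matching anything.
 * Returns 0 on a broadcast error, 1 on success.
 */
static int
broadcast_shapes(int nop, const int *ndims, int shapes[][MAXDIMS],
                 int ndim, int *broadcast_shape)
{
    int iop, idim;

    for (idim = 0; idim < ndim; ++idim) {
        broadcast_shape[idim] = 1;
    }
    for (iop = 0; iop < nop; ++iop) {
        int ondim = ndims[iop];
        for (idim = 0; idim < ondim; ++idim) {
            /* Right-align the operand's shape against the iterator shape */
            int bshape = broadcast_shape[idim + ndim - ondim];
            int op_shape = shapes[iop][idim];
            if (bshape == 1) {
                broadcast_shape[idim + ndim - ondim] = op_shape;
            }
            else if (bshape != op_shape && op_shape != 1) {
                return 0;   /* shapes cannot be broadcast together */
            }
        }
    }
    return 1;
}

int
main(void)
{
    int shapes[2][MAXDIMS] = {{3, 1, 5}, {4, 5}};
    int ndims[2] = {3, 2};
    int out[MAXDIMS];

    if (broadcast_shapes(2, ndims, shapes, 3, out)) {
        printf("broadcast shape: (%d, %d, %d)\n", out[0], out[1], out[2]);
    }
    return 0;
}
```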
@@ -657,6 +735,29 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, } } + NPY_IT_TIME_POINT(c_prepare_buffers); + +#if NPY_IT_CONSTRUCTION_TIMING + printf("\nIterator construction timing:\n"); + NPY_IT_PRINT_TIME_START(c_start); + NPY_IT_PRINT_TIME_VAR(c_check_op_axes); + NPY_IT_PRINT_TIME_VAR(c_check_global_flags); + NPY_IT_PRINT_TIME_VAR(c_calculate_ndim); + NPY_IT_PRINT_TIME_VAR(c_malloc); + NPY_IT_PRINT_TIME_VAR(c_prepare_operands); + NPY_IT_PRINT_TIME_VAR(c_fill_axisdata); + NPY_IT_PRINT_TIME_VAR(c_compute_index_strides); + NPY_IT_PRINT_TIME_VAR(c_apply_forced_iteration_order); + NPY_IT_PRINT_TIME_VAR(c_find_best_axis_ordering); + NPY_IT_PRINT_TIME_VAR(c_get_priority_subtype); + NPY_IT_PRINT_TIME_VAR(c_find_output_common_dtype); + NPY_IT_PRINT_TIME_VAR(c_check_casting); + NPY_IT_PRINT_TIME_VAR(c_allocate_arrays); + NPY_IT_PRINT_TIME_VAR(c_coalesce_axes); + NPY_IT_PRINT_TIME_VAR(c_prepare_buffers); + printf("\n"); +#endif + return iter; } @@ -3242,297 +3343,200 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags, npy_intp iiter, niter = NIT_NITER(iter); npy_intp ondim; - char *odataptr; - NpyIter_AxisData *axisdata0, *axisdata; + NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; - PyArrayObject **op = NIT_OPERANDS(iter); - - axisdata0 = NIT_AXISDATA(iter); - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - - /* Process the first operand */ - if (op_axes == NULL || op_axes[0] == NULL) { - /* Default broadcasting rules if op_axes is not specified */ - axisdata = axisdata0; - ondim = (op[0] == NULL) ? 0 : PyArray_NDIM(op[0]); - odataptr = op_dataptr[0]; - /* Possible if op_axes are being used, but op_axes[0] is NULL */ - if (ondim > ndim) { - PyErr_SetString(PyExc_ValueError, - "Iterator input has more dimensions than allowed " - "by the 'op_axes' specified"); - return 0; - } - for (idim = 0; idim < ondim; ++idim) { - npy_intp shape; - - /* op[0] != NULL, because we set ondim to 0 in that case */ - shape = PyArray_DIM(op[0], ondim-idim-1); - - NAD_SHAPE(axisdata) = shape; - NAD_COORD(axisdata) = 0; - if (shape == 1) { - NAD_STRIDES(axisdata)[0] = 0; - } - else { - NAD_STRIDES(axisdata)[0] = PyArray_STRIDE(op[0], ondim-idim-1); - } - NAD_PTRS(axisdata)[0] = odataptr; - - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - for (idim = ondim; idim < ndim; ++idim) { - NAD_SHAPE(axisdata) = 1; - NAD_COORD(axisdata) = 0; - NAD_STRIDES(axisdata)[0] = 0; - NAD_PTRS(axisdata)[0] = odataptr; + PyArrayObject **op = NIT_OPERANDS(iter), *op_cur; + npy_intp broadcast_shape[NPY_MAXDIMS]; - NIT_ADVANCE_AXISDATA(axisdata, 1); - } + /* First broadcast the shapes together */ + for (idim = 0; idim < ndim; ++idim) { + broadcast_shape[idim] = 1; } - else { - npy_intp *axes = op_axes[0]; - - /* Use op_axes to choose the axes */ - axisdata = axisdata0; - ondim = (op[0] == NULL) ? 
ndim : PyArray_NDIM(op[0]); - odataptr = op_dataptr[0]; - for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[ndim-idim-1]; - if (i < 0) { - NAD_SHAPE(axisdata) = 1; - NAD_COORD(axisdata) = 0; - NAD_STRIDES(axisdata)[0] = 0; - NAD_PTRS(axisdata)[0] = odataptr; - } - else if (i < ondim) { - npy_intp shape; - - if (op[0] != NULL) { - shape = PyArray_DIM(op[0], i); - } - else { - shape = 1; - } + for (iiter = 0; iiter < niter; ++iiter) { + op_cur = op[iiter]; + if (op_cur != NULL) { + npy_intp *shape = PyArray_DIMS(op_cur); + ondim = PyArray_NDIM(op_cur); - NAD_SHAPE(axisdata) = shape; - NAD_COORD(axisdata) = 0; - if (shape == 1) { - NAD_STRIDES(axisdata)[0] = 0; + if (op_axes == NULL || op_axes[iiter] == NULL) { + /* + * Possible if op_axes are being used, but + * op_axes[iiter] is NULL + */ + if (ondim > ndim) { + PyErr_SetString(PyExc_ValueError, + "input operand has more dimensions than allowed " + "by the axis remapping"); + return 0; } - else { - NAD_STRIDES(axisdata)[0] = PyArray_STRIDE(op[0], i); + for (idim = 0; idim < ondim; ++idim) { + npy_intp bshape = broadcast_shape[idim+ndim-ondim], + op_shape = shape[idim]; + if (bshape == 1) { + broadcast_shape[idim+ndim-ondim] = op_shape; + } + else if (bshape != op_shape && op_shape != 1) { + goto broadcast_error; + } } - NAD_PTRS(axisdata)[0] = odataptr; } else { - PyErr_Format(PyExc_ValueError, - "Iterator input op_axes[0][%d] (==%d) is not a valid " - "axis of op[0], which has %d dimensions", - (int)(ndim-idim-1), (int)i, (int)ondim); - return 0; + npy_intp *axes = op_axes[iiter]; + for (idim = 0; idim < ndim; ++idim) { + npy_intp i = axes[idim]; + if (i >= 0) { + if (i < ondim) { + npy_intp bshape = broadcast_shape[idim], + op_shape = shape[i]; + if (bshape == 1) { + broadcast_shape[idim] = op_shape; + } + else if (bshape != op_shape && op_shape != 1) { + goto broadcast_error; + } + } + else { + PyErr_Format(PyExc_ValueError, + "Iterator input op_axes[%d][%d] (==%d) " + "is not a valid axis of op[%d], which " + "has %d dimensions ", + (int)iiter, (int)(ndim-idim-1), (int)i, + (int)iiter, (int)ondim); + return 0; + } + } + } } - - NIT_ADVANCE_AXISDATA(axisdata, 1); } } - /* - * Process the rest of the operands, using the broadcasting rules - * to combine them. - */ - for (iiter = 1; iiter < niter; ++iiter) { - if (op_axes == NULL || op_axes[iiter] == NULL) { - axisdata = axisdata0; - ondim = (op[iiter] == NULL) ? 
0 : PyArray_NDIM(op[iiter]); - odataptr = op_dataptr[iiter]; - /* Possible if op_axes are being used, but op_axes[iiter] is NULL */ - if (ondim > ndim) { - PyErr_SetString(PyExc_ValueError, - "input operand has more dimensions than allowed " - "by the axis remapping"); - return 0; - } - for (idim = 0; idim < ondim; ++idim) { - npy_intp shape; - - /* op[iiter] != NULL, because we set ondim to 0 in that case */ - shape = PyArray_DIM(op[iiter], ondim-idim-1); + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - if (shape == 1) { - NAD_STRIDES(axisdata)[iiter] = 0; - } - else { - if (NAD_SHAPE(axisdata) == 1) { - NAD_SHAPE(axisdata) = shape; - } - else if (NAD_SHAPE(axisdata) != shape) { - goto broadcast_error; - } - NAD_STRIDES(axisdata)[iiter] = PyArray_STRIDE( - op[iiter], ondim-idim-1); - } - NAD_PTRS(axisdata)[iiter] = odataptr; + /* Now process the operands, filling in the axisdata */ + for (idim = 0; idim < ndim; ++idim) { + npy_intp bshape = broadcast_shape[ndim-idim-1]; + npy_intp *strides = NAD_STRIDES(axisdata); - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - for (idim = ondim; idim < ndim; ++idim) { - NAD_STRIDES(axisdata)[iiter] = 0; - NAD_PTRS(axisdata)[iiter] = odataptr; + NAD_SHAPE(axisdata) = bshape; + NAD_COORD(axisdata) = 0; + memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*niter); - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - } - else { - npy_intp *axes = op_axes[iiter]; + for (iiter = 0; iiter < niter; ++iiter) { + op_cur = op[iiter]; - /* Use op_axes to choose the axes */ - axisdata = axisdata0; - ondim = (op[iiter] == NULL) ? ndim : PyArray_NDIM(op[iiter]); - odataptr = op_dataptr[iiter]; - for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[ndim-idim-1]; - if (i < 0) { - NAD_STRIDES(axisdata)[iiter] = 0; - NAD_PTRS(axisdata)[iiter] = odataptr; + if (op_axes == NULL || op_axes[iiter] == NULL) { + if (op_cur == NULL) { + strides[iiter] = 0; } - else if (i < ondim) { - npy_intp shape; - - if (op[iiter] != NULL) { - shape = PyArray_DIM(op[iiter], i); - } - else { - shape = 1; - } - - if (shape == 1) { - NAD_STRIDES(axisdata)[iiter] = 0; + else { + ondim = PyArray_NDIM(op_cur); + if (bshape == 1) { + strides[iiter] = 0; + if (idim >= ondim && !output_scalars && + (op_flags[iiter]&NPY_ITER_NO_BROADCAST)) { + goto operand_different_than_broadcast; + } } - else { - if (NAD_SHAPE(axisdata) == 1) { - NAD_SHAPE(axisdata) = shape; + else if (idim >= ondim || + PyArray_DIM(op_cur, ondim-idim-1) == 1) { + strides[iiter] = 0; + if (op_flags[iiter]&NPY_ITER_NO_BROADCAST) { + goto operand_different_than_broadcast; } - else if (NAD_SHAPE(axisdata) != shape) { - goto broadcast_error; + /* If it's writeable, this means a reduction */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "reduction is not enabled"); + return 0; + } + if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "is flagged as write-only, not " + "read-write"); + return 0; + } + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; } - NAD_STRIDES(axisdata)[iiter] = - PyArray_STRIDE(op[iiter], i); } - NAD_PTRS(axisdata)[iiter] = odataptr; - } - else { - PyErr_Format(PyExc_ValueError, - "Iterator input op_axes[%d][%d] (==%d) is not a " - "valid axis of op[%d], which has %d dimensions ", - (int)iiter, (int)(ndim-idim-1), (int)i, - (int)iiter, 
(int)ondim); - return 0; + else { + strides[iiter] = PyArray_STRIDE(op_cur, ondim-idim-1); + } } - - NIT_ADVANCE_AXISDATA(axisdata, 1); } - } - } - - /* Go through and check for operands for broadcasting and reduction */ - for (iiter = 0; iiter < niter; ++iiter) { - if (op[iiter] != NULL) { - /* - * If broadcasting is disallowed for this operand, - * unless scalars are output, which means all operands are scalar - * and no broadcasting errors could occur - */ - if ((op_flags[iiter]&NPY_ITER_NO_BROADCAST) && !output_scalars) { - npy_intp *axes; - - axes = op_axes ? op_axes[iiter] : NULL; - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim) { - npy_intp i; - if (axes) { - i = axes[ndim-idim-1]; - } - else { - i = ndim-idim-1; + else { + npy_intp *axes = op_axes[iiter]; + npy_intp i = axes[ndim-idim-1]; + if (i >= 0) { + if (bshape == 1 || op_cur == NULL) { + strides[iiter] = 0; } - if (i >= 0 && i < PyArray_NDIM(op[iiter])) { - if (PyArray_DIM(op[iiter], i) != NAD_SHAPE(axisdata)) { + else if (PyArray_DIM(op_cur, i) == 1) { + strides[iiter] = 0; + if (op_flags[iiter]&NPY_ITER_NO_BROADCAST) { goto operand_different_than_broadcast; } - } - /* - * Also disallow broadcasting by adding additional - * dimensions, unless that part of the broadcasting - * was done explicitly through op_axes. - */ - else if (!axes) { - goto operand_different_than_broadcast; - } - /* If its writeable, this may cause a reduction */ - else if ((op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) && - NAD_SHAPE(axisdata) > 1) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " - "reduction is not enabled"); - return 0; - } - if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " - "is flagged as write-only, not " - "read-write"); - return 0; + /* If it's writeable, this means a reduction */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "reduction is not enabled"); + return 0; + } + if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "is flagged as write-only, not " + "read-write"); + return 0; + } + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; } - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; } - - NIT_ADVANCE_AXISDATA(axisdata, 1); + else { + strides[iiter] = PyArray_STRIDE(op_cur, i); + } } - } - /* Check whether this operand includes any reduction */ - else if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim) { - /* - * If the stride is 0 and the shape is bigger than - * one, that's a reduction. 
- */ - if (NAD_SHAPE(axisdata) > 1 && - NAD_STRIDES(axisdata)[iiter] == 0) { + else if (bshape == 1) { + strides[iiter] = 0; + } + else { + strides[iiter] = 0; + /* If it's writeable, this means a reduction */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { if (!(flags&NPY_ITER_REDUCE_OK)) { PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " + "output operand requires a reduction, but " "reduction is not enabled"); return 0; } if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " + "output operand requires a reduction, but " "is flagged as write-only, not " "read-write"); return 0; } NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; - break; } - - NIT_ADVANCE_AXISDATA(axisdata, 1); } } } + + NIT_ADVANCE_AXISDATA(axisdata, 1); } /* Now fill in the ITERSIZE member */ - NIT_ITERSIZE(iter) = 1; - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim) { - NIT_ITERSIZE(iter) *= NAD_SHAPE(axisdata); - - NIT_ADVANCE_AXISDATA(axisdata, 1); + NIT_ITERSIZE(iter) = broadcast_shape[0]; + for (idim = 1; idim < ndim; ++idim) { + NIT_ITERSIZE(iter) *= broadcast_shape[idim]; } /* The range defaults to everything */ NIT_ITERSTART(iter) = 0; @@ -3740,38 +3744,62 @@ npyiter_replace_axisdata(NpyIter *iter, npy_intp iiter, */ axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - char p; - npy_intp i, shape; + if (op_axes != NULL) { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; + npy_intp i, shape; - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = ndim+p; - } - else { - i = ndim-p-1; - } + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_axes[ndim+p]; + } + else { + i = op_axes[ndim-p-1]; + } - /* Apply op_axes */ - if (op_axes != NULL) { - i = op_axes[i]; - } - else { - i -= (ndim - op_ndim); + if ((npy_uintp)i < (npy_uintp)op_ndim) { + shape = PyArray_DIM(op, i); + if (shape != 1) { + npy_intp stride = PyArray_STRIDE(op, i); + if (p < 0) { + /* If the perm entry is negative, flip the axis */ + NAD_STRIDES(axisdata)[iiter] = -stride; + baseoffset += stride*(shape-1); + } + else { + NAD_STRIDES(axisdata)[iiter] = stride; + } + } + } } + } + else { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; + npy_intp i, shape; - if (i >= 0 && i < op_ndim) { - shape = PyArray_DIM(op, i); - if (shape != 1) { - npy_intp stride = PyArray_STRIDE(op, i); - if (p < 0) { - /* If the perm entry is negative, flip the axis */ - NAD_STRIDES(axisdata)[iiter] = -stride; - baseoffset += stride*(shape-1); - } - else { - NAD_STRIDES(axisdata)[iiter] = stride; + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_ndim+p; + } + else { + i = op_ndim-p-1; + } + + if (i >= 0) { + shape = PyArray_DIM(op, i); + if (shape != 1) { + npy_intp stride = PyArray_STRIDE(op, i); + if (p < 0) { + /* If the perm entry is negative, flip the axis */ + NAD_STRIDES(axisdata)[iiter] = -stride; + baseoffset += stride*(shape-1); + } + else { + NAD_STRIDES(axisdata)[iiter] = stride; + } } } } @@ -4266,119 +4294,149 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS], stride = op_dtype->elsize; char reversestride[NPY_MAXDIMS], anyreverse = 0; - NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, 
ndim, niter); - npy_intp tmp_op_axes = -1; - npy_intp i, array_i[NPY_MAXDIMS]; + NpyIter_AxisData *axisdata; + npy_intp sizeof_axisdata; + npy_intp i; PyArrayObject *ret; - /* - * If it's an automatically allocated output, start by assuming - * the shape will have the same length as the iterator - */ - if (shape == NULL) { - /* - * If it's a scalar output, trigger scalar allocation below - * by making an op_axes of -1 - */ - if (op_ndim == 0 && ndim == 1 && op_axes == NULL) { - op_axes = &tmp_op_axes; + /* If it's a scalar, don't need to check the axes */ + if (op_ndim == 0) { + Py_INCREF(op_dtype); + ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, 0, + NULL, NULL, NULL, 0, NULL); + + /* Double-check that the subtype didn't mess with the dimensions */ + if (PyArray_NDIM(ret) != 0) { + PyErr_SetString(PyExc_RuntimeError, + "Iterator automatic output has an array subtype " + "which changed the dimensions of the output"); + Py_DECREF(ret); + return NULL; } - op_ndim = ndim; - } - /* Initially no strides have been set */ - for (idim = 0; idim < op_ndim; ++idim) { - strides[idim] = NPY_MAX_INTP; - reversestride[idim] = 0; + return ret; } - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - char p; - - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = ndim+p; - } - else { - i = ndim-p-1; - } + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - /* Apply op_axes */ - if (op_axes != NULL) { - i = op_axes[i]; - } - else { - i -= (ndim - op_ndim); - } + memset(reversestride, 0, NPY_MAXDIMS); + /* Initialize the strides to invalid values */ + for (i = 0; i < NPY_MAXDIMS; ++i) { + strides[i] = NPY_MAX_INTP; + } - array_i[idim] = i; + if (op_axes != NULL) { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; - if (i >= 0) { - NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d " - "for iterator dimension %d to %d\n", (int)i, - (int)idim, (int)stride); - strides[i] = stride; + /* Apply the perm to get the original axis */ + p = perm[idim]; if (p < 0) { - reversestride[i] = 1; - anyreverse = 1; - } - if (shape == NULL) { - new_shape[i] = NAD_SHAPE(axisdata); - stride *= new_shape[i]; + i = op_axes[ndim+p]; } else { - stride *= shape[i]; + i = op_axes[ndim-p-1]; } - } - } - /* - * If custom axes were specified, some dimensions may not have been used. - * Add the REDUCE itflag if this creates a reduction situation. 
- */ - if (shape == NULL) { - axisdata = NIT_AXISDATA(iter); - for (idim = 0; idim < op_ndim; ++idim) { - i = array_i[idim]; - NPY_IT_DBG_PRINTF("Iterator: Checking output " - "dimension %d (iterator dim %d) with stride %d\n", - (int)i, (int)idim, (int)strides[idim]); - if (i < 0) { - NPY_IT_DBG_PRINTF("Iterator: The axis wasn't used, " - "and its dimension is %d\n", - (int)NAD_SHAPE(axisdata)); - /* - * If deleting this axis produces a reduction, but - * reduction wasn't enabled, throw an error - */ - if (NAD_SHAPE(axisdata) != 1) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "reduction is not enabled"); - return NULL; - } - if (!((*op_itflags)&NPY_OP_ITFLAG_READ)) { + if (i >= 0) { + NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d " + "for iterator dimension %d to %d\n", (int)i, + (int)idim, (int)stride); + strides[i] = stride; + if (p < 0) { + reversestride[i] = 1; + anyreverse = 1; + } + else { + reversestride[i] = 0; + } + if (shape == NULL) { + new_shape[i] = NAD_SHAPE(axisdata); + stride *= new_shape[i]; + if (i >= ndim) { PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "is flagged as write-only, not read-write"); + "automatically allocated output array " + "specified with an inconsistent axis mapping"); return NULL; } + } + else { + stride *= shape[i]; + } + } + else { + if (shape == NULL) { + /* + * If deleting this axis produces a reduction, but + * reduction wasn't enabled, throw an error + */ + if (NAD_SHAPE(axisdata) != 1) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output requires a reduction, but " + "reduction is not enabled"); + return NULL; + } + if (!((*op_itflags)&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output requires a reduction, but " + "is flagged as write-only, not read-write"); + return NULL; + } - NPY_IT_DBG_PRINTF("Iterator: Indicating that a reduction " - "is occurring\n"); - /* Indicate that a reduction is occurring */ - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - (*op_itflags) |= NPY_OP_ITFLAG_REDUCE; + NPY_IT_DBG_PRINTF("Iterator: Indicating that a reduction " + "is occurring\n"); + /* Indicate that a reduction is occurring */ + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + (*op_itflags) |= NPY_OP_ITFLAG_REDUCE; + } } } + } + } + else { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; - NIT_ADVANCE_AXISDATA(axisdata, 1); + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_ndim+p; + } + else { + i = op_ndim-p-1; + } + + if (i >= 0) { + NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d " + "for iterator dimension %d to %d\n", (int)i, + (int)idim, (int)stride); + strides[i] = stride; + if (p < 0) { + reversestride[i] = 1; + anyreverse = 1; + } + else { + reversestride[i] = 0; + } + if (shape == NULL) { + new_shape[i] = NAD_SHAPE(axisdata); + stride *= new_shape[i]; + } + else { + stride *= shape[i]; + } + } } + } + /* + * If custom axes were specified, some dimensions may not have been used. + * Add the REDUCE itflag if this creates a reduction situation. 
+ */ + if (shape == NULL) { /* Ensure there are no dimension gaps in op_axes, and find op_ndim */ op_ndim = ndim; if (op_axes != NULL) { @@ -4395,7 +4453,7 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, */ else if (op_ndim != ndim) { PyErr_SetString(PyExc_ValueError, - "automatically allocated reduction output array " + "automatically allocated output array " "specified with an inconsistent axis mapping"); return NULL; } @@ -4682,49 +4740,54 @@ npyiter_allocate_arrays(NpyIter *iter, * the inner stride of this operand works for the whole * array, we can set NPY_OP_ITFLAG_BUFNEVER. */ - if (PyArray_NDIM(op[iiter]) > 0 && (itflags&NPY_ITFLAG_BUFFER) && - !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) { - npy_intp stride, shape, innerstride = 0, innershape; + if ((itflags&NPY_ITFLAG_BUFFER) && !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) { NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - npy_intp sizeof_axisdata = - NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - /* Find stride of the first non-empty shape */ - for (idim = 0; idim < ndim; ++idim) { - innershape = NAD_SHAPE(axisdata); - if (innershape != 1) { - innerstride = NAD_STRIDES(axisdata)[iiter]; - break; - } - NIT_ADVANCE_AXISDATA(axisdata, 1); + if (ndim == 1) { + op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; + NBF_STRIDES(bufferdata)[iiter] = NAD_STRIDES(axisdata)[iiter]; } - ++idim; - NIT_ADVANCE_AXISDATA(axisdata, 1); - /* Check that everything could have coalesced together */ - for (; idim < ndim; ++idim) { - stride = NAD_STRIDES(axisdata)[iiter]; - shape = NAD_SHAPE(axisdata); - if (shape != 1) { - /* - * If N times the inner stride doesn't equal this - * stride, the multi-dimensionality is needed. - */ - if (innerstride*innershape != stride) { + else if (PyArray_NDIM(op[iiter]) > 0) { + npy_intp stride, shape, innerstride = 0, innershape; + npy_intp sizeof_axisdata = + NIT_AXISDATA_SIZEOF(itflags, ndim, niter); + /* Find stride of the first non-empty shape */ + for (idim = 0; idim < ndim; ++idim) { + innershape = NAD_SHAPE(axisdata); + if (innershape != 1) { + innerstride = NAD_STRIDES(axisdata)[iiter]; break; } - else { - innershape *= shape; - } + NIT_ADVANCE_AXISDATA(axisdata, 1); } + ++idim; NIT_ADVANCE_AXISDATA(axisdata, 1); - } - /* - * If we looped all the way to the end, one stride works. - * Set that stride, because it may not belong to the first - * dimension. - */ - if (idim == ndim) { - op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; - NBF_STRIDES(bufferdata)[iiter] = innerstride; + /* Check that everything could have coalesced together */ + for (; idim < ndim; ++idim) { + stride = NAD_STRIDES(axisdata)[iiter]; + shape = NAD_SHAPE(axisdata); + if (shape != 1) { + /* + * If N times the inner stride doesn't equal this + * stride, the multi-dimensionality is needed. + */ + if (innerstride*innershape != stride) { + break; + } + else { + innershape *= shape; + } + } + NIT_ADVANCE_AXISDATA(axisdata, 1); + } + /* + * If we looped all the way to the end, one stride works. + * Set that stride, because it may not belong to the first + * dimension. 
+ */ + if (idim == ndim) { + op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; + NBF_STRIDES(bufferdata)[iiter] = innerstride; + } } } } @@ -4771,6 +4834,7 @@ npyiter_get_common_dtype(npy_intp niter, PyArrayObject **op, npy_intp narrs = 0, ndtypes = 0; PyArrayObject *arrs[NPY_MAXARGS]; PyArray_Descr *dtypes[NPY_MAXARGS]; + PyArray_Descr *ret; NPY_IT_DBG_PRINTF("Iterator: Getting a common data type from operands\n"); @@ -4790,7 +4854,30 @@ npyiter_get_common_dtype(npy_intp niter, PyArrayObject **op, } } - return PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + if (narrs == 0) { + npy_intp i; + ret = dtypes[0]; + for (i = 1; i < ndtypes; ++i) { + if (ret != dtypes[i]) + break; + } + if (i == ndtypes) { + if (ndtypes == 1 || PyArray_ISNBO(ret->byteorder)) { + Py_INCREF(ret); + } + else { + ret = PyArray_DescrNewByteorder(ret, NPY_NATIVE); + } + } + else { + ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + } + } + else { + ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + } + + return ret; } static int |
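The @@ -4682 hunk above keeps the test that decides NPY_OP_ITFLAG_BUFNEVER: an operand never needs buffering if a single stride can traverse it, i.e. every non-unit dimension's stride equals the accumulated inner stride times inner shape (plus a new fast path that sets the flag directly when ndim == 1). A standalone sketch of that coalescing check (illustration only, with shapes and strides listed innermost-first; not the NumPy API):

```c
#include <stdio.h>

/*
 * Walk from the innermost dimension outward; if every non-unit
 * dimension's stride equals innerstride * innershape so far, the whole
 * operand can be traversed with a single stride.
 */
static int
one_stride_works(int ndim, const long *shape, const long *strides,
                 long *out_stride)
{
    long innerstride = 0, innershape = 1;
    int idim;

    /* Find the stride of the first non-unit dimension */
    for (idim = 0; idim < ndim; ++idim) {
        if (shape[idim] != 1) {
            innerstride = strides[idim];
            innershape = shape[idim];
            break;
        }
    }
    /* Check that every further non-unit dimension coalesces with it */
    for (++idim; idim < ndim; ++idim) {
        if (shape[idim] != 1) {
            if (innerstride * innershape != strides[idim]) {
                return 0;   /* multi-dimensionality really is needed */
            }
            innershape *= shape[idim];
        }
    }
    *out_stride = innerstride;
    return 1;
}

int
main(void)
{
    /* A contiguous 3x4 array of 8-byte elements, innermost dimension first */
    long shape[2] = {4, 3}, strides[2] = {8, 32}, stride;

    if (one_stride_works(2, shape, strides, &stride)) {
        printf("single stride of %ld bytes works\n", stride);
    }
    return 0;
}
```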