ENH: umath: Add checking for reorderable ufuncs, add PyArray_ReduceWrapper to API

np.divide, for instance, isn't commutative, so the computation should not change the order of computation. The reduce function disables multiple-axis reductions on ufuncs which are not reorderable.
author: Mark Wiebe <mwwiebe@gmail.com> 2011-08-21 11:46:52 -0700
committer: Charles Harris <charlesr.harris@gmail.com> 2011-08-27 07:26:58 -0600
commit: 9a9f08e089ff49fccca1feac9620c2837f8c09bd (patch)
tree: 87c29d7db3c96ff0ad2b3c7c6953c9efb6799ce5
parent: f3d60f9696856b47f4f5818b751eba2f7dcc402a (diff)
download: numpy-9a9f08e089ff49fccca1feac9620c2837f8c09bd.tar.gz
9 files changed, 261 insertions, 53 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 5fe31b945..536b64a9a 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -11,6 +11,7 @@ sys.path.pop(0)
 Zero = "PyUFunc_Zero"
 One = "PyUFunc_One"
 None_ = "PyUFunc_None"
+ReorderableNone = "PyUFunc_ReorderableNone"
 
 # Sentinel value to specify using the full type description in the
 # function name
@@ -440,28 +441,28 @@ defdict = {
           TD(P, f='logical_xor'),
           ),
 'maximum' :
-    Ufunc(2, 1, None,
+    Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.maximum'),
           'PyUFunc_SimpleBinaryOperationTypeResolution',
           TD(noobj),
           TD(O, f='npy_ObjectMax')
           ),
 'minimum' :
-    Ufunc(2, 1, None,
+    Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.minimum'),
           'PyUFunc_SimpleBinaryOperationTypeResolution',
           TD(noobj),
           TD(O, f='npy_ObjectMin')
           ),
 'fmax' :
-    Ufunc(2, 1, None,
+    Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.fmax'),
           'PyUFunc_SimpleBinaryOperationTypeResolution',
           TD(noobj),
           TD(O, f='npy_ObjectMax')
           ),
 'fmin' :
-    Ufunc(2, 1, None,
+    Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.fmin'),
           'PyUFunc_SimpleBinaryOperationTypeResolution',
           TD(noobj),
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index 2e159db06..3288a5749 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -335,8 +335,9 @@ multiarray_funcs_api = {
     'PyArray_AssignArray':                  296,
     'PyArray_CreateReduceResult':           297,
     'PyArray_InitializeReduceResult':       298,
-    'PyArray_RemoveAxesInPlace':            299,
-    'PyArray_DebugPrint':                   300,
+    'PyArray_ReduceWrapper':                299,
+    'PyArray_RemoveAxesInPlace':            300,
+    'PyArray_DebugPrint':                   301,
 }
 
 ufunc_types_api = {
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index 374a9b622..f8d44f99a 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -953,12 +953,12 @@ typedef int (PyArray_FinalizeFunc)(PyArrayObject *, PyObject *);
 #define NPY_BEGIN_THREADS _save = PyEval_SaveThread();
 #define NPY_END_THREADS   do {if (_save) PyEval_RestoreThread(_save);} while (0);
 
-#define NPY_BEGIN_THREADS_DESCR(dtype)                          \
-        do {if (!(PyDataType_FLAGCHK(dtype, NPY_NEEDS_PYAPI)))      \
+#define NPY_BEGIN_THREADS_DESCR(dtype) \
+        do {if (!(PyDataType_FLAGCHK(dtype, NPY_NEEDS_PYAPI))) \
                 NPY_BEGIN_THREADS;} while (0);
 
-#define NPY_END_THREADS_DESCR(dtype)                            \
-        do {if (!(PyDataType_FLAGCHK(dtype, NPY_NEEDS_PYAPI)))      \
+#define NPY_END_THREADS_DESCR(dtype) \
+        do {if (!(PyDataType_FLAGCHK(dtype, NPY_NEEDS_PYAPI))) \
                 NPY_END_THREADS; } while (0);
 
 #define NPY_ALLOW_C_API_DEF  PyGILState_STATE __save__;
@@ -978,7 +978,7 @@ typedef int (PyArray_FinalizeFunc)(PyArrayObject *, PyObject *);
 #endif
 
 /*****************************
- * NA object
+ * NA object, added in 1.7
  *****************************/
 
 /* Direct access to the fields of the NA object is just internal to NumPy. */
@@ -1775,19 +1775,153 @@ struct NpyAuxData_tag {
     ((auxdata)->clone(auxdata))
 
 /************************************************************
- * A struct used by PyArray_CreateSortedStridePerm
+ * A struct used by PyArray_CreateSortedStridePerm, new in 1.7.
  ************************************************************/
 
 typedef struct {
     npy_intp perm, stride;
 } npy_stride_sort_item;
 
+/************************************************************
+ * Typedefs used by PyArray_ReduceWrapper, new in 1.7.
+ ************************************************************/
+
 /*
+ * This is a function for assigning a reduction unit to the result,
+ * before doing the reduction computation. If 'preservena' is True,
+ * any masked NA values in 'result' should not be overwritten. The
+ * value in 'data' is passed through from PyArray_ReduceWrapper.
+ *
+ * This function could, for example, simply be a call like
+ *      return PyArray_AssignZero(result, NULL, preservena, NULL);
+ *
+ * It should return -1 on failure, or 0 on success.
+ */
+typedef int (PyArray_AssignReduceUnitFunc)(PyArrayObject *result,
+                                            int preservena, void *data);
+
+/*
+ * This is a function for the inner reduce loop. Both the unmasked and
+ * masked variants have the same prototype, but should behave differently.
+ *
+ * The needs_api parameter indicates whether it's ok to release the GIL during
+ * the inner loop, such as when the iternext() function never calls
+ * a function which could raise a Python exception.
+ *
+ * Ths skip_first_count parameter indicates how many elements need to be
+ * skipped based on NpyIter_IsFirstVisit checks. This can only be positive
+ * when the 'assign_unit' parameter was NULL when calling
+ * PyArray_ReduceWrapper.
+ *
+ * The unmasked inner loop gets two data pointers and two strides, and should
+ * look roughly like this:
+ *  {
+ *      NPY_BEGIN_THREADS_DEF;
+ *      if (!needs_api) {
+ *          NPY_BEGIN_THREADS;
+ *      }
+ *      // This first-visit loop can be skipped if 'assign_unit' was non-NULL
+ *      if (skip_first_count > 0) {
+ *          do {
+ *              char *data0 = dataptr[0], *data1 = dataptr[1];
+ *              npy_intp stride0 = strideptr[0], stride1 = strideptr[1];
+ *              npy_intp count = *countptr;
+ *
+ *              // Skip any first-visit elements
+ *              if (NpyIter_IsFirstVisit(iter, 0)) {
+ *                  if (stride0 == 0) {
+ *                      --count;
+ *                      --skip_first_count;
+ *                      data1 += stride1;
+ *                      //data2 += stride2; // In masked loop
+ *                  }
+ *                  else {
+ *                      skip_first_count -= count;
+ *                      count = 0;
+ *                  }
+ *              }
+ *
+ *              while (count--) {
+ *                  *(result_t *)data0 = my_reduce_op(*(result_t *)data0,
+ *                                                    *(operand_t *)data1);
+ *                  data0 += stride0;
+ *                  data1 += stride1;
+ *              }
+ *
+ *              // Jump to the faster loop when skipping is done
+ *              if (skip_first_count == 0) {
+ *                  if (iternext(iter)) {
+ *                      break;
+ *                  }
+ *                  else {
+ *                      goto finish_loop;
+ *                  }
+ *              }
+ *          } while (iternext(iter));
+ *      }
+ *      do {
+ *          char *data0 = dataptr[0], *data1 = dataptr[1];
+ *          npy_intp stride0 = strideptr[0], stride1 = strideptr[1];
+ *          npy_intp count = *countptr;
+ *
+ *          while (count--) {
+ *              *(result_t *)data0 = my_reduce_op(*(result_t *)data0,
+ *                                                *(operand_t *)data1);
+ *              data0 += stride0;
+ *              data1 += stride1;
+ *          }
+ *      } while (iternext(iter));
+ *  finish_loop:
+ *      if (!needs_api) {
+ *          NPY_END_THREADS;
+ *      }
+ *      return (needs_api && PyErr_Occurred()) ? -1 : 0;
+ *  }
+ *
+ * The masked inner loop gets three data pointers and three strides, and
+ * looks identical except for the iteration inner loops which should be
+ * like this:
+ *      do {
+ *          char *data0 = dataptr[0], *data1 = dataptr[1], *data2 = dataptr[2];
+ *          npy_intp stride0 = strideptr[0], stride1 = strideptr[1],
+ *                      stride2 = strideptr[2];
+ *          npy_intp count = *countptr;
+ *
+ *          // Skipping first visits would go here
+ *
+ *          while (count--) {
+ *              if (NpyMaskValue_IsExposed((npy_mask)*data2)) {
+ *                  *(result_t *)data0 = my_reduce_op(*(result_t *)data0,
+ *                                                    *(operand_t *)data1);
+ *              }
+ *              data0 += stride0;
+ *              data1 += stride1;
+ *              data2 += stride2;
+ *          }
+ *
+ *          // Jumping to the faster loop would go here
+ *
+ *      } while (iternext(iter));
+ *
+ * If needs_api is True, this function should call PyErr_Occurred()
+ * to check if an error occurred during processing, and return -1 for
+ * error, 0 for success.
+ */
+typedef int (PyArray_ReduceInnerLoopFunc)(NpyIter *iter,
+                                            char **dataptr,
+                                            npy_intp *strideptr,
+                                            npy_intp *countptr,
+                                            NpyIter_IterNextFunc *iternext,
+                                            int needs_api,
+                                            npy_intp skip_first_count,
+                                            void *data);
+
+/************************************************************
  * This is the form of the struct that's returned pointed by the
  * PyCObject attribute of an array __array_struct__. See
  * http://numpy.scipy.org/array_interface.shtml for the full
  * documentation.
- */
+ ************************************************************/
 typedef struct {
     int two;              /*
                            * contains the integer 2 as a sanity
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index c0a2308b5..efcfa9a1c 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -255,9 +255,26 @@ typedef struct _tagPyUFuncObject {
 #define NPY_LOOP_END_THREADS
 #endif
 
+/*
+ * UFunc has unit of 1, and the order of operations can be reordered
+ * This case allows reduction with multiple axes at once.
+ */
 #define PyUFunc_One 1
+/*
+ * UFunc has unit of 0, and the order of operations can be reordered
+ * This case allows reduction with multiple axes at once.
+ */
 #define PyUFunc_Zero 0
+/*
+ * UFunc has no unit, and the order of operations cannot be reordered.
+ * This case does not allow reduction with multiple axes at once.
+ */
 #define PyUFunc_None -1
+/*
+ * UFunc has no unit, and the order of operations can be reordered
+ * This case allows reduction with multiple axes at once.
+ */
+#define PyUFunc_ReorderableNone -2
 
 #define UFUNC_REDUCE 0
 #define UFUNC_ACCUMULATE 1
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 5b03fdb0c..09431e9dc 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -2011,7 +2011,7 @@ PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out,
 
     result = PyArray_ReduceWrapper(arr, out,
                             PyArray_DESCR(arr), dtype,
-                            axis_flags, skipna, keepdims,
+                            axis_flags, 1, skipna, keepdims,
                             &assign_reduce_unit_zero,
                             &reduce_count_nonzero_inner_loop,
                             &reduce_count_nonzero_masked_inner_loop,
diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c
index 4684085d1..03c15b420 100644
--- a/numpy/core/src/multiarray/reduction.c
+++ b/numpy/core/src/multiarray/reduction.c
@@ -244,6 +244,32 @@ PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out,
     return result;
 }
 
+/*
+ * Checks that there are only zero or one dimensions selected in 'axis_flags',
+ * and raises an error about a non-reorderable reduction if not.
+ */
+static int
+check_nonreorderable_axes(int ndim, npy_bool *axis_flags, const char *funcname)
+{
+    int idim, single_axis = 0;
+    for (idim = 0; idim < ndim; ++idim) {
+        if (axis_flags[idim]) {
+            if (single_axis) {
+                PyErr_Format(PyExc_ValueError,
+                        "reduction operation '%s' is not reorderable, "
+                        "so only one axis may be specified",
+                        funcname);
+                return -1;
+            }
+            else {
+                single_axis = 1;
+            }
+        }
+    }
+
+    return 0;
+}
+
 /*NUMPY_API
  *
  * This function initializes a result array for a reduction operation
@@ -285,6 +311,11 @@ PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out,
  * operand : The array being reduced.
  * axis_flags : An array of boolean flags, one for each axis of 'operand'.
  *              When a flag is True, it indicates to reduce along that axis.
+ * reorderable : If True, the reduction being done is reorderable, which
+ *               means specifying multiple axes of reduction at once is ok,
+ *               and the reduction code may calculate the reduction in an
+ *               arbitrary order. The calculation may be reordered because
+ *               of cache behavior or multithreading requirements.
  * skipna  : If True, indicates that the reduction is being calculated
  *           as if the NA values are being dropped from the computation
  *           instead of accumulating into an NA result.
@@ -301,7 +332,7 @@ PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out,
 NPY_NO_EXPORT PyArrayObject *
 PyArray_InitializeReduceResult(
                     PyArrayObject *result, PyArrayObject *operand,
-                    npy_bool *axis_flags, int skipna,
+                    npy_bool *axis_flags, int reorderable, int skipna,
                     npy_intp *out_skip_first_count, const char *funcname)
 {
     npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS], shape0;
@@ -314,6 +345,15 @@ PyArray_InitializeReduceResult(
     *out_skip_first_count = 0;
 
     /*
+     * If this reduction is non-reorderable, make sure there are
+     * only 0 or 1 axes in axis_flags.
+     */
+    if (!reorderable && check_nonreorderable_axes(ndim,
+                                    axis_flags, funcname) < 0) {
+        return NULL;
+    }
+
+    /*
      * If 'skipna' is False, or 'operand' has no NA mask in which
      * case the 'skipna' flag does nothing.
      */
@@ -421,10 +461,10 @@ PyArray_InitializeReduceResult(
     }
 
     /*
-     * If there are zero or one reduction axes, adjust the view's
+     * If there is one reduction axis, adjust the view's
      * shape to only look at the remaining elements
      */
-    if (nreduce_axes <= 1) {
+    if (nreduce_axes == 1) {
         strides = PyArray_STRIDES(op_view);
         for (idim = 0; idim < ndim; ++idim) {
             if (axis_flags[idim]) {
@@ -442,6 +482,12 @@ PyArray_InitializeReduceResult(
             }
         }
     }
+    /* If there are zero reduction axes, make the view empty */
+    else if (nreduce_axes == 0) {
+        for (idim = 0; idim < ndim; ++idim) {
+            shape[idim] = 0;
+        }
+    }
     /*
      * Otherwise iterate over the whole operand, but tell the inner loop
      * to skip the elements we already copied by setting the skip_first_count.
@@ -457,7 +503,8 @@ PyArray_InitializeReduceResult(
     return op_view;
 }
 
-/*
+/*NUMPY_API
+ *
  * This function executes all the standard NumPy reduction function
  * boilerplate code, just calling assign_unit and the appropriate
  * inner loop function where necessary.
@@ -467,6 +514,11 @@ PyArray_InitializeReduceResult(
  * operand_dtype : The dtype the inner loop expects for the operand.
  * result_dtype : The dtype the inner loop expects for the result.
  * axis_flags  : Flags indicating the reduction axes of 'operand'.
+ * reorderable : If True, the reduction being done is reorderable, which
+ *               means specifying multiple axes of reduction at once is ok,
+ *               and the reduction code may calculate the reduction in an
+ *               arbitrary order. The calculation may be reordered because
+ *               of cache behavior or multithreading requirements.
  * skipna      : If true, NAs are skipped instead of propagating.
  * keepdims    : If true, leaves the reduction dimensions in the result
  *               with size one.
@@ -483,7 +535,8 @@ NPY_NO_EXPORT PyArrayObject *
 PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
                         PyArray_Descr *operand_dtype,
                         PyArray_Descr *result_dtype,
-                        npy_bool *axis_flags, int skipna, int keepdims,
+                        npy_bool *axis_flags, int reorderable,
+                        int skipna, int keepdims,
                         PyArray_AssignReduceUnitFunc *assign_unit,
                         PyArray_ReduceInnerLoopFunc *inner_loop,
                         PyArray_ReduceInnerLoopFunc *masked_inner_loop,
@@ -552,6 +605,15 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
      * otherwise copy the initial values and get a view to the rest.
      */
     if (assign_unit != NULL) {
+        /*
+         * If this reduction is non-reorderable, make sure there are
+         * only 0 or 1 axes in axis_flags.
+         */
+        if (!reorderable && check_nonreorderable_axes(PyArray_NDIM(operand),
+                                        axis_flags, funcname) < 0) {
+            return NULL;
+        }
+
         if (assign_unit(result, !skipna, data) < 0) {
             goto fail;
         }
@@ -560,11 +622,17 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
     }
     else {
         op_view = PyArray_InitializeReduceResult(result, operand,
-                            axis_flags, skipna, &skip_first_count, funcname);
+                            axis_flags, reorderable, skipna,
+                            &skip_first_count, funcname);
         if (op_view == NULL) {
             Py_DECREF(result);
             return NULL;
         }
+        if (PyArray_SIZE(op_view) == 0) {
+            Py_DECREF(op_view);
+            op_view = NULL;
+            goto finish;
+        }
     }
 
     /* Set up the iterator */
diff --git a/numpy/core/src/multiarray/reduction.h b/numpy/core/src/multiarray/reduction.h
index 761497be1..3a04fd118 100644
--- a/numpy/core/src/multiarray/reduction.h
+++ b/numpy/core/src/multiarray/reduction.h
@@ -132,38 +132,6 @@ typedef int (PyArray_ReduceInnerLoopFunc)(NpyIter *iter,
                                             void *data);
 
 /*
- * This function executes all the standard NumPy reduction function
- * boilerplate code, just calling assign_unit and the appropriate
- * inner loop function where necessary.
- *
- * operand     : The array to be reduced.
- * out         : NULL, or the array into which to place the result.
- * operand_dtype : The dtype the inner loop expects for the operand.
- * result_dtype : The dtype the inner loop expects for the result.
- * axis_flags  : Flags indicating the reduction axes of 'operand'.
- * skipna      : If true, NAs are skipped instead of propagating.
- * keepdims    : If true, leaves the reduction dimensions in the result
- *               with size one.
- * assign_unit : If NULL, PyArray_InitializeReduceResult is used, otherwise
- *               this function is called to initialize the result to
- *               the reduction's unit.
- * inner_loop  : The inner loop which does the reduction.
- * masked_inner_loop: The inner loop which does the reduction with a mask.
- * data        : Data which is passed to assign_unit and the inner loop.
- * buffersize  : Buffer size for the iterator. For the default, pass in 0.
- * funcname    : The name of the reduction function, for error messages.
- */
-NPY_NO_EXPORT PyArrayObject *
-PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
-                        PyArray_Descr *operand_dtype,
-                        PyArray_Descr *result_dtype,
-                        npy_bool *axis_flags, int skipna, int keepdims,
-                        PyArray_AssignReduceUnitFunc *assign_unit,
-                        PyArray_ReduceInnerLoopFunc *inner_loop,
-                        PyArray_ReduceInnerLoopFunc *masked_inner_loop,
-                        void *data, npy_intp buffersize, const char *funcname);
-
-/*
  * This function counts the number of elements that a reduction
  * will see along the reduction directions, given the provided options.
  *
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index e222b4945..b5635979c 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2611,8 +2611,9 @@ initialize_reduce_result(int identity, PyArrayObject *result,
         return arr;
     }
     else {
+        int reorderable = (identity == PyUFunc_ReorderableNone);
         return PyArray_InitializeReduceResult(result, arr, axis_flags,
-                                skipna, out_skip_first_count, ufunc_name);
+                        reorderable, skipna, out_skip_first_count, ufunc_name);
     }
 }
 
@@ -2817,6 +2818,11 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
     if (arr_view == NULL) {
         goto fail;
     }
+    if (PyArray_SIZE(arr_view) == 0) {
+        Py_DECREF(arr_view);
+        arr_view = NULL;
+        goto finish;
+    }
 
     /* Now we can do a loop applying the ufunc in a straightforward manner */
     op[0] = result;
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 2db3fa029..bad78bbea 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -630,6 +630,19 @@ class TestUfunc(TestCase):
         a = a[1:, 1:, 1:]
         self.check_unitless_reduction(a)
 
+    def test_unitless_reduction_nonreorderable(self):
+        a = np.array([[8.0, 2.0, 2.0], [1.0, 0.5, 0.25]])
+
+        res = np.divide.reduce(a, axis=0)
+        assert_equal(res, [8.0, 4.0, 8.0])
+
+        res = np.divide.reduce(a, axis=1)
+        assert_equal(res, [2.0, 8.0])
+
+        res = np.divide.reduce(a, axis=())
+        assert_equal(res, a)
+
+        assert_raises(ValueError, np.divide.reduce, a, axis=(0,1))
 
 if __name__ == "__main__":
     run_module_suite()
author	Mark Wiebe <mwwiebe@gmail.com>	2011-08-21 11:46:52 -0700
committer	Charles Harris <charlesr.harris@gmail.com>	2011-08-27 07:26:58 -0600
commit	9a9f08e089ff49fccca1feac9620c2837f8c09bd (patch)
tree	87c29d7db3c96ff0ad2b3c7c6953c9efb6799ce5
parent	f3d60f9696856b47f4f5818b751eba2f7dcc402a (diff)
download	numpy-9a9f08e089ff49fccca1feac9620c2837f8c09bd.tar.gz