summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release/1.15.0-notes.rst8
-rw-r--r--numpy/core/src/umath/ufunc_object.c12
2 files changed, 17 insertions, 3 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst
index 4cfad0fc5..b76b68a19 100644
--- a/doc/release/1.15.0-notes.rst
+++ b/doc/release/1.15.0-notes.rst
@@ -233,5 +233,13 @@ the user wants to perform a stable sort thus harming the readability.
This change allows the user to specify kind='stable' thus clarifying
the intent.
+Do not make temporary copies for in-place accumulation
+------------------------------------------------------
+When ufuncs perform accumulation they no longer make temporary copies because
+of the overlap between input and output, that is, the next element accumulated
+is added before the accumulated result is stored in its place, hence the
+overlap is safe. Avoiding the copy results in faster execution.
+
+
Changes
=======
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index bf5a4ead3..6dd597b3a 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -3215,9 +3215,15 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
*/
ndim_iter = ndim;
flags |= NPY_ITER_MULTI_INDEX;
- /* Add some more flags */
- op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED;
- op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED;
+ /*
+ * Add some more flags.
+ *
+ * The accumulation outer loop is 'elementwise' over the array, so turn
+ * on NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE. That is, in-place
+ * accumulate(x, out=x) is safe to do without temporary copies.
+ */
+ op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
+ op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
op_dtypes_param = op_dtypes;
op_dtypes[1] = op_dtypes[0];
NPY_UF_DBG_PRINT("Allocating outer iterator\n");