diff options
-rw-r--r-- | doc/release/1.15.0-notes.rst | 8 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 12 |
2 files changed, 17 insertions, 3 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst index 4cfad0fc5..b76b68a19 100644 --- a/doc/release/1.15.0-notes.rst +++ b/doc/release/1.15.0-notes.rst @@ -233,5 +233,13 @@ the user wants to perform a stable sort thus harming the readability. This change allows the user to specify kind='stable' thus clarifying the intent. +Do not make temporary copies for in-place accumulation +------------------------------------------------------ +When ufuncs perform accumulation they no longer make temporary copies because +of the overlap between input and output, that is, the next element accumulated +is added before the accumulated result is stored in its place, hence the +overlap is safe. Avoiding the copy results in faster execution. + + Changes ======= diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index bf5a4ead3..6dd597b3a 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -3215,9 +3215,15 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, */ ndim_iter = ndim; flags |= NPY_ITER_MULTI_INDEX; - /* Add some more flags */ - op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED; - op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED; + /* + * Add some more flags. + * + * The accumulation outer loop is 'elementwise' over the array, so turn + * on NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE. That is, in-place + * accumulate(x, out=x) is safe to do without temporary copies. + */ + op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; + op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; op_dtypes_param = op_dtypes; op_dtypes[1] = op_dtypes[0]; NPY_UF_DBG_PRINT("Allocating outer iterator\n"); |