diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-30 23:26:43 -0800 |
---|---|---|
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-02-03 14:39:41 -0800 |
commit | f60797ba64ccf33597225d23b893b6eb11149860 (patch) | |
tree | ac404c33241cd48e7a65d0437c7cc9671dce3193 | |
parent | 31725765f5ce04a96b3bb53728756d135577fbdf (diff) | |
download | numpy-f60797ba64ccf33597225d23b893b6eb11149860.tar.gz |
BUG: perf: Operations like "a[10:20] += 10" were doing a redundant copy
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 42 | ||||
-rw-r--r-- | numpy/core/src/multiarray/new_iterator.c.src | 5 |
2 files changed, 39 insertions, 8 deletions
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index e0115db8b..7ebc83507 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -447,7 +447,8 @@ copy_and_swap(void *dst, void *src, int itemsize, npy_intp numitems, } /* Gets a half-open range [start, end) which contains the array data */ -void _get_memory_extents(PyArrayObject *arr, +NPY_NO_EXPORT void +_get_array_memory_extents(PyArrayObject *arr, npy_uintp *out_start, npy_uintp *out_end) { npy_uintp start, end; @@ -481,12 +482,13 @@ void _get_memory_extents(PyArrayObject *arr, } /* Returns 1 if the arrays have overlapping data, 0 otherwise */ -int _arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2) +NPY_NO_EXPORT int +_arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2) { npy_uintp start1 = 0, start2 = 0, end1 = 0, end2 = 0; - _get_memory_extents(arr1, &start1, &end1); - _get_memory_extents(arr2, &start2, &end2); + _get_array_memory_extents(arr1, &start1, &end1); + _get_array_memory_extents(arr2, &start2, &end2); return (start1 < end2) && (start2 < end1); } @@ -507,6 +509,38 @@ NPY_NO_EXPORT int PyArray_MoveInto(PyArrayObject *dst, PyArrayObject *src) { /* + * Performance fix for expressions like "a[1000:6000] += x". In this + * case, first an in-place add is done, followed by an assignment, + * equivalently expressed like this: + * + * tmp = a[1000:6000] # Calls array_subscript_nice in mapping.c + * np.add(tmp, x, tmp) + * a[1000:6000] = tmp # Calls array_ass_sub in mapping.c + * + * In the assignment the underlying data type, shape, strides, and + * data pointers are identical, but src != dst because they are separately + * generated slices. By detecting this and skipping the redundant + * copy of values to themselves, we potentially give a big speed boost. + * + * Note that we don't call EquivTypes, because usually the exact same + * dtype object will appear, and we don't want to slow things down + * with a complicated comparison. 
The comparisons are ordered to + * try and reject this with as little work as possible. + */ + if (PyArray_DATA(src) == PyArray_DATA(dst) && + PyArray_DESCR(src) == PyArray_DESCR(dst) && + PyArray_NDIM(src) == PyArray_NDIM(dst) && + PyArray_CompareLists(PyArray_DIMS(src), + PyArray_DIMS(dst), + PyArray_NDIM(src)) && + PyArray_CompareLists(PyArray_STRIDES(src), + PyArray_STRIDES(dst), + PyArray_NDIM(src))) { + /*printf("Redundant copy operation detected\n");*/ + return 0; + } + + /* * A special case is when there is just one dimension with positive * strides, and we pass that to CopyInto, which correctly handles * it for most cases. It may still incorrectly handle copying of diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index 4d031efbd..49d8ad8b5 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -4632,10 +4632,7 @@ npyiter_allocate_arrays(NpyIter *iter, npyiter_replace_axisdata(iter, iiter, op[iiter], ondim, PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL); - /* - * The temporary copy is aligned and needs no cast, and - * has constant stride 0 so never needs buffering - */ + /* The temporary copy is aligned and needs no cast */ op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; op_itflags[iiter] &= ~NPY_OP_ITFLAG_CAST; } |