summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Wiebe <mwwiebe@gmail.com>2011-01-30 23:26:43 -0800
committerMark Wiebe <mwwiebe@gmail.com>2011-02-03 14:39:41 -0800
commitf60797ba64ccf33597225d23b893b6eb11149860 (patch)
treeac404c33241cd48e7a65d0437c7cc9671dce3193
parent31725765f5ce04a96b3bb53728756d135577fbdf (diff)
downloadnumpy-f60797ba64ccf33597225d23b893b6eb11149860.tar.gz
BUG: perf: Operations like "a[10:20] += 10" were doing a redundant copy
-rw-r--r--numpy/core/src/multiarray/ctors.c42
-rw-r--r--numpy/core/src/multiarray/new_iterator.c.src5
2 files changed, 39 insertions, 8 deletions
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index e0115db8b..7ebc83507 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -447,7 +447,8 @@ copy_and_swap(void *dst, void *src, int itemsize, npy_intp numitems,
}
/* Gets a half-open range [start, end) which contains the array data */
-void _get_memory_extents(PyArrayObject *arr,
+NPY_NO_EXPORT void
+_get_array_memory_extents(PyArrayObject *arr,
npy_uintp *out_start, npy_uintp *out_end)
{
npy_uintp start, end;
@@ -481,12 +482,13 @@ void _get_memory_extents(PyArrayObject *arr,
}
/* Returns 1 if the arrays have overlapping data, 0 otherwise */
-int _arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2)
+NPY_NO_EXPORT int
+_arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2)
{
npy_uintp start1 = 0, start2 = 0, end1 = 0, end2 = 0;
- _get_memory_extents(arr1, &start1, &end1);
- _get_memory_extents(arr2, &start2, &end2);
+ _get_array_memory_extents(arr1, &start1, &end1);
+ _get_array_memory_extents(arr2, &start2, &end2);
return (start1 < end2) && (start2 < end1);
}
@@ -507,6 +509,38 @@ NPY_NO_EXPORT int
PyArray_MoveInto(PyArrayObject *dst, PyArrayObject *src)
{
/*
+ * Performance fix for expressions like "a[1000:6000] += x". In this
+ * case, first an in-place add is done, followed by an assignment,
+ * equivalently expressed like this:
+ *
+ * tmp = a[1000:6000] # Calls array_subscript_nice in mapping.c
+ * np.add(tmp, x, tmp)
+ * a[1000:6000] = tmp # Calls array_ass_sub in mapping.c
+ *
+ * In the assignment the underlying data type, shape, strides, and
+ * data pointers are identical, but src != dst because they are separately
+ * generated slices. By detecting this and skipping the redundant
+ * copy of values to themselves, we potentially give a big speed boost.
+ *
+ * Note that we don't call EquivTypes, because usually the exact same
+ * dtype object will appear, and we don't want to slow things down
+ * with a complicated comparison. The comparisons are ordered to
+ * try and reject this with as little work as possible.
+ */
+ if (PyArray_DATA(src) == PyArray_DATA(dst) &&
+ PyArray_DESCR(src) == PyArray_DESCR(dst) &&
+ PyArray_NDIM(src) == PyArray_NDIM(dst) &&
+ PyArray_CompareLists(PyArray_DIMS(src),
+ PyArray_DIMS(dst),
+ PyArray_NDIM(src)) &&
+ PyArray_CompareLists(PyArray_STRIDES(src),
+ PyArray_STRIDES(dst),
+ PyArray_NDIM(src))) {
+ /*printf("Redundant copy operation detected\n");*/
+ return 0;
+ }
+
+ /*
* A special case is when there is just one dimension with positive
* strides, and we pass that to CopyInto, which correctly handles
* it for most cases. It may still incorrectly handle copying of
diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src
index 4d031efbd..49d8ad8b5 100644
--- a/numpy/core/src/multiarray/new_iterator.c.src
+++ b/numpy/core/src/multiarray/new_iterator.c.src
@@ -4632,10 +4632,7 @@ npyiter_allocate_arrays(NpyIter *iter,
npyiter_replace_axisdata(iter, iiter, op[iiter], ondim,
PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL);
- /*
- * The temporary copy is aligned and needs no cast, and
- * has constant stride 0 so never needs buffering
- */
+ /* The temporary copy is aligned and needs no cast */
op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
op_itflags[iiter] &= ~NPY_OP_ITFLAG_CAST;
}