summary | refs | log | tree | commit | diff
path: root/numpy
diff options
context:
space:
mode:
author Charles Harris <charlesr.harris@gmail.com> 2018-04-09 12:54:34 -0600
committer GitHub <noreply@github.com> 2018-04-09 12:54:34 -0600
commit 635559f1b3794eaa8440ce8ff6e498445c2f8b34 (patch)
tree 266a5103111945c074798b5b61fc089463c0d6a8 /numpy
parent 07d590c980edb05f1ef67cd27fff2ffe2d9c6047 (diff)
parent 52ff55c559b974c96a053cab9457492f04a58a12 (diff)
download numpy-635559f1b3794eaa8440ce8ff6e498445c2f8b34.tar.gz
Merge pull request #10796 from juliantaylor/inplace-overlap
BUG: fix wrong inplace vectorization on overlapping arguments
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/core/src/umath/loops.c.src 20
1 files changed, 16 insertions, 4 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index c1dfe15da..d196a8d4e 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -30,6 +30,16 @@
*/
#define PW_BLOCKSIZE 128
+
+/*
+ * largest simd vector size in bytes numpy supports
+ * it is currently an extremely large value as it is only used for memory
+ * overlap checks
+ */
+#ifndef NPY_MAX_SIMD_SIZE
+#define NPY_MAX_SIMD_SIZE 1024
+#endif
+
/*
* include vectorized functions and dispatchers
* this file is safe to include also for generic builds
@@ -180,10 +190,12 @@
do { \
/* condition allows compiler to optimize the generic macro */ \
if (IS_BINARY_CONT(tin, tout)) { \
- if (args[2] == args[0]) { \
+ if (abs_ptrdiff(args[2], args[0]) == 0 && \
+ abs_ptrdiff(args[2], args[1]) >= NPY_MAX_SIMD_SIZE) { \
BASE_BINARY_LOOP_INP(tin, tout, op) \
} \
- else if (args[2] == args[1]) { \
+ else if (abs_ptrdiff(args[2], args[1]) == 0 && \
+ abs_ptrdiff(args[2], args[0]) >= NPY_MAX_SIMD_SIZE) { \
BASE_BINARY_LOOP_INP(tin, tout, op) \
} \
else { \
@@ -191,7 +203,7 @@
} \
} \
else if (IS_BINARY_CONT_S1(tin, tout)) { \
- if (args[1] == args[2]) { \
+ if (abs_ptrdiff(args[2], args[1]) == 0) { \
BASE_BINARY_LOOP_S_INP(tin, tout, in1, args[0], in2, ip2, op) \
} \
else { \
@@ -199,7 +211,7 @@
} \
} \
else if (IS_BINARY_CONT_S2(tin, tout)) { \
- if (args[0] == args[2]) { \
+ if (abs_ptrdiff(args[2], args[0]) == 0) { \
BASE_BINARY_LOOP_S_INP(tin, tout, in2, args[1], in1, ip1, op) \
} \
else { \