diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2018-04-09 12:54:34 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-09 12:54:34 -0600 |
commit | 635559f1b3794eaa8440ce8ff6e498445c2f8b34 (patch) | |
tree | 266a5103111945c074798b5b61fc089463c0d6a8 /numpy | |
parent | 07d590c980edb05f1ef67cd27fff2ffe2d9c6047 (diff) | |
parent | 52ff55c559b974c96a053cab9457492f04a58a12 (diff) | |
download | numpy-635559f1b3794eaa8440ce8ff6e498445c2f8b34.tar.gz |
Merge pull request #10796 from juliantaylor/inplace-overlap
BUG: fix wrong inplace vectorization on overlapping arguments
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 20 |
1 files changed, 16 insertions, 4 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index c1dfe15da..d196a8d4e 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -30,6 +30,16 @@ */ #define PW_BLOCKSIZE 128 + +/* + * largest simd vector size in bytes numpy supports + * it is currently a extremely large value as it is only used for memory + * overlap checks + */ +#ifndef NPY_MAX_SIMD_SIZE +#define NPY_MAX_SIMD_SIZE 1024 +#endif + /* * include vectorized functions and dispatchers * this file is safe to include also for generic builds @@ -180,10 +190,12 @@ do { \ /* condition allows compiler to optimize the generic macro */ \ if (IS_BINARY_CONT(tin, tout)) { \ - if (args[2] == args[0]) { \ + if (abs_ptrdiff(args[2], args[0]) == 0 && \ + abs_ptrdiff(args[2], args[1]) >= NPY_MAX_SIMD_SIZE) { \ BASE_BINARY_LOOP_INP(tin, tout, op) \ } \ - else if (args[2] == args[1]) { \ + else if (abs_ptrdiff(args[2], args[1]) == 0 && \ + abs_ptrdiff(args[2], args[0]) >= NPY_MAX_SIMD_SIZE) { \ BASE_BINARY_LOOP_INP(tin, tout, op) \ } \ else { \ @@ -191,7 +203,7 @@ } \ } \ else if (IS_BINARY_CONT_S1(tin, tout)) { \ - if (args[1] == args[2]) { \ + if (abs_ptrdiff(args[2], args[1]) == 0) { \ BASE_BINARY_LOOP_S_INP(tin, tout, in1, args[0], in2, ip2, op) \ } \ else { \ @@ -199,7 +211,7 @@ } \ } \ else if (IS_BINARY_CONT_S2(tin, tout)) { \ - if (args[0] == args[2]) { \ + if (abs_ptrdiff(args[2], args[0]) == 0) { \ BASE_BINARY_LOOP_S_INP(tin, tout, in2, args[1], in1, ip1, op) \ } \ else { \ |