diff options
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 20 |
1 files changed, 16 insertions, 4 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index c1dfe15da..d196a8d4e 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -30,6 +30,16 @@ */ #define PW_BLOCKSIZE 128 + +/* + * largest simd vector size in bytes numpy supports + * it is currently a extremely large value as it is only used for memory + * overlap checks + */ +#ifndef NPY_MAX_SIMD_SIZE +#define NPY_MAX_SIMD_SIZE 1024 +#endif + /* * include vectorized functions and dispatchers * this file is safe to include also for generic builds @@ -180,10 +190,12 @@ do { \ /* condition allows compiler to optimize the generic macro */ \ if (IS_BINARY_CONT(tin, tout)) { \ - if (args[2] == args[0]) { \ + if (abs_ptrdiff(args[2], args[0]) == 0 && \ + abs_ptrdiff(args[2], args[1]) >= NPY_MAX_SIMD_SIZE) { \ BASE_BINARY_LOOP_INP(tin, tout, op) \ } \ - else if (args[2] == args[1]) { \ + else if (abs_ptrdiff(args[2], args[1]) == 0 && \ + abs_ptrdiff(args[2], args[0]) >= NPY_MAX_SIMD_SIZE) { \ BASE_BINARY_LOOP_INP(tin, tout, op) \ } \ else { \ @@ -191,7 +203,7 @@ } \ } \ else if (IS_BINARY_CONT_S1(tin, tout)) { \ - if (args[1] == args[2]) { \ + if (abs_ptrdiff(args[2], args[1]) == 0) { \ BASE_BINARY_LOOP_S_INP(tin, tout, in1, args[0], in2, ip2, op) \ } \ else { \ @@ -199,7 +211,7 @@ } \ } \ else if (IS_BINARY_CONT_S2(tin, tout)) { \ - if (args[0] == args[2]) { \ + if (abs_ptrdiff(args[2], args[0]) == 0) { \ BASE_BINARY_LOOP_S_INP(tin, tout, in2, args[1], in1, ip1, op) \ } \ else { \ |