diff options
author | Jaime Fernandez <jaime.frio@gmail.com> | 2015-04-24 06:49:17 -0700 |
---|---|---|
committer | Jaime Fernandez <jaime.frio@gmail.com> | 2015-04-24 06:49:17 -0700 |
commit | 9ec8e5c0af892abe25eee4f269bd225dac47414b (patch) | |
tree | 5cf7a9a7df3f1d7a42d21897a131532935263deb | |
parent | 5d0f474de2a6ac953b6a26abb54515640db64bbf (diff) | |
download | numpy-9ec8e5c0af892abe25eee4f269bd225dac47414b.tar.gz |
MAINT: Use proper abs function for pointer addresses in simd.inc.src
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 36 |
1 files changed, 28 insertions, 8 deletions
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 5b111eb0d..55a638d6c 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -27,6 +27,19 @@ #include <stdlib.h> #include <string.h> /* for memcpy */ +/* Figure out the right abs function for pointer addresses */ +static NPY_INLINE npy_intp +abs_intp(npy_intp x) +{ +#if (NPY_SIZEOF_INTP <= NPY_SIZEOF_INT) + return abs(x); +#elif (NPY_SIZEOF_INTP <= NPY_SIZEOF_LONG) + return labs(x); +#else + return llabs(x); +#endif +} + /* * stride is equal to element size and input and destination are equal or * don't overlap within one register @@ -34,10 +47,11 @@ #define IS_BLOCKABLE_UNARY(esize, vsize) \ (steps[0] == (esize) && steps[0] == steps[1] && \ (npy_is_aligned(args[0], esize) && npy_is_aligned(args[1], esize)) && \ - ((abs(args[1] - args[0]) >= (vsize)) || ((abs(args[1] - args[0]) == 0)))) + ((abs_intp(args[1] - args[0]) >= (vsize)) || \ + ((abs_intp(args[1] - args[0]) == 0)))) #define IS_BLOCKABLE_REDUCE(esize, vsize) \ - (steps[1] == (esize) && abs(args[1] - args[0]) >= (vsize) && \ + (steps[1] == (esize) && abs_intp(args[1] - args[0]) >= (vsize) && \ npy_is_aligned(args[1], (esize)) && \ npy_is_aligned(args[0], (esize))) @@ -45,20 +59,26 @@ (steps[0] == steps[1] && steps[1] == steps[2] && steps[2] == (esize) && \ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \ npy_is_aligned(args[0], (esize)) && \ - (abs(args[2] - args[0]) >= (vsize) || abs(args[2] - args[0]) == 0) && \ - (abs(args[2] - args[1]) >= (vsize) || abs(args[2] - args[1]) >= 0)) + (abs_intp(args[2] - args[0]) >= (vsize) || \ + abs_intp(args[2] - args[0]) == 0) && \ + (abs_intp(args[2] - args[1]) >= (vsize) || \ + abs_intp(args[2] - args[1]) >= 0)) #define IS_BLOCKABLE_BINARY_SCALAR1(esize, vsize) \ (steps[0] == 0 && steps[1] == steps[2] && steps[2] == (esize) && \ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \ - ((abs(args[2] - 
args[1]) >= (vsize)) || (abs(args[2] - args[1]) == 0)) && \ - abs(args[2] - args[0]) >= (esize)) + ((abs_intp(args[2] - args[1]) >= (vsize)) || \ + (abs_intp(args[2] - args[1]) == 0)) && \ + abs_intp(args[2] - args[0]) >= (esize)) #define IS_BLOCKABLE_BINARY_SCALAR2(esize, vsize) \ (steps[1] == 0 && steps[0] == steps[2] && steps[2] == (esize) && \ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[0], (esize)) && \ - ((abs(args[2] - args[0]) >= (vsize)) || (abs(args[2] - args[0]) == 0)) && \ - abs(args[2] - args[1]) >= (esize)) + ((abs_intp(args[2] - args[0]) >= (vsize)) || \ + (abs_intp(args[2] - args[0]) == 0)) && \ + abs_intp(args[2] - args[1]) >= (esize)) + +#undef abs_intp #define IS_BLOCKABLE_BINARY_BOOL(esize, vsize) \ (steps[0] == (esize) && steps[0] == steps[1] && steps[2] == (1) && \ |