diff options
author | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-05-09 18:19:07 -0700 |
---|---|---|
committer | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-05-15 21:32:02 -0700 |
commit | 1afc95d44a6322ea2ad690e71a96e7b990eed3ad (patch) | |
tree | d6731d7359b8ca94787f201252d06dd16806f65a /numpy/core | |
parent | 56201bb0cadbf36806aed14391d399f6a1cb6152 (diff) | |
download | numpy-1afc95d44a6322ea2ad690e71a96e7b990eed3ad.tar.gz |
BUG: exp, log AVX loops do not use steps
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 19 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 20 |
2 files changed, 25 insertions, 14 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 89eeb0c47..e6d65b307 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1621,21 +1621,16 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE NPY_NO_EXPORT NPY_GCC_OPT_3 void FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) { + if (!run_unary_@isa@_@func@_FLOAT(args, dimensions, steps)) { + UNARY_LOOP { #if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS - @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]); + @ISA@_@func@_FLOAT((npy_float *)op1, (npy_float *)ip1, 1); #else - /* - * This is the path it would take if ISA was runtime detected, but not - * compiled for. It fixes the error on clang6.0 which fails to compile - * AVX512F version. Not sure if I like this idea, if during runtime it - * detects AXV512F, it will end up running the scalar version instead - * of AVX2. - */ - UNARY_LOOP { - const npy_float in1 = *(npy_float *)ip1; - *(npy_float *)op1 = @scalarf@(in1); - } + const npy_float in1 = *(npy_float *)ip1; + *(npy_float *)op1 = @scalarf@(in1); #endif + } + } } /**end repeat1**/ diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 72493e308..1c6ac4426 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -122,20 +122,36 @@ abs_ptrdiff(char *a, char *b) /**begin repeat * #ISA = AVX2, AVX512F# + * #isa = avx2, avx512f# + * #REGISTER_SIZE = 32, 64# */ /* prototypes */ -#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS /**begin repeat1 * #func = exp, log# */ +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS static NPY_INLINE void @ISA@_@func@_FLOAT(npy_float *, npy_float *, const npy_intp n); +#endif -/**end repeat1**/ +static NPY_INLINE int +run_unary_@isa@_@func@_FLOAT(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS + if (IS_BLOCKABLE_UNARY(sizeof(npy_float), @REGISTER_SIZE@)) { + @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]); + return 1; + } + else + return 0; #endif + return 0; +} + +/**end repeat1**/ /**end repeat**/ |