diff options
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 23 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 8 |
2 files changed, 22 insertions, 9 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 024d495cd..a9526c2ed 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1588,21 +1588,34 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE /**begin repeat * #isa = avx512f, avx2# * #ISA = AVX512F, AVX2# - * #CHK = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2# - * #ATTR = NPY_GCC_TARGET_AVX512F, NPY_GCC_TARGET_AVX2# + * #CHK1 = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2# + * #CHK2 = NPY_HAVE_AVX512F_INTRINSICS, NPY_HAVE_AVX2_INTRINSICS# */ /**begin repeat1 * #func = exp, log# + * #scalarf = npy_expf, npy_logf# */ -#if @CHK@ -NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +NPY_NO_EXPORT NPY_GCC_OPT_3 void FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) { +#if defined @CHK1@ && defined @CHK2@ @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]); -} +#else + /* + * This is the path it would take if ISA was runtime detected, but not + * compiled for. It fixes the error on clang6.0 which fails to compile + * AVX512F version. Not sure if I like this idea, if during runtime it + * detects AVX512F, it will end up running the scalar version instead + * of AVX2. 
+ */ + UNARY_LOOP { + const npy_float in1 = *(npy_float *)ip1; + *(npy_float *)op1 = @scalarf@(in1); + } #endif +} /**end repeat1**/ /**end repeat**/ diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 0fa98a68a..31f11d302 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -1098,7 +1098,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) /* bunch of helper functions used in ISA_exp/log_FLOAT*/ -#if HAVE_ATTRIBUTE_TARGET_AVX2 +#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined NPY_HAVE_AVX2_INTRINSICS static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX2 __m256 avx2_fmadd(__m256 a, __m256 b, __m256 c) { @@ -1186,7 +1186,7 @@ avx2_get_mantissa(__m256 x) } #endif -#if HAVE_ATTRIBUTE_TARGET_AVX512F +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined NPY_HAVE_AVX512F_INTRINSICS static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16 avx512_get_full_load_mask(void) { @@ -1241,7 +1241,7 @@ avx512_get_mantissa(__m512 x) * #fmadd = avx2_fmadd,_mm512_fmadd_ps# **/ -#if HAVE_ATTRIBUTE_TARGET_@ISA@ +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@ @isa@_range_reduction(@vtype@ x, @vtype@ y, @vtype@ c1, @vtype@ c2, @vtype@ c3) { @@ -1269,7 +1269,7 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@ * #cvtps_epi32 = _mm256_cvtps_epi32, # */ -#if HAVE_ATTRIBUTE_TARGET_@ISA@ +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS /* * Vectorized implementation of exp using AVX2 and AVX512: |