diff options
author | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-03-27 13:55:37 -0700 |
---|---|---|
committer | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-04-19 10:47:15 -0700 |
commit | f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb (patch) | |
tree | 629cc3c9da87ba3fe1c910c332bb670ad4d909f2 /numpy/core | |
parent | 1352359095cbf64c6ad3426ed674eb61cf47e258 (diff) | |
download | numpy-f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb.tar.gz |
BUG: Fixing AVX512F build issues on clang6.0
clang6.0 fails to compile this code:
__asm__ __volatile__ (
"vpaddd %zmm1, %zmm2, %zmm3\n\t"
);
Note that this is a known issue in clang6.0; clang7.0 and gcc do not
have this problem. As a result of this compile failure, the flag
HAVE_LINK_AVX512F is not set. Hence, the AVX512F version of exp and log
doesn't get built. If AVX512F is detected at runtime, instead of
choosing to run the AVX2 version, it will end up running the scalar version.
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 23 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 8 |
2 files changed, 22 insertions, 9 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 024d495cd..a9526c2ed 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1588,21 +1588,34 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE /**begin repeat * #isa = avx512f, avx2# * #ISA = AVX512F, AVX2# - * #CHK = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2# - * #ATTR = NPY_GCC_TARGET_AVX512F, NPY_GCC_TARGET_AVX2# + * #CHK1 = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2# + * #CHK2 = NPY_HAVE_AVX512F_INTRINSICS, NPY_HAVE_AVX2_INTRINSICS# */ /**begin repeat1 * #func = exp, log# + * #scalarf = npy_expf, npy_logf# */ -#if @CHK@ -NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +NPY_NO_EXPORT NPY_GCC_OPT_3 void FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) { +#if defined @CHK1@ && defined @CHK2@ @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]); -} +#else + /* + * This is the path it would take if ISA was runtime detected, but not + * compiled for. It fixes the error on clang6.0 which fails to compile + * AVX512F version. Not sure if I like this idea, if during runtime it + * detects AXV512F, it will end up running the scalar version instead + * of AVX2. 
+ */ + UNARY_LOOP { + const npy_float in1 = *(npy_float *)ip1; + *(npy_float *)op1 = @scalarf@(in1); + } #endif +} /**end repeat1**/ /**end repeat**/ diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 0fa98a68a..31f11d302 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -1098,7 +1098,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) /* bunch of helper functions used in ISA_exp/log_FLOAT*/ -#if HAVE_ATTRIBUTE_TARGET_AVX2 +#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined NPY_HAVE_AVX2_INTRINSICS static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX2 __m256 avx2_fmadd(__m256 a, __m256 b, __m256 c) { @@ -1186,7 +1186,7 @@ avx2_get_mantissa(__m256 x) } #endif -#if HAVE_ATTRIBUTE_TARGET_AVX512F +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined NPY_HAVE_AVX512F_INTRINSICS static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16 avx512_get_full_load_mask(void) { @@ -1241,7 +1241,7 @@ avx512_get_mantissa(__m512 x) * #fmadd = avx2_fmadd,_mm512_fmadd_ps# **/ -#if HAVE_ATTRIBUTE_TARGET_@ISA@ +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@ @isa@_range_reduction(@vtype@ x, @vtype@ y, @vtype@ c1, @vtype@ c2, @vtype@ c3) { @@ -1269,7 +1269,7 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@ * #cvtps_epi32 = _mm256_cvtps_epi32, # */ -#if HAVE_ATTRIBUTE_TARGET_@ISA@ +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS /* * Vectorized implementation of exp using AVX2 and AVX512: |