BUG: Fixing AVX512F build issues on clang6.0

clang6.0 fails to compile this code: __asm__ __volatile__ ( "vpaddd %zmm1, %zmm2, %zmm3\n\t" ); Note that this is a known issue in clang6.0; clang7.0 and gcc do not have this problem. Because this compile check fails, the flag HAVE_LINK_AVX512F is not set. Hence, the AVX512F versions of exp and log don't get built. If AVX512F is detected at runtime, instead of choosing to run the AVX2 version, it will end up running the scalar version.
author: Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> 2019-03-27 13:55:37 -0700
committer: Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> 2019-04-19 10:47:15 -0700
commit: f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb (patch)
tree: 629cc3c9da87ba3fe1c910c332bb670ad4d909f2 /numpy
parent: 1352359095cbf64c6ad3426ed674eb61cf47e258 (diff)
download: numpy-f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb.tar.gz
2 files changed, 22 insertions, 9 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 024d495cd..a9526c2ed 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1588,21 +1588,34 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
 /**begin repeat
  * #isa = avx512f, avx2#
  * #ISA = AVX512F, AVX2#
- * #CHK = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2#
- * #ATTR = NPY_GCC_TARGET_AVX512F, NPY_GCC_TARGET_AVX2#
+ * #CHK1 = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2#
+ * #CHK2 = NPY_HAVE_AVX512F_INTRINSICS, NPY_HAVE_AVX2_INTRINSICS#
  */
 
 /**begin repeat1
  *  #func = exp, log#
+ *  #scalarf = npy_expf, npy_logf#
  */
 
-#if @CHK@
-NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
 FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
 {
+#if defined @CHK1@ && defined @CHK2@
     @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
-}
+#else
+    /*
+     * This is the path taken if the ISA was detected at runtime but not
+     * compiled for. It fixes the error on clang6.0, which fails to compile
+     * the AVX512F version. Not sure if I like this idea: if AVX512F is
+     * detected at runtime, it will end up running the scalar version
+     * instead of AVX2.
+     */
+    UNARY_LOOP {
+	const npy_float in1 = *(npy_float *)ip1;
+	*(npy_float *)op1 = @scalarf@(in1);
+    }
 #endif
+}
 
 /**end repeat1**/
 /**end repeat**/
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 0fa98a68a..31f11d302 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -1098,7 +1098,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
 
 /* bunch of helper functions used in ISA_exp/log_FLOAT*/
 
-#if HAVE_ATTRIBUTE_TARGET_AVX2
+#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined NPY_HAVE_AVX2_INTRINSICS
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX2 __m256
 avx2_fmadd(__m256 a, __m256 b, __m256 c)
 {
@@ -1186,7 +1186,7 @@ avx2_get_mantissa(__m256 x)
 }
 #endif
 
-#if HAVE_ATTRIBUTE_TARGET_AVX512F
+#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined NPY_HAVE_AVX512F_INTRINSICS
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
 avx512_get_full_load_mask(void)
 {
@@ -1241,7 +1241,7 @@ avx512_get_mantissa(__m512 x)
  * #fmadd = avx2_fmadd,_mm512_fmadd_ps#
  **/
 
-#if HAVE_ATTRIBUTE_TARGET_@ISA@
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
 @isa@_range_reduction(@vtype@ x, @vtype@ y, @vtype@ c1, @vtype@ c2, @vtype@ c3)
 {
@@ -1269,7 +1269,7 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
  * #cvtps_epi32 = _mm256_cvtps_epi32, #
  */
 
-#if HAVE_ATTRIBUTE_TARGET_@ISA@
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
 
 /*
  * Vectorized implementation of exp using AVX2 and AVX512:
author	Raghuveer Devulapalli <raghuveer.devulapalli@intel.com>	2019-03-27 13:55:37 -0700
committer	Raghuveer Devulapalli <raghuveer.devulapalli@intel.com>	2019-04-19 10:47:15 -0700
commit	f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb (patch)
tree	629cc3c9da87ba3fe1c910c332bb670ad4d909f2 /numpy
parent	1352359095cbf64c6ad3426ed674eb61cf47e258 (diff)
download	numpy-f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb.tar.gz