summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorRaghuveer Devulapalli <raghuveer.devulapalli@intel.com>2019-03-27 13:55:37 -0700
committerRaghuveer Devulapalli <raghuveer.devulapalli@intel.com>2019-04-19 10:47:15 -0700
commitf9d14627b36bc25aace6c78e6e5f6fe68c08bfcb (patch)
tree629cc3c9da87ba3fe1c910c332bb670ad4d909f2 /numpy
parent1352359095cbf64c6ad3426ed674eb61cf47e258 (diff)
downloadnumpy-f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb.tar.gz
BUG: Fixing AVX512F build issues on clang6.0
clang6.0 fails to compile this code: __asm__ __volatile__ ( "vpaddd %zmm1, %zmm2, %zmm3\n\t" ); Note that this is a known issue in clang6.0; clang7.0 and gcc do not have this problem. Because the compile check fails, the flag HAVE_LINK_AVX512F does not get set, and hence the AVX512F versions of exp and log don't get built. If AVX512F is then detected at runtime, instead of choosing to run the AVX2 version, it will end up running the scalar version.
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/src/umath/loops.c.src23
-rw-r--r--numpy/core/src/umath/simd.inc.src8
2 files changed, 22 insertions, 9 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 024d495cd..a9526c2ed 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1588,21 +1588,34 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
/**begin repeat
* #isa = avx512f, avx2#
* #ISA = AVX512F, AVX2#
- * #CHK = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2#
- * #ATTR = NPY_GCC_TARGET_AVX512F, NPY_GCC_TARGET_AVX2#
+ * #CHK1 = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2#
+ * #CHK2 = NPY_HAVE_AVX512F_INTRINSICS, NPY_HAVE_AVX2_INTRINSICS#
*/
/**begin repeat1
* #func = exp, log#
+ * #scalarf = npy_expf, npy_logf#
*/
-#if @CHK@
-NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
+#if defined @CHK1@ && defined @CHK2@
@ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
-}
+#else
+ /*
+ * This is the path it would take if ISA was runtime detected, but not
+ * compiled for. It fixes the error on clang6.0 which fails to compile
+ * AVX512F version. Not sure if I like this idea, if during runtime it
+ * detects AVX512F, it will end up running the scalar version instead
+ * of AVX2.
+ */
+ UNARY_LOOP {
+ const npy_float in1 = *(npy_float *)ip1;
+ *(npy_float *)op1 = @scalarf@(in1);
+ }
#endif
+}
/**end repeat1**/
/**end repeat**/
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 0fa98a68a..31f11d302 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -1098,7 +1098,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
/* bunch of helper functions used in ISA_exp/log_FLOAT*/
-#if HAVE_ATTRIBUTE_TARGET_AVX2
+#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined NPY_HAVE_AVX2_INTRINSICS
static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX2 __m256
avx2_fmadd(__m256 a, __m256 b, __m256 c)
{
@@ -1186,7 +1186,7 @@ avx2_get_mantissa(__m256 x)
}
#endif
-#if HAVE_ATTRIBUTE_TARGET_AVX512F
+#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined NPY_HAVE_AVX512F_INTRINSICS
static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
avx512_get_full_load_mask(void)
{
@@ -1241,7 +1241,7 @@ avx512_get_mantissa(__m512 x)
* #fmadd = avx2_fmadd,_mm512_fmadd_ps#
**/
-#if HAVE_ATTRIBUTE_TARGET_@ISA@
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
@isa@_range_reduction(@vtype@ x, @vtype@ y, @vtype@ c1, @vtype@ c2, @vtype@ c3)
{
@@ -1269,7 +1269,7 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
* #cvtps_epi32 = _mm256_cvtps_epi32, #
*/
-#if HAVE_ATTRIBUTE_TARGET_@ISA@
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
/*
* Vectorized implementation of exp using AVX2 and AVX512: