summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--numpy/core/src/umath/loops.c.src23
-rw-r--r--numpy/core/src/umath/simd.inc.src8
2 files changed, 22 insertions, 9 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 024d495cd..a9526c2ed 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1588,21 +1588,34 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
/**begin repeat
* #isa = avx512f, avx2#
* #ISA = AVX512F, AVX2#
- * #CHK = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2#
- * #ATTR = NPY_GCC_TARGET_AVX512F, NPY_GCC_TARGET_AVX2#
+ * #CHK1 = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2#
+ * #CHK2 = NPY_HAVE_AVX512F_INTRINSICS, NPY_HAVE_AVX2_INTRINSICS#
*/
/**begin repeat1
* #func = exp, log#
+ * #scalarf = npy_expf, npy_logf#
*/
-#if @CHK@
-NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
+#if defined @CHK1@ && defined @CHK2@
@ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
-}
+#else
+ /*
+ * This is the path taken when the ISA is detected at runtime but was not
+ * compiled for. It fixes the error on clang 6.0, which fails to compile
+ * the AVX512F version. Not sure if I like this idea: if AVX512F is
+ * detected at runtime, this ends up running the scalar version instead
+ * of AVX2.
+ */
+ UNARY_LOOP {
+ const npy_float in1 = *(npy_float *)ip1;
+ *(npy_float *)op1 = @scalarf@(in1);
+ }
#endif
+}
/**end repeat1**/
/**end repeat**/
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 0fa98a68a..31f11d302 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -1098,7 +1098,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
/* bunch of helper functions used in ISA_exp/log_FLOAT*/
-#if HAVE_ATTRIBUTE_TARGET_AVX2
+#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined NPY_HAVE_AVX2_INTRINSICS
static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX2 __m256
avx2_fmadd(__m256 a, __m256 b, __m256 c)
{
@@ -1186,7 +1186,7 @@ avx2_get_mantissa(__m256 x)
}
#endif
-#if HAVE_ATTRIBUTE_TARGET_AVX512F
+#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined NPY_HAVE_AVX512F_INTRINSICS
static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
avx512_get_full_load_mask(void)
{
@@ -1241,7 +1241,7 @@ avx512_get_mantissa(__m512 x)
* #fmadd = avx2_fmadd,_mm512_fmadd_ps#
**/
-#if HAVE_ATTRIBUTE_TARGET_@ISA@
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
@isa@_range_reduction(@vtype@ x, @vtype@ y, @vtype@ c1, @vtype@ c2, @vtype@ c3)
{
@@ -1269,7 +1269,7 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
* #cvtps_epi32 = _mm256_cvtps_epi32, #
*/
-#if HAVE_ATTRIBUTE_TARGET_@ISA@
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
/*
* Vectorized implementation of exp using AVX2 and AVX512: