BUG: Adding macro HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS

1) Use __builtin_cpu_supports("avx512f") only for gcc ver >= 5. 2) Introduce two new macros, HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS (for ISA = AVX2 and AVX512F), to ensure the compiler can compile functions that use intrinsics and are compiled with avx2/avx512f attributes.
author: Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> 2019-03-28 15:07:35 -0700
committer: Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> 2019-04-19 10:47:15 -0700
commit: 651e03c0019d4c4c6ca8c43cb7d7c0d344a72cc1 (patch)
tree: f3d3333fa3427ba350344895d6bb850b8d5ca3ff /numpy/core
parent: f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb (diff)
download: numpy-651e03c0019d4c4c6ca8c43cb7d7c0d344a72cc1.tar.gz
6 files changed, 35 insertions, 8 deletions
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index d83080160..108c0a202 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -46,12 +46,16 @@
 #endif
 #if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined HAVE_LINK_AVX2
 #define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2")))
+#elif defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS
+#define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2")))
 #else
 #define NPY_GCC_TARGET_AVX2
 #endif
 
 #if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined HAVE_LINK_AVX512F
 #define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f")))
+#elif defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS
+#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f")))
 #else
 #define NPY_GCC_TARGET_AVX512F
 #endif
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 2bcd17f27..9f1ecf358 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -171,6 +171,11 @@ def check_math_capabilities(config, moredefs, mathlibs):
         if config.check_gcc_function_attribute(dec, fn):
             moredefs.append((fname2def(fn), 1))
 
+    for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS:
+        if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code,
+                                                               header):
+            moredefs.append((fname2def(fn), 1))
+
     for fn in OPTIONAL_VARIABLE_ATTRIBUTES:
         if config.check_gcc_variable_attribute(fn):
             m = fn.replace("(", "_").replace(")", "_")
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index a9c044da9..885aec443 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -172,6 +172,21 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
                                  'attribute_target_avx512f'),
                                 ]
 
+# function attributes with intrinsics
+# To ensure your compiler can compile avx intrinsics with just the attributes
+# gcc 4.8.4 supports attributes but not with intrinsics
+# tested via "#include<%s> int %s %s(void *){%s; return 0;};" % (header, attribute, name, code)
+# function name will be converted to HAVE_<upper-case-name> preprocessor macro
+OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2")))',
+                                'attribute_target_avx2_with_intrinsics',
+                                '__m256 temp = _mm256_set1_ps(1.0)',
+                                'immintrin.h'),
+                                ('__attribute__((target("avx512f")))',
+                                'attribute_target_avx512f_with_intrinsics',
+                                '__m512 temp = _mm512_set1_ps(1.0)',
+                                'immintrin.h'),
+                                ]
+
 # variable attributes tested via "int %s a" % attribute
 OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"]
 
diff --git a/numpy/core/src/umath/cpuid.c b/numpy/core/src/umath/cpuid.c
index ab97e7afc..51c540457 100644
--- a/numpy/core/src/umath/cpuid.c
+++ b/numpy/core/src/umath/cpuid.c
@@ -57,7 +57,11 @@ npy_cpu_supports(const char * feature)
 {
 #ifdef HAVE___BUILTIN_CPU_SUPPORTS
     if (strcmp(feature, "avx512f") == 0) {
+#if defined(__GNUC__) && (__GNUC__ < 5)
+        return 0;
+#else
         return __builtin_cpu_supports("avx512f") && os_avx512_support();
+#endif
     }
     else if (strcmp(feature, "avx2") == 0) {
         return __builtin_cpu_supports("avx2") && os_avx_support();
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index a9526c2ed..8a9cc58ab 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1588,8 +1588,7 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
 /**begin repeat
  * #isa = avx512f, avx2#
  * #ISA = AVX512F, AVX2#
- * #CHK1 = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2#
- * #CHK2 = NPY_HAVE_AVX512F_INTRINSICS, NPY_HAVE_AVX2_INTRINSICS#
+ * #CHK = HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS, HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS#
  */
 
 /**begin repeat1
@@ -1600,7 +1599,7 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
 NPY_NO_EXPORT NPY_GCC_OPT_3 void
 FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
 {
-#if defined @CHK1@ && defined @CHK2@
+#if defined @CHK@
     @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
 #else
     /*
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 31f11d302..9e491b407 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -126,7 +126,7 @@ abs_ptrdiff(char *a, char *b)
  */
 
 /* prototypes */
-#if defined NPY_HAVE_@ISA@_INTRINSICS
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS
 
 /**begin repeat1
  * #func = exp, log#
@@ -1098,7 +1098,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
 
 /* bunch of helper functions used in ISA_exp/log_FLOAT*/
 
-#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined NPY_HAVE_AVX2_INTRINSICS
+#if defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX2 __m256
 avx2_fmadd(__m256 a, __m256 b, __m256 c)
 {
@@ -1186,7 +1186,7 @@ avx2_get_mantissa(__m256 x)
 }
 #endif
 
-#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined NPY_HAVE_AVX512F_INTRINSICS
+#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
 avx512_get_full_load_mask(void)
 {
@@ -1241,7 +1241,7 @@ avx512_get_mantissa(__m512 x)
  * #fmadd = avx2_fmadd,_mm512_fmadd_ps#
  **/
 
-#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
 @isa@_range_reduction(@vtype@ x, @vtype@ y, @vtype@ c1, @vtype@ c2, @vtype@ c3)
 {
@@ -1269,7 +1269,6 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
  * #cvtps_epi32 = _mm256_cvtps_epi32, #
  */
 
-#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
 
 /*
  * Vectorized implementation of exp using AVX2 and AVX512:
@@ -1286,6 +1285,7 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
  * same x = 0xc2781e37)
  */
 
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS
 static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
 @ISA@_exp_FLOAT(npy_float * op, npy_float * ip, const npy_int array_size)
 {
author	Raghuveer Devulapalli <raghuveer.devulapalli@intel.com>	2019-03-28 15:07:35 -0700
committer	Raghuveer Devulapalli <raghuveer.devulapalli@intel.com>	2019-04-19 10:47:15 -0700
commit	651e03c0019d4c4c6ca8c43cb7d7c0d344a72cc1 (patch)
tree	f3d3333fa3427ba350344895d6bb850b8d5ca3ff /numpy/core
parent	f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb (diff)
download	numpy-651e03c0019d4c4c6ca8c43cb7d7c0d344a72cc1.tar.gz