summaryrefslogtreecommitdiff
path: root/numpy/core
diff options
context:
space:
mode:
authorRaghuveer Devulapalli <raghuveer.devulapalli@intel.com>2019-03-28 15:07:35 -0700
committerRaghuveer Devulapalli <raghuveer.devulapalli@intel.com>2019-04-19 10:47:15 -0700
commit651e03c0019d4c4c6ca8c43cb7d7c0d344a72cc1 (patch)
treef3d3333fa3427ba350344895d6bb850b8d5ca3ff /numpy/core
parentf9d14627b36bc25aace6c78e6e5f6fe68c08bfcb (diff)
downloadnumpy-651e03c0019d4c4c6ca8c43cb7d7c0d344a72cc1.tar.gz
BUG: Adding macro HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS
1) Use __builtin_cpu_supports("avx512f") only for gcc ver >= 5. 2) Introduced two new macros, HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS (one each for AVX2 and AVX512F), to ensure the compiler can compile functions that use intrinsics and are compiled with avx2/avx512f attributes.
Diffstat (limited to 'numpy/core')
-rw-r--r--numpy/core/include/numpy/npy_common.h4
-rw-r--r--numpy/core/setup.py5
-rw-r--r--numpy/core/setup_common.py15
-rw-r--r--numpy/core/src/umath/cpuid.c4
-rw-r--r--numpy/core/src/umath/loops.c.src5
-rw-r--r--numpy/core/src/umath/simd.inc.src10
6 files changed, 35 insertions, 8 deletions
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index d83080160..108c0a202 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -46,12 +46,16 @@
#endif
#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined HAVE_LINK_AVX2
#define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2")))
+#elif defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS
+#define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2")))
#else
#define NPY_GCC_TARGET_AVX2
#endif
#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined HAVE_LINK_AVX512F
#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f")))
+#elif defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS
+#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f")))
#else
#define NPY_GCC_TARGET_AVX512F
#endif
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 2bcd17f27..9f1ecf358 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -171,6 +171,11 @@ def check_math_capabilities(config, moredefs, mathlibs):
if config.check_gcc_function_attribute(dec, fn):
moredefs.append((fname2def(fn), 1))
+ for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS:
+ if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code,
+ header):
+ moredefs.append((fname2def(fn), 1))
+
for fn in OPTIONAL_VARIABLE_ATTRIBUTES:
if config.check_gcc_variable_attribute(fn):
m = fn.replace("(", "_").replace(")", "_")
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index a9c044da9..885aec443 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -172,6 +172,21 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
'attribute_target_avx512f'),
]
+# function attributes with intrinsics
+# To ensure your compiler can compile avx intrinsics with just the attributes
+# gcc 4.8.4 supports the attributes but not together with intrinsics
+# tested via "#include<%s> int %s %s(void *){%s; return 0;};" % (header, attribute, name, code)
+# function name will be converted to HAVE_<upper-case-name> preprocessor macro
+OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2")))',
+ 'attribute_target_avx2_with_intrinsics',
+ '__m256 temp = _mm256_set1_ps(1.0)',
+ 'immintrin.h'),
+ ('__attribute__((target("avx512f")))',
+ 'attribute_target_avx512f_with_intrinsics',
+ '__m512 temp = _mm512_set1_ps(1.0)',
+ 'immintrin.h'),
+ ]
+
# variable attributes tested via "int %s a" % attribute
OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"]
diff --git a/numpy/core/src/umath/cpuid.c b/numpy/core/src/umath/cpuid.c
index ab97e7afc..51c540457 100644
--- a/numpy/core/src/umath/cpuid.c
+++ b/numpy/core/src/umath/cpuid.c
@@ -57,7 +57,11 @@ npy_cpu_supports(const char * feature)
{
#ifdef HAVE___BUILTIN_CPU_SUPPORTS
if (strcmp(feature, "avx512f") == 0) {
+#if defined(__GNUC__) && (__GNUC__ < 5)
+ return 0;
+#else
return __builtin_cpu_supports("avx512f") && os_avx512_support();
+#endif
}
else if (strcmp(feature, "avx2") == 0) {
return __builtin_cpu_supports("avx2") && os_avx_support();
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index a9526c2ed..8a9cc58ab 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1588,8 +1588,7 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
/**begin repeat
* #isa = avx512f, avx2#
* #ISA = AVX512F, AVX2#
- * #CHK1 = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2#
- * #CHK2 = NPY_HAVE_AVX512F_INTRINSICS, NPY_HAVE_AVX2_INTRINSICS#
+ * #CHK = HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS, HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS#
*/
/**begin repeat1
@@ -1600,7 +1599,7 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
NPY_NO_EXPORT NPY_GCC_OPT_3 void
FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
-#if defined @CHK1@ && defined @CHK2@
+#if defined @CHK@
@ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
#else
/*
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 31f11d302..9e491b407 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -126,7 +126,7 @@ abs_ptrdiff(char *a, char *b)
*/
/* prototypes */
-#if defined NPY_HAVE_@ISA@_INTRINSICS
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS
/**begin repeat1
* #func = exp, log#
@@ -1098,7 +1098,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
/* bunch of helper functions used in ISA_exp/log_FLOAT*/
-#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined NPY_HAVE_AVX2_INTRINSICS
+#if defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS
static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX2 __m256
avx2_fmadd(__m256 a, __m256 b, __m256 c)
{
@@ -1186,7 +1186,7 @@ avx2_get_mantissa(__m256 x)
}
#endif
-#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined NPY_HAVE_AVX512F_INTRINSICS
+#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS
static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
avx512_get_full_load_mask(void)
{
@@ -1241,7 +1241,7 @@ avx512_get_mantissa(__m512 x)
* #fmadd = avx2_fmadd,_mm512_fmadd_ps#
**/
-#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS
static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
@isa@_range_reduction(@vtype@ x, @vtype@ y, @vtype@ c1, @vtype@ c2, @vtype@ c3)
{
@@ -1269,7 +1269,6 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
* #cvtps_epi32 = _mm256_cvtps_epi32, #
*/
-#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS
/*
* Vectorized implementation of exp using AVX2 and AVX512:
@@ -1286,6 +1285,7 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
* same x = 0xc2781e37)
*/
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS
static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
@ISA@_exp_FLOAT(npy_float * op, npy_float * ip, const npy_int array_size)
{