diff options
author | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-03-28 15:07:35 -0700 |
---|---|---|
committer | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-04-19 10:47:15 -0700 |
commit | 651e03c0019d4c4c6ca8c43cb7d7c0d344a72cc1 (patch) | |
tree | f3d3333fa3427ba350344895d6bb850b8d5ca3ff /numpy/core | |
parent | f9d14627b36bc25aace6c78e6e5f6fe68c08bfcb (diff) | |
download | numpy-651e03c0019d4c4c6ca8c43cb7d7c0d344a72cc1.tar.gz |
BUG: Adding macro HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS
1) use __builtin_cpu_supports("avx512f") only for gcc ver >= 5
2) Introduced two new macros,
HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS (@ISA@ = AVX2, AVX512F), to ensure
the compiler can compile functions that use intrinsics and are compiled with
the avx2/avx512f target attributes
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/include/numpy/npy_common.h | 4 | ||||
-rw-r--r-- | numpy/core/setup.py | 5 | ||||
-rw-r--r-- | numpy/core/setup_common.py | 15 | ||||
-rw-r--r-- | numpy/core/src/umath/cpuid.c | 4 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 5 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 10 |
6 files changed, 35 insertions, 8 deletions
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index d83080160..108c0a202 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -46,12 +46,16 @@ #endif #if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined HAVE_LINK_AVX2 #define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2"))) +#elif defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS +#define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2"))) #else #define NPY_GCC_TARGET_AVX2 #endif #if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined HAVE_LINK_AVX512F #define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f"))) +#elif defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS +#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f"))) #else #define NPY_GCC_TARGET_AVX512F #endif diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 2bcd17f27..9f1ecf358 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -171,6 +171,11 @@ def check_math_capabilities(config, moredefs, mathlibs): if config.check_gcc_function_attribute(dec, fn): moredefs.append((fname2def(fn), 1)) + for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS: + if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code, + header): + moredefs.append((fname2def(fn), 1)) + for fn in OPTIONAL_VARIABLE_ATTRIBUTES: if config.check_gcc_variable_attribute(fn): m = fn.replace("(", "_").replace(")", "_") diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index a9c044da9..885aec443 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -172,6 +172,21 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', 'attribute_target_avx512f'), ] +# function attributes with intrinsics +# To ensure your compiler can compile avx intrinsics with just the attributes +# gcc 4.8.4 support attributes but not with intrisics +# tested via "#include<%s> int %s %s(void *){code; return 
0;};" % (header, attribute, name, code) +# function name will be converted to HAVE_<upper-case-name> preprocessor macro +OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2")))', + 'attribute_target_avx2_with_intrinsics', + '__m256 temp = _mm256_set1_ps(1.0)', + 'immintrin.h'), + ('__attribute__((target("avx512f")))', + 'attribute_target_avx512f_with_intrinsics', + '__m512 temp = _mm512_set1_ps(1.0)', + 'immintrin.h'), + ] + # variable attributes tested via "int %s a" % attribute OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"] diff --git a/numpy/core/src/umath/cpuid.c b/numpy/core/src/umath/cpuid.c index ab97e7afc..51c540457 100644 --- a/numpy/core/src/umath/cpuid.c +++ b/numpy/core/src/umath/cpuid.c @@ -57,7 +57,11 @@ npy_cpu_supports(const char * feature) { #ifdef HAVE___BUILTIN_CPU_SUPPORTS if (strcmp(feature, "avx512f") == 0) { +#if defined(__GNUC__) && (__GNUC__ < 5) + return 0; +#else return __builtin_cpu_supports("avx512f") && os_avx512_support(); +#endif } else if (strcmp(feature, "avx2") == 0) { return __builtin_cpu_supports("avx2") && os_avx_support(); diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index a9526c2ed..8a9cc58ab 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1588,8 +1588,7 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE /**begin repeat * #isa = avx512f, avx2# * #ISA = AVX512F, AVX2# - * #CHK1 = HAVE_ATTRIBUTE_TARGET_AVX512F, HAVE_ATTRIBUTE_TARGET_AVX2# - * #CHK2 = NPY_HAVE_AVX512F_INTRINSICS, NPY_HAVE_AVX2_INTRINSICS# + * #CHK = HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS, HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS# */ /**begin repeat1 @@ -1600,7 +1599,7 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE NPY_NO_EXPORT NPY_GCC_OPT_3 void FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) { -#if defined @CHK1@ && 
defined @CHK2@ +#if defined @CHK@ @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]); #else /* diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 31f11d302..9e491b407 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -126,7 +126,7 @@ abs_ptrdiff(char *a, char *b) */ /* prototypes */ -#if defined NPY_HAVE_@ISA@_INTRINSICS +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS /**begin repeat1 * #func = exp, log# @@ -1098,7 +1098,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) /* bunch of helper functions used in ISA_exp/log_FLOAT*/ -#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined NPY_HAVE_AVX2_INTRINSICS +#if defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX2 __m256 avx2_fmadd(__m256 a, __m256 b, __m256 c) { @@ -1186,7 +1186,7 @@ avx2_get_mantissa(__m256 x) } #endif -#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined NPY_HAVE_AVX512F_INTRINSICS +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16 avx512_get_full_load_mask(void) { @@ -1241,7 +1241,7 @@ avx512_get_mantissa(__m512 x) * #fmadd = avx2_fmadd,_mm512_fmadd_ps# **/ -#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@ @isa@_range_reduction(@vtype@ x, @vtype@ y, @vtype@ c1, @vtype@ c2, @vtype@ c3) { @@ -1269,7 +1269,6 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@ * #cvtps_epi32 = _mm256_cvtps_epi32, # */ -#if defined HAVE_ATTRIBUTE_TARGET_@ISA@ && defined NPY_HAVE_@ISA@_INTRINSICS /* * Vectorized implementation of exp using AVX2 and AVX512: @@ -1286,6 +1285,7 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@ * same x = 0xc2781e37) */ +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS 
static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void @ISA@_exp_FLOAT(npy_float * op, npy_float * ip, const npy_int array_size) { |