summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sayed Adel <seiko@imavr.com> 2023-02-03 12:41:50 +0200
committer: Sayed Adel <seiko@imavr.com> 2023-02-20 04:07:15 +0200
commit: 866f41a85bddfa3ea6de551bb27f335b0f8a6a52 (patch)
tree: 4b2ad41ca058771c05ed7688ad8ee5a331fd05bc
parent: 86450a0cd79a8107453f1b9aeb629e52c388efcc (diff)
download: numpy-866f41a85bddfa3ea6de551bb27f335b0f8a6a52.tar.gz
MAINT, SIMD: Removes compiler definitions of attribute-based CPU dispatching
-rw-r--r-- numpy/core/code_generators/generate_umath.py 97
-rw-r--r-- numpy/core/config.h.in 16
-rw-r--r-- numpy/core/include/numpy/npy_common.h 50
-rw-r--r-- numpy/core/meson.build 27
-rw-r--r-- numpy/core/setup.py 25
-rw-r--r-- numpy/core/setup_common.py 56
-rw-r--r-- numpy/core/src/umath/loops_exponent_log.dispatch.c.src 2
7 files changed, 53 insertions, 220 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 34fd0c9d1..a021c6c17 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -61,9 +61,6 @@ class TypeDescription:
cfunc_alias : str or none, optional
Appended to inner loop C function name, e.g., FLOAT_{cfunc_alias}. See make_arrays.
NOTE: it doesn't support 'astype'
- simd : list
- Available SIMD ufunc loops, dispatched at runtime in specified order
- Currently only supported for simples types (see make_arrays)
dispatch : str or None, optional
Dispatch-able source name without its extension '.dispatch.c' that
contains the definition of ufunc, dispatched at runtime depending on the
@@ -71,7 +68,7 @@ class TypeDescription:
NOTE: it doesn't support 'astype'
"""
def __init__(self, type, f=None, in_=None, out=None, astype=None, cfunc_alias=None,
- simd=None, dispatch=None):
+ dispatch=None):
self.type = type
self.func_data = f
if astype is None:
@@ -84,7 +81,6 @@ class TypeDescription:
out = out.replace('P', type)
self.out = out
self.cfunc_alias = cfunc_alias
- self.simd = simd
self.dispatch = dispatch
def finish_signature(self, nin, nout):
@@ -146,8 +142,9 @@ def build_func_data(types, f):
return func_data
def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None,
- simd=None, dispatch=None):
- """Generate a TypeDescription instance for each item in types
+ dispatch=None):
+ """
+ Generate a TypeDescription instance for each item in types
"""
if f is not None:
if isinstance(f, str):
@@ -172,12 +169,6 @@ def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None,
raise ValueError("Number of types and outputs do not match")
tds = []
for t, fd, i, o in zip(types, func_data, in_, out):
- # [(simd-name, list of types)]
- if simd is not None:
- simdt = [k for k, v in simd if t in v]
- else:
- simdt = []
-
# [(dispatch file name without extension '.dispatch.c*', list of types)]
if dispatch:
dispt = ([k for k, v in dispatch if t in v]+[None])[0]
@@ -185,7 +176,7 @@ def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None,
dispt = None
tds.append(TypeDescription(
t, f=fd, in_=i, out=o, astype=astype, cfunc_alias=cfunc_alias,
- simd=simdt, dispatch=dispt
+ dispatch=dispt
))
return tds
@@ -352,8 +343,10 @@ defdict = {
docstrings.get('numpy.core.umath.add'),
'PyUFunc_AdditionTypeResolver',
TD('?', cfunc_alias='logical_or', dispatch=[('loops_logical', '?')]),
- TD(no_bool_times_obj, simd=[('avx2', ints)],
- dispatch=[('loops_arithm_fp', 'fdFD')]),
+ TD(no_bool_times_obj, dispatch=[
+ ('loops_arithm_fp', 'fdFD'),
+ ('loops_autovec_int', ints),
+ ]),
[TypeDescription('M', FullTypeDescr, 'Mm', 'M'),
TypeDescription('m', FullTypeDescr, 'mm', 'm'),
TypeDescription('M', FullTypeDescr, 'mM', 'M'),
@@ -365,8 +358,10 @@ defdict = {
Ufunc(2, 1, None, # Zero is only a unit to the right, not the left
docstrings.get('numpy.core.umath.subtract'),
'PyUFunc_SubtractionTypeResolver',
- TD(no_bool_times_obj, simd=[('avx2', ints)],
- dispatch=[('loops_arithm_fp', 'fdFD')]),
+ TD(no_bool_times_obj, dispatch=[
+ ('loops_arithm_fp', 'fdFD'),
+ ('loops_autovec_int', ints),
+ ]),
[TypeDescription('M', FullTypeDescr, 'Mm', 'M'),
TypeDescription('m', FullTypeDescr, 'mm', 'm'),
TypeDescription('M', FullTypeDescr, 'MM', 'm'),
@@ -380,8 +375,10 @@ defdict = {
'PyUFunc_MultiplicationTypeResolver',
TD('?', cfunc_alias='logical_and',
dispatch=[('loops_logical', '?')]),
- TD(no_bool_times_obj, simd=[('avx2', ints)],
- dispatch=[('loops_arithm_fp', 'fdFD')]),
+ TD(no_bool_times_obj, dispatch=[
+ ('loops_arithm_fp', 'fdFD'),
+ ('loops_autovec_int', ints),
+ ]),
[TypeDescription('m', FullTypeDescr, 'mq', 'm'),
TypeDescription('m', FullTypeDescr, 'qm', 'm'),
TypeDescription('m', FullTypeDescr, 'md', 'm'),
@@ -421,8 +418,10 @@ defdict = {
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.conjugate'),
None,
- TD(ints+flts+cmplx, simd=[('avx2', ints)],
- dispatch=[('loops_arithm_fp', 'FD')]),
+ TD(ints+flts+cmplx, dispatch=[
+ ('loops_arithm_fp', 'FD'),
+ ('loops_autovec_int', ints),
+ ]),
TD(P, f='conjugate'),
),
'fmod':
@@ -437,15 +436,21 @@ defdict = {
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.square'),
None,
- TD(ints+inexact, simd=[('avx2', ints)],
- dispatch=[('loops_unary_fp', 'fd'), ('loops_arithm_fp', 'FD')]),
+ TD(ints+inexact, dispatch=[
+ ('loops_unary_fp', 'fd'),
+ ('loops_arithm_fp', 'FD'),
+ ('loops_autovec_int', ints),
+ ]),
TD(O, f='Py_square'),
),
'reciprocal':
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.reciprocal'),
None,
- TD(ints+inexact, simd=[('avx2', ints)], dispatch=[('loops_unary_fp', 'fd')]),
+ TD(ints+inexact, dispatch=[
+ ('loops_unary_fp', 'fd'),
+ ('loops_autovec_int', ints),
+ ]),
TD(O, f='Py_reciprocal'),
),
# This is no longer used as numpy.ones_like, however it is
@@ -563,24 +568,30 @@ defdict = {
Ufunc(2, 1, True_,
docstrings.get('numpy.core.umath.logical_and'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
- TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)],
- dispatch=[('loops_logical', '?')]),
+ TD(nodatetime_or_obj, out='?', dispatch=[
+ ('loops_logical', '?'),
+ ('loops_autovec_int', ints),
+ ]),
TD(O, f='npy_ObjectLogicalAnd'),
),
'logical_not':
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.logical_not'),
None,
- TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)],
- dispatch=[('loops_logical', '?')]),
+ TD(nodatetime_or_obj, out='?', dispatch=[
+ ('loops_logical', '?'),
+ ('loops_autovec_int', ints),
+ ]),
TD(O, f='npy_ObjectLogicalNot'),
),
'logical_or':
Ufunc(2, 1, False_,
docstrings.get('numpy.core.umath.logical_or'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
- TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)],
- dispatch=[('loops_logical', '?')]),
+ TD(nodatetime_or_obj, out='?', dispatch=[
+ ('loops_logical', '?'),
+ ('loops_autovec_int', ints),
+ ]),
TD(O, f='npy_ObjectLogicalOr'),
),
'logical_xor':
@@ -656,7 +667,7 @@ defdict = {
None,
TD('?', cfunc_alias='logical_and',
dispatch=[('loops_logical', '?')]),
- TD(ints, simd=[('avx2', ints)]),
+ TD(ints, dispatch=[('loops_autovec_int', ints)]),
TD(O, f='PyNumber_And'),
),
'bitwise_or':
@@ -664,7 +675,7 @@ defdict = {
docstrings.get('numpy.core.umath.bitwise_or'),
None,
TD('?', cfunc_alias='logical_or', dispatch=[('loops_logical', '?')]),
- TD(ints, simd=[('avx2', ints)]),
+ TD(ints, dispatch=[('loops_autovec_int', ints)]),
TD(O, f='PyNumber_Or'),
),
'bitwise_xor':
@@ -673,7 +684,7 @@ defdict = {
None,
TD('?', cfunc_alias='not_equal',
dispatch=[('loops_comparison', '?')]),
- TD(ints, simd=[('avx2', ints)]),
+ TD(ints, dispatch=[('loops_autovec_int', ints)]),
TD(O, f='PyNumber_Xor'),
),
'invert':
@@ -682,21 +693,21 @@ defdict = {
None,
TD('?', cfunc_alias='logical_not',
dispatch=[('loops_logical', '?')]),
- TD(ints, simd=[('avx2', ints)]),
+ TD(ints, dispatch=[('loops_autovec_int', ints)]),
TD(O, f='PyNumber_Invert'),
),
'left_shift':
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.left_shift'),
None,
- TD(ints, simd=[('avx2', ints)]),
+ TD(ints, dispatch=[('loops_autovec_int', ints)]),
TD(O, f='PyNumber_Lshift'),
),
'right_shift':
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.right_shift'),
None,
- TD(ints, simd=[('avx2', ints)]),
+ TD(ints, dispatch=[('loops_autovec_int', ints)]),
TD(O, f='PyNumber_Rshift'),
),
'heaviside':
@@ -1156,18 +1167,6 @@ def make_arrays(funcdict):
datalist.append('(void *)NULL')
tname = english_upper(chartoname[t.type])
cfunc_fname = f"{tname}_{cfunc_alias}"
- if t.simd is not None:
- for vt in t.simd:
- code2list.append(textwrap.dedent("""\
- #ifdef HAVE_ATTRIBUTE_TARGET_{ISA}
- if (NPY_CPU_HAVE({ISA})) {{
- {fname}_functions[{idx}] = {cname}_{isa};
- }}
- #endif
- """).format(
- ISA=vt.upper(), isa=vt,
- fname=name, cname=cfunc_fname, idx=k
- ))
else:
try:
thedict = arity_lookup[uf.nin, uf.nout]
diff --git a/numpy/core/config.h.in b/numpy/core/config.h.in
index 943a90cc8..e3b559753 100644
--- a/numpy/core/config.h.in
+++ b/numpy/core/config.h.in
@@ -29,28 +29,12 @@
#mesondefine HAVE___BUILTIN_BSWAP64
#mesondefine HAVE___BUILTIN_EXPECT
#mesondefine HAVE___BUILTIN_MUL_OVERFLOW
-#mesondefine HAVE__M_FROM_INT64
-#mesondefine HAVE__MM_LOAD_PS
-#mesondefine HAVE__MM_PREFETCH
-#mesondefine HAVE__MM_LOAD_PD
#mesondefine HAVE___BUILTIN_PREFETCH
-#mesondefine HAVE_LINK_AVX
-#mesondefine HAVE_LINK_AVX2
-#mesondefine HAVE_LINK_AVX512F
-#mesondefine HAVE_LINK_AVX512_SKX
-#mesondefine HAVE_XGETBV
#mesondefine HAVE_ATTRIBUTE_OPTIMIZE_UNROLL_LOOPS
#mesondefine HAVE_ATTRIBUTE_OPTIMIZE_OPT_3
#mesondefine HAVE_ATTRIBUTE_OPTIMIZE_OPT_2
#mesondefine HAVE_ATTRIBUTE_NONNULL
-#mesondefine HAVE_ATTRIBUTE_TARGET_AVX
-#mesondefine HAVE_ATTRIBUTE_TARGET_AVX2
-#mesondefine HAVE_ATTRIBUTE_TARGET_AVX512F
-#mesondefine HAVE_ATTRIBUTE_TARGET_AVX512_SKX
-#mesondefine HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS
-#mesondefine HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS
-#mesondefine HAVE_ATTRIBUTE_TARGET_AVX512_SKX_WITH_INTRINSICS
/* C99 complex support and complex.h are not universal */
#mesondefine HAVE_COMPLEX_H
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index ea4a818c8..3b31bcf2d 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -40,39 +40,6 @@
#define NPY_GCC_OPT_3
#endif
-/* compile target attributes */
-#if defined HAVE_ATTRIBUTE_TARGET_AVX && defined HAVE_LINK_AVX
-#define NPY_GCC_TARGET_AVX __attribute__((target("avx")))
-#else
-#define NPY_GCC_TARGET_AVX
-#endif
-
-#if defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS
-#define HAVE_ATTRIBUTE_TARGET_FMA
-#define NPY_GCC_TARGET_FMA __attribute__((target("avx2,fma")))
-#endif
-
-#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined HAVE_LINK_AVX2
-#define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2")))
-#else
-#define NPY_GCC_TARGET_AVX2
-#endif
-
-#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined HAVE_LINK_AVX512F
-#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f")))
-#elif defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS
-#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f")))
-#else
-#define NPY_GCC_TARGET_AVX512F
-#endif
-
-#if defined HAVE_ATTRIBUTE_TARGET_AVX512_SKX && defined HAVE_LINK_AVX512_SKX
-#define NPY_GCC_TARGET_AVX512_SKX __attribute__((target("avx512f,avx512dq,avx512vl,avx512bw,avx512cd")))
-#elif defined HAVE_ATTRIBUTE_TARGET_AVX512_SKX_WITH_INTRINSICS
-#define NPY_GCC_TARGET_AVX512_SKX __attribute__((target("avx512f,avx512dq,avx512vl,avx512bw,avx512cd")))
-#else
-#define NPY_GCC_TARGET_AVX512_SKX
-#endif
/*
* mark an argument (starting from 1) that must not be NULL and is not checked
* DO NOT USE IF FUNCTION CHECKS FOR NULL!! the compiler will remove the check
@@ -83,21 +50,6 @@
#define NPY_GCC_NONNULL(n)
#endif
-#if defined HAVE_XMMINTRIN_H && defined HAVE__MM_LOAD_PS
-#define NPY_HAVE_SSE_INTRINSICS
-#endif
-
-#if defined HAVE_EMMINTRIN_H && defined HAVE__MM_LOAD_PD
-#define NPY_HAVE_SSE2_INTRINSICS
-#endif
-
-#if defined HAVE_IMMINTRIN_H && defined HAVE_LINK_AVX2
-#define NPY_HAVE_AVX2_INTRINSICS
-#endif
-
-#if defined HAVE_IMMINTRIN_H && defined HAVE_LINK_AVX512F
-#define NPY_HAVE_AVX512F_INTRINSICS
-#endif
/*
* give a hint to the compiler which branch is more likely or unlikely
* to occur, e.g. rare error cases:
@@ -120,7 +72,7 @@
/* unlike _mm_prefetch also works on non-x86 */
#define NPY_PREFETCH(x, rw, loc) __builtin_prefetch((x), (rw), (loc))
#else
-#ifdef HAVE__MM_PREFETCH
+#ifdef NPY_HAVE_SSE
/* _MM_HINT_ET[01] (rw = 1) unsupported, only available in gcc >= 4.9 */
#define NPY_PREFETCH(x, rw, loc) _mm_prefetch((x), loc == 0 ? _MM_HINT_NTA : \
(loc == 1 ? _MM_HINT_T2 : \
diff --git a/numpy/core/meson.build b/numpy/core/meson.build
index e6607d8ad..84af05ff4 100644
--- a/numpy/core/meson.build
+++ b/numpy/core/meson.build
@@ -293,16 +293,7 @@ optional_function_attributes = [
['optimize("O3")', 'OPTIMIZE_OPT_3'],
['optimize("O2")', 'OPTIMIZE_OPT_2'],
['optimize("nonnull (1)")', 'NONNULL'],
- ]
-if host_machine.cpu_family() in ['x86', 'x86_64']
- optional_function_attributes += [
- ['target("avx")', 'TARGET_AVX'],
- ['target("avx2")', 'TARGET_AVX2'],
- ['target("avx512f")', 'TARGET_AVX512F'],
- ['target("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")', 'TARGET_AVX512_SKX'],
- ]
- # TODO: add the _WITH_INTRINSICS_AVX list
-endif
+]
#foreach attr: optional_function_attributes
# if cc.has_function_attribute(attr[0])
# cdata.set10('HAVE_ATTRIBUTE_' + attr[1], true)
@@ -323,22 +314,8 @@ optional_intrinsics = [
['__builtin_mul_overflow', '(long long)5, 5, (int*)5', [], []],
]
if host_machine.cpu_family() in ['x86', 'x86_64']
- optional_intrinsics += [
- # MMX only needed for icc, but some clang's don't have it
- ['_m_from_int64', '0', ['emmintrin.h'], []],
- ['_mm_load_ps', '(float*)0', ['xmmintrin.h'], []], # SSE
- ['_mm_prefetch', '(float*)0, _MM_HINT_NTA', ['xmmintrin.h'], []], # SSE
- ['_mm_load_pd', '(double*)0', ['emmintrin.h'], []], # SSE2
+ optional_intrinsics += [
['__builtin_prefetch', '(float*)0, 0, 3', [], []],
- # Check that the linker can handle AVX
- ['__asm__ volatile', '"vpand %xmm1, %xmm2, %xmm3"', ['stdio.h'], ['HAVE_LINK_AVX']],
- ['__asm__ volatile', '"vpand %ymm1, %ymm2, %ymm3"', ['stdio.h'], ['HAVE_LINK_AVX2']],
- ['__asm__ volatile', '"vpaddd %zmm1, %zmm2, %zmm3"', ['stdio.h'], ['HAVE_LINK_AVX512F']],
- ['__asm__ volatile',
- '"vfpclasspd $0x40, %zmm15, %k6\\n vmovdqu8 %xmm0, %xmm1\\n vpbroadcastmb2q %k0, %xmm0"',
- ['stdio.h'], ['HAVE_LINK_AVX512_SKX']
- ],
- ['__asm__ volatile', '"xgetbv"', ['stdio.h'], ['HAVE_XGETBV']],
]
endif
foreach intrin: optional_intrinsics
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 2cad4ba43..b48d46c3f 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -171,18 +171,6 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
else:
return 1
- # NOTE: not needed in Meson build, we set the minimum
- # compiler version to 8.4 to avoid this bug
- # GH-14787: Work around GCC<8.4 bug when compiling with AVX512
- # support on Windows-based platforms
- def check_gh14787(fn):
- if fn == 'attribute_target_avx512f':
- if (sys.platform in ('win32', 'cygwin') and
- config.check_compiler_gcc() and
- not config.check_gcc_version_at_least(8, 4)):
- ext.extra_compile_args.extend(
- ['-ffixed-xmm%s' % n for n in range(16, 32)])
-
#use_msvc = config.check_decl("_MSC_VER")
if not check_funcs_once(MANDATORY_FUNCS, add_to_moredefs=False):
raise SystemError("One of the required function to build numpy is not"
@@ -233,20 +221,8 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES:
if config.check_gcc_function_attribute(dec, fn):
moredefs.append((fname2def(fn), 1))
- check_gh14787(fn)
platform = sysconfig.get_platform()
- if ("x86_64" in platform):
- for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX:
- if config.check_gcc_function_attribute(dec, fn):
- moredefs.append((fname2def(fn), 1))
- check_gh14787(fn)
- for dec, fn, code, header in (
- OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX):
- if config.check_gcc_function_attribute_with_intrinsics(
- dec, fn, code, header):
- moredefs.append((fname2def(fn), 1))
-
for fn in OPTIONAL_VARIABLE_ATTRIBUTES:
if config.check_gcc_variable_attribute(fn):
m = fn.replace("(", "_").replace(")", "_")
@@ -1019,6 +995,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'umath', 'loops_modulo.dispatch.c.src'),
join('src', 'umath', 'loops_comparison.dispatch.c.src'),
join('src', 'umath', 'loops_unary_complex.dispatch.c.src'),
+ join('src', 'umath', 'loops_autovec_int.dispatch.c.src'),
join('src', 'umath', 'matmul.h.src'),
join('src', 'umath', 'matmul.c.src'),
join('src', 'umath', 'clip.h'),
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 0512457f4..ef8d21fa7 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -183,25 +183,7 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
# Test `long long` for arm+clang 13 (gh-22811,
# but we use all versions of __builtin_mul_overflow):
("__builtin_mul_overflow", '(long long)5, 5, (int*)5'),
- # MMX only needed for icc, but some clangs don't have it
- ("_m_from_int64", '0', "emmintrin.h"),
- ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE
- ("_mm_prefetch", '(float*)0, _MM_HINT_NTA',
- "xmmintrin.h"), # SSE
- ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2
("__builtin_prefetch", "(float*)0, 0, 3"),
- # check that the linker can handle avx
- ("__asm__ volatile", '"vpand %xmm1, %xmm2, %xmm3"',
- "stdio.h", "LINK_AVX"),
- ("__asm__ volatile", '"vpand %ymm1, %ymm2, %ymm3"',
- "stdio.h", "LINK_AVX2"),
- ("__asm__ volatile", '"vpaddd %zmm1, %zmm2, %zmm3"',
- "stdio.h", "LINK_AVX512F"),
- ("__asm__ volatile", '"vfpclasspd $0x40, %zmm15, %k6\\n"\
- "vmovdqu8 %xmm0, %xmm1\\n"\
- "vpbroadcastmb2q %k0, %xmm0\\n"',
- "stdio.h", "LINK_AVX512_SKX"),
- ("__asm__ volatile", '"xgetbv"', "stdio.h", "XGETBV"),
]
# function attributes
@@ -216,44 +198,6 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
('__attribute__((nonnull (1)))',
'attribute_nonnull'),
]
-
-OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))',
- 'attribute_target_avx'),
- ('__attribute__((target ("avx2")))',
- 'attribute_target_avx2'),
- ('__attribute__((target ("avx512f")))',
- 'attribute_target_avx512f'),
- ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
- 'attribute_target_avx512_skx'),
- ]
-
-# function attributes with intrinsics
-# To ensure your compiler can compile avx intrinsics with just the attributes
-# gcc 4.8.4 support attributes but not with intrisics
-# tested via "#include<%s> int %s %s(void *){code; return 0;};" % (header, attribute, name, code)
-# function name will be converted to HAVE_<upper-case-name> preprocessor macro
-# The _mm512_castps_si512 instruction is specific check for AVX-512F support
-# in gcc-4.9 which is missing a subset of intrinsics. See
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
-OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [
- ('__attribute__((target("avx2,fma")))',
- 'attribute_target_avx2_with_intrinsics',
- '__m256 temp = _mm256_set1_ps(1.0); temp = \
- _mm256_fmadd_ps(temp, temp, temp)',
- 'immintrin.h'),
- ('__attribute__((target("avx512f")))',
- 'attribute_target_avx512f_with_intrinsics',
- '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
- 'immintrin.h'),
- ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
- 'attribute_target_avx512_skx_with_intrinsics',
- '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
- __m512i unused_temp = \
- _mm512_castps_si512(_mm512_set1_ps(1.0));\
- _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
- 'immintrin.h'),
- ]
-
def fname2def(name):
return "HAVE_%s" % name.upper()
diff --git a/numpy/core/src/umath/loops_exponent_log.dispatch.c.src b/numpy/core/src/umath/loops_exponent_log.dispatch.c.src
index 182c57b01..1fac3c150 100644
--- a/numpy/core/src/umath/loops_exponent_log.dispatch.c.src
+++ b/numpy/core/src/umath/loops_exponent_log.dispatch.c.src
@@ -239,7 +239,7 @@ fma_scalef_ps(__m256 poly, __m256 quadrant)
#ifdef SIMD_AVX512F
-NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
+NPY_FINLINE __mmask16
avx512_get_full_load_mask_ps(void)
{
return 0xFFFF;