diff options
Diffstat (limited to 'numpy/core')
| -rw-r--r-- | numpy/core/code_generators/generate_umath.py | 97 | ||||
| -rw-r--r-- | numpy/core/config.h.in | 16 | ||||
| -rw-r--r-- | numpy/core/include/numpy/npy_common.h | 50 | ||||
| -rw-r--r-- | numpy/core/meson.build | 27 | ||||
| -rw-r--r-- | numpy/core/setup.py | 25 | ||||
| -rw-r--r-- | numpy/core/setup_common.py | 56 | ||||
| -rw-r--r-- | numpy/core/src/umath/loops_exponent_log.dispatch.c.src | 2 |
7 files changed, 53 insertions, 220 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index 34fd0c9d1..a021c6c17 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -61,9 +61,6 @@ class TypeDescription: cfunc_alias : str or none, optional Appended to inner loop C function name, e.g., FLOAT_{cfunc_alias}. See make_arrays. NOTE: it doesn't support 'astype' - simd : list - Available SIMD ufunc loops, dispatched at runtime in specified order - Currently only supported for simples types (see make_arrays) dispatch : str or None, optional Dispatch-able source name without its extension '.dispatch.c' that contains the definition of ufunc, dispatched at runtime depending on the @@ -71,7 +68,7 @@ class TypeDescription: NOTE: it doesn't support 'astype' """ def __init__(self, type, f=None, in_=None, out=None, astype=None, cfunc_alias=None, - simd=None, dispatch=None): + dispatch=None): self.type = type self.func_data = f if astype is None: @@ -84,7 +81,6 @@ class TypeDescription: out = out.replace('P', type) self.out = out self.cfunc_alias = cfunc_alias - self.simd = simd self.dispatch = dispatch def finish_signature(self, nin, nout): @@ -146,8 +142,9 @@ def build_func_data(types, f): return func_data def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None, - simd=None, dispatch=None): - """Generate a TypeDescription instance for each item in types + dispatch=None): + """ + Generate a TypeDescription instance for each item in types """ if f is not None: if isinstance(f, str): @@ -172,12 +169,6 @@ def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None, raise ValueError("Number of types and outputs do not match") tds = [] for t, fd, i, o in zip(types, func_data, in_, out): - # [(simd-name, list of types)] - if simd is not None: - simdt = [k for k, v in simd if t in v] - else: - simdt = [] - # [(dispatch file name without extension '.dispatch.c*', list of types)] if dispatch: dispt = ([k for k, v in dispatch if t in v]+[None])[0] @@ -185,7 +176,7 @@ def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None, dispt = None tds.append(TypeDescription( t, f=fd, in_=i, out=o, astype=astype, cfunc_alias=cfunc_alias, - simd=simdt, dispatch=dispt + dispatch=dispt )) return tds @@ -352,8 +343,10 @@ defdict = { docstrings.get('numpy.core.umath.add'), 'PyUFunc_AdditionTypeResolver', TD('?', cfunc_alias='logical_or', dispatch=[('loops_logical', '?')]), - TD(no_bool_times_obj, simd=[('avx2', ints)], - dispatch=[('loops_arithm_fp', 'fdFD')]), + TD(no_bool_times_obj, dispatch=[ + ('loops_arithm_fp', 'fdFD'), + ('loops_autovec_int', ints), + ]), [TypeDescription('M', FullTypeDescr, 'Mm', 'M'), TypeDescription('m', FullTypeDescr, 'mm', 'm'), TypeDescription('M', FullTypeDescr, 'mM', 'M'), @@ -365,8 +358,10 @@ defdict = { Ufunc(2, 1, None, # Zero is only a unit to the right, not the left docstrings.get('numpy.core.umath.subtract'), 'PyUFunc_SubtractionTypeResolver', - TD(no_bool_times_obj, simd=[('avx2', ints)], - dispatch=[('loops_arithm_fp', 'fdFD')]), + TD(no_bool_times_obj, dispatch=[ + ('loops_arithm_fp', 'fdFD'), + ('loops_autovec_int', ints), + ]), [TypeDescription('M', FullTypeDescr, 'Mm', 'M'), TypeDescription('m', FullTypeDescr, 'mm', 'm'), TypeDescription('M', FullTypeDescr, 'MM', 'm'), @@ -380,8 +375,10 @@ defdict = { 'PyUFunc_MultiplicationTypeResolver', TD('?', cfunc_alias='logical_and', dispatch=[('loops_logical', '?')]), - TD(no_bool_times_obj, simd=[('avx2', ints)], - dispatch=[('loops_arithm_fp', 'fdFD')]), + TD(no_bool_times_obj, dispatch=[ + ('loops_arithm_fp', 'fdFD'), + ('loops_autovec_int', ints), + ]), [TypeDescription('m', FullTypeDescr, 'mq', 'm'), TypeDescription('m', FullTypeDescr, 'qm', 'm'), TypeDescription('m', FullTypeDescr, 'md', 'm'), @@ -421,8 +418,10 @@ defdict = { Ufunc(1, 1, None, docstrings.get('numpy.core.umath.conjugate'), None, - TD(ints+flts+cmplx, simd=[('avx2', ints)], - dispatch=[('loops_arithm_fp', 'FD')]), + TD(ints+flts+cmplx, dispatch=[ + ('loops_arithm_fp', 'FD'), + ('loops_autovec_int', ints), + ]), TD(P, f='conjugate'), ), 'fmod': @@ -437,15 +436,21 @@ defdict = { Ufunc(1, 1, None, docstrings.get('numpy.core.umath.square'), None, - TD(ints+inexact, simd=[('avx2', ints)], - dispatch=[('loops_unary_fp', 'fd'), ('loops_arithm_fp', 'FD')]), + TD(ints+inexact, dispatch=[ + ('loops_unary_fp', 'fd'), + ('loops_arithm_fp', 'FD'), + ('loops_autovec_int', ints), + ]), TD(O, f='Py_square'), ), 'reciprocal': Ufunc(1, 1, None, docstrings.get('numpy.core.umath.reciprocal'), None, - TD(ints+inexact, simd=[('avx2', ints)], dispatch=[('loops_unary_fp', 'fd')]), + TD(ints+inexact, dispatch=[ + ('loops_unary_fp', 'fd'), + ('loops_autovec_int', ints), + ]), TD(O, f='Py_reciprocal'), ), # This is no longer used as numpy.ones_like, however it is @@ -563,24 +568,30 @@ defdict = { Ufunc(2, 1, True_, docstrings.get('numpy.core.umath.logical_and'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)], - dispatch=[('loops_logical', '?')]), + TD(nodatetime_or_obj, out='?', dispatch=[ + ('loops_logical', '?'), + ('loops_autovec_int', ints), + ]), TD(O, f='npy_ObjectLogicalAnd'), ), 'logical_not': Ufunc(1, 1, None, docstrings.get('numpy.core.umath.logical_not'), None, - TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)], - dispatch=[('loops_logical', '?')]), + TD(nodatetime_or_obj, out='?', dispatch=[ + ('loops_logical', '?'), + ('loops_autovec_int', ints), + ]), TD(O, f='npy_ObjectLogicalNot'), ), 'logical_or': Ufunc(2, 1, False_, docstrings.get('numpy.core.umath.logical_or'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', - TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)], - dispatch=[('loops_logical', '?')]), + TD(nodatetime_or_obj, out='?', dispatch=[ + ('loops_logical', '?'), + ('loops_autovec_int', ints), + ]), TD(O, f='npy_ObjectLogicalOr'), ), 'logical_xor': @@ -656,7 +667,7 @@ defdict = { None, TD('?', cfunc_alias='logical_and', dispatch=[('loops_logical', '?')]), - TD(ints, simd=[('avx2', ints)]), + TD(ints, dispatch=[('loops_autovec_int', ints)]), TD(O, f='PyNumber_And'), ), 'bitwise_or': @@ -664,7 +675,7 @@ defdict = { docstrings.get('numpy.core.umath.bitwise_or'), None, TD('?', cfunc_alias='logical_or', dispatch=[('loops_logical', '?')]), - TD(ints, simd=[('avx2', ints)]), + TD(ints, dispatch=[('loops_autovec_int', ints)]), TD(O, f='PyNumber_Or'), ), 'bitwise_xor': @@ -673,7 +684,7 @@ defdict = { None, TD('?', cfunc_alias='not_equal', dispatch=[('loops_comparison', '?')]), - TD(ints, simd=[('avx2', ints)]), + TD(ints, dispatch=[('loops_autovec_int', ints)]), TD(O, f='PyNumber_Xor'), ), 'invert': @@ -682,21 +693,21 @@ defdict = { None, TD('?', cfunc_alias='logical_not', dispatch=[('loops_logical', '?')]), - TD(ints, simd=[('avx2', ints)]), + TD(ints, dispatch=[('loops_autovec_int', ints)]), TD(O, f='PyNumber_Invert'), ), 'left_shift': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.left_shift'), None, - TD(ints, simd=[('avx2', ints)]), + TD(ints, dispatch=[('loops_autovec_int', ints)]), TD(O, f='PyNumber_Lshift'), ), 'right_shift': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.right_shift'), None, - TD(ints, simd=[('avx2', ints)]), + TD(ints, dispatch=[('loops_autovec_int', ints)]), TD(O, f='PyNumber_Rshift'), ), 'heaviside': @@ -1156,18 +1167,6 @@ def make_arrays(funcdict): datalist.append('(void *)NULL') tname = english_upper(chartoname[t.type]) cfunc_fname = f"{tname}_{cfunc_alias}" - if t.simd is not None: - for vt in t.simd: - code2list.append(textwrap.dedent("""\ - #ifdef HAVE_ATTRIBUTE_TARGET_{ISA} - if (NPY_CPU_HAVE({ISA})) {{ - {fname}_functions[{idx}] = {cname}_{isa}; - }} - #endif - """).format( - ISA=vt.upper(), isa=vt, - fname=name, cname=cfunc_fname, idx=k - )) else: try: thedict = arity_lookup[uf.nin, uf.nout] diff --git a/numpy/core/config.h.in b/numpy/core/config.h.in index 943a90cc8..e3b559753 100644 --- a/numpy/core/config.h.in +++ b/numpy/core/config.h.in @@ -29,28 +29,12 @@ #mesondefine HAVE___BUILTIN_BSWAP64 #mesondefine HAVE___BUILTIN_EXPECT #mesondefine HAVE___BUILTIN_MUL_OVERFLOW -#mesondefine HAVE__M_FROM_INT64 -#mesondefine HAVE__MM_LOAD_PS -#mesondefine HAVE__MM_PREFETCH -#mesondefine HAVE__MM_LOAD_PD #mesondefine HAVE___BUILTIN_PREFETCH -#mesondefine HAVE_LINK_AVX -#mesondefine HAVE_LINK_AVX2 -#mesondefine HAVE_LINK_AVX512F -#mesondefine HAVE_LINK_AVX512_SKX -#mesondefine HAVE_XGETBV #mesondefine HAVE_ATTRIBUTE_OPTIMIZE_UNROLL_LOOPS #mesondefine HAVE_ATTRIBUTE_OPTIMIZE_OPT_3 #mesondefine HAVE_ATTRIBUTE_OPTIMIZE_OPT_2 #mesondefine HAVE_ATTRIBUTE_NONNULL -#mesondefine HAVE_ATTRIBUTE_TARGET_AVX -#mesondefine HAVE_ATTRIBUTE_TARGET_AVX2 -#mesondefine HAVE_ATTRIBUTE_TARGET_AVX512F -#mesondefine HAVE_ATTRIBUTE_TARGET_AVX512_SKX -#mesondefine HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS -#mesondefine HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS -#mesondefine HAVE_ATTRIBUTE_TARGET_AVX512_SKX_WITH_INTRINSICS /* C99 complex support and complex.h are not universal */ #mesondefine HAVE_COMPLEX_H diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index ea4a818c8..3b31bcf2d 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -40,39 +40,6 @@ #define NPY_GCC_OPT_3 #endif -/* compile target attributes */ -#if defined HAVE_ATTRIBUTE_TARGET_AVX && defined HAVE_LINK_AVX -#define NPY_GCC_TARGET_AVX __attribute__((target("avx"))) -#else -#define NPY_GCC_TARGET_AVX -#endif - -#if defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS -#define HAVE_ATTRIBUTE_TARGET_FMA -#define NPY_GCC_TARGET_FMA __attribute__((target("avx2,fma"))) -#endif - -#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined HAVE_LINK_AVX2 -#define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2"))) -#else -#define NPY_GCC_TARGET_AVX2 -#endif - -#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined HAVE_LINK_AVX512F -#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f"))) -#elif defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS -#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f"))) -#else -#define NPY_GCC_TARGET_AVX512F -#endif - -#if defined HAVE_ATTRIBUTE_TARGET_AVX512_SKX && defined HAVE_LINK_AVX512_SKX -#define NPY_GCC_TARGET_AVX512_SKX __attribute__((target("avx512f,avx512dq,avx512vl,avx512bw,avx512cd"))) -#elif defined HAVE_ATTRIBUTE_TARGET_AVX512_SKX_WITH_INTRINSICS -#define NPY_GCC_TARGET_AVX512_SKX __attribute__((target("avx512f,avx512dq,avx512vl,avx512bw,avx512cd"))) -#else -#define NPY_GCC_TARGET_AVX512_SKX -#endif /* * mark an argument (starting from 1) that must not be NULL and is not checked * DO NOT USE IF FUNCTION CHECKS FOR NULL!! the compiler will remove the check @@ -83,21 +50,6 @@ #define NPY_GCC_NONNULL(n) #endif -#if defined HAVE_XMMINTRIN_H && defined HAVE__MM_LOAD_PS -#define NPY_HAVE_SSE_INTRINSICS -#endif - -#if defined HAVE_EMMINTRIN_H && defined HAVE__MM_LOAD_PD -#define NPY_HAVE_SSE2_INTRINSICS -#endif - -#if defined HAVE_IMMINTRIN_H && defined HAVE_LINK_AVX2 -#define NPY_HAVE_AVX2_INTRINSICS -#endif - -#if defined HAVE_IMMINTRIN_H && defined HAVE_LINK_AVX512F -#define NPY_HAVE_AVX512F_INTRINSICS -#endif /* * give a hint to the compiler which branch is more likely or unlikely * to occur, e.g. rare error cases: @@ -120,7 +72,7 @@ /* unlike _mm_prefetch also works on non-x86 */ #define NPY_PREFETCH(x, rw, loc) __builtin_prefetch((x), (rw), (loc)) #else -#ifdef HAVE__MM_PREFETCH +#ifdef NPY_HAVE_SSE /* _MM_HINT_ET[01] (rw = 1) unsupported, only available in gcc >= 4.9 */ #define NPY_PREFETCH(x, rw, loc) _mm_prefetch((x), loc == 0 ? _MM_HINT_NTA : \ (loc == 1 ? _MM_HINT_T2 : \ diff --git a/numpy/core/meson.build b/numpy/core/meson.build index e6607d8ad..84af05ff4 100644 --- a/numpy/core/meson.build +++ b/numpy/core/meson.build @@ -293,16 +293,7 @@ optional_function_attributes = [ ['optimize("O3")', 'OPTIMIZE_OPT_3'], ['optimize("O2")', 'OPTIMIZE_OPT_2'], ['optimize("nonnull (1)")', 'NONNULL'], - ] -if host_machine.cpu_family() in ['x86', 'x86_64'] - optional_function_attributes += [ - ['target("avx")', 'TARGET_AVX'], - ['target("avx2")', 'TARGET_AVX2'], - ['target("avx512f")', 'TARGET_AVX512F'], - ['target("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")', 'TARGET_AVX512_SKX'], - ] - # TODO: add the _WITH_INTRINSICS_AVX list -endif +] #foreach attr: optional_function_attributes # if cc.has_function_attribute(attr[0]) # cdata.set10('HAVE_ATTRIBUTE_' + attr[1], true) @@ -323,22 +314,8 @@ optional_intrinsics = [ ['__builtin_mul_overflow', '(long long)5, 5, (int*)5', [], []], ] if host_machine.cpu_family() in ['x86', 'x86_64'] - optional_intrinsics += [ - # MMX only needed for icc, but some clang's don't have it - ['_m_from_int64', '0', ['emmintrin.h'], []], - ['_mm_load_ps', '(float*)0', ['xmmintrin.h'], []], # SSE - ['_mm_prefetch', '(float*)0, _MM_HINT_NTA', ['xmmintrin.h'], []], # SSE - ['_mm_load_pd', '(double*)0', ['emmintrin.h'], []], # SSE2 + optional_intrinsics += [ ['__builtin_prefetch', '(float*)0, 0, 3', [], []], - # Check that the linker can handle AVX - ['__asm__ volatile', '"vpand %xmm1, %xmm2, %xmm3"', ['stdio.h'], ['HAVE_LINK_AVX']], - ['__asm__ volatile', '"vpand %ymm1, %ymm2, %ymm3"', ['stdio.h'], ['HAVE_LINK_AVX2']], - ['__asm__ volatile', '"vpaddd %zmm1, %zmm2, %zmm3"', ['stdio.h'], ['HAVE_LINK_AVX512F']], - ['__asm__ volatile', - '"vfpclasspd $0x40, %zmm15, %k6\\n vmovdqu8 %xmm0, %xmm1\\n vpbroadcastmb2q %k0, %xmm0"', - ['stdio.h'], ['HAVE_LINK_AVX512_SKX'] - ], - ['__asm__ volatile', '"xgetbv"', ['stdio.h'], ['HAVE_XGETBV']], ] endif foreach intrin: optional_intrinsics diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 2cad4ba43..b48d46c3f 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -171,18 +171,6 @@ def check_math_capabilities(config, ext, moredefs, mathlibs): else: return 1 - # NOTE: not needed in Meson build, we set the minimum - # compiler version to 8.4 to avoid this bug - # GH-14787: Work around GCC<8.4 bug when compiling with AVX512 - # support on Windows-based platforms - def check_gh14787(fn): - if fn == 'attribute_target_avx512f': - if (sys.platform in ('win32', 'cygwin') and - config.check_compiler_gcc() and - not config.check_gcc_version_at_least(8, 4)): - ext.extra_compile_args.extend( - ['-ffixed-xmm%s' % n for n in range(16, 32)]) - #use_msvc = config.check_decl("_MSC_VER") if not check_funcs_once(MANDATORY_FUNCS, add_to_moredefs=False): raise SystemError("One of the required function to build numpy is not" @@ -233,20 +221,8 @@ def check_math_capabilities(config, ext, moredefs, mathlibs): for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES: if config.check_gcc_function_attribute(dec, fn): moredefs.append((fname2def(fn), 1)) - check_gh14787(fn) platform = sysconfig.get_platform() - if ("x86_64" in platform): - for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX: - if config.check_gcc_function_attribute(dec, fn): - moredefs.append((fname2def(fn), 1)) - check_gh14787(fn) - for dec, fn, code, header in ( - OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX): - if config.check_gcc_function_attribute_with_intrinsics( - dec, fn, code, header): - moredefs.append((fname2def(fn), 1)) - for fn in OPTIONAL_VARIABLE_ATTRIBUTES: if config.check_gcc_variable_attribute(fn): m = fn.replace("(", "_").replace(")", "_") @@ -1019,6 +995,7 @@ def configuration(parent_package='',top_path=None): join('src', 'umath', 'loops_modulo.dispatch.c.src'), join('src', 'umath', 'loops_comparison.dispatch.c.src'), join('src', 'umath', 'loops_unary_complex.dispatch.c.src'), + join('src', 'umath', 'loops_autovec_int.dispatch.c.src'), join('src', 'umath', 'matmul.h.src'), join('src', 'umath', 'matmul.c.src'), join('src', 'umath', 'clip.h'), diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 0512457f4..ef8d21fa7 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -183,25 +183,7 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), # Test `long long` for arm+clang 13 (gh-22811, # but we use all versions of __builtin_mul_overflow): ("__builtin_mul_overflow", '(long long)5, 5, (int*)5'), - # MMX only needed for icc, but some clangs don't have it - ("_m_from_int64", '0', "emmintrin.h"), - ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE - ("_mm_prefetch", '(float*)0, _MM_HINT_NTA', - "xmmintrin.h"), # SSE - ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2 ("__builtin_prefetch", "(float*)0, 0, 3"), - # check that the linker can handle avx - ("__asm__ volatile", '"vpand %xmm1, %xmm2, %xmm3"', - "stdio.h", "LINK_AVX"), - ("__asm__ volatile", '"vpand %ymm1, %ymm2, %ymm3"', - "stdio.h", "LINK_AVX2"), - ("__asm__ volatile", '"vpaddd %zmm1, %zmm2, %zmm3"', - "stdio.h", "LINK_AVX512F"), - ("__asm__ volatile", '"vfpclasspd $0x40, %zmm15, %k6\\n"\ - "vmovdqu8 %xmm0, %xmm1\\n"\ - "vpbroadcastmb2q %k0, %xmm0\\n"', - "stdio.h", "LINK_AVX512_SKX"), - ("__asm__ volatile", '"xgetbv"', "stdio.h", "XGETBV"), ] # function attributes @@ -216,44 +198,6 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', ('__attribute__((nonnull (1)))', 'attribute_nonnull'), ] - -OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))', - 'attribute_target_avx'), - ('__attribute__((target ("avx2")))', - 'attribute_target_avx2'), - ('__attribute__((target ("avx512f")))', - 'attribute_target_avx512f'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx'), - ] - -# function attributes with intrinsics -# To ensure your compiler can compile avx intrinsics with just the attributes -# gcc 4.8.4 support attributes but not with intrisics -# tested via "#include<%s> int %s %s(void *){code; return 0;};" % (header, attribute, name, code) -# function name will be converted to HAVE_<upper-case-name> preprocessor macro -# The _mm512_castps_si512 instruction is specific check for AVX-512F support -# in gcc-4.9 which is missing a subset of intrinsics. See -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878 -OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [ - ('__attribute__((target("avx2,fma")))', - 'attribute_target_avx2_with_intrinsics', - '__m256 temp = _mm256_set1_ps(1.0); temp = \ - _mm256_fmadd_ps(temp, temp, temp)', - 'immintrin.h'), - ('__attribute__((target("avx512f")))', - 'attribute_target_avx512f_with_intrinsics', - '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))', - 'immintrin.h'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx_with_intrinsics', - '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\ - __m512i unused_temp = \ - _mm512_castps_si512(_mm512_set1_ps(1.0));\ - _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))', - 'immintrin.h'), - ] - def fname2def(name): return "HAVE_%s" % name.upper() diff --git a/numpy/core/src/umath/loops_exponent_log.dispatch.c.src b/numpy/core/src/umath/loops_exponent_log.dispatch.c.src index 182c57b01..1fac3c150 100644 --- a/numpy/core/src/umath/loops_exponent_log.dispatch.c.src +++ b/numpy/core/src/umath/loops_exponent_log.dispatch.c.src @@ -239,7 +239,7 @@ fma_scalef_ps(__m256 poly, __m256 quadrant) #ifdef SIMD_AVX512F -NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16 +NPY_FINLINE __mmask16 avx512_get_full_load_mask_ps(void) { return 0xFFFF; |
