summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Matti Picus <matti.picus@gmail.com> 2021-12-14 09:16:46 -0700
committer: GitHub <noreply@github.com> 2021-12-14 09:16:46 -0700
commit: 8111d51af2c743c1ff90dc014ede49564a8f22d9 (patch)
tree: bb66fbc530f98bd21bff75a0c2e5fab24663ec73
parent: 77c3254f2aac861fc4da75fd37e193ed73b42d47 (diff)
parent: 8032ff73544ad4bb405c4a0f3bd983f86b273e2d (diff)
download: numpy-8111d51af2c743c1ff90dc014ede49564a8f22d9.tar.gz
Merge pull request #20552 from pradghos/s390x_cpu_feature
ENH: Extending CPU feature detection framework to support IBM Z SIMD
-rw-r--r-- doc/source/reference/simd/build-options.rst | 3
-rw-r--r-- doc/source/reference/simd/gen_features.py | 6
-rw-r--r-- doc/source/reference/simd/generated_tables/cpu_features.inc | 15
-rw-r--r-- numpy/core/src/common/npy_cpu_features.c.src | 37
-rw-r--r-- numpy/core/src/common/npy_cpu_features.h | 11
-rw-r--r-- numpy/core/tests/test_cpu_features.py | 11
-rw-r--r-- numpy/distutils/ccompiler_opt.py | 29
-rw-r--r-- numpy/distutils/checks/cpu_vx.c | 16
-rw-r--r-- numpy/distutils/checks/cpu_vxe.c | 25
-rw-r--r-- numpy/distutils/checks/cpu_vxe2.c | 21
-rw-r--r-- numpy/distutils/command/build.py | 3
-rw-r--r-- numpy/distutils/fcompiler/gnu.py | 2
-rw-r--r-- numpy/distutils/tests/test_ccompiler_opt.py | 75
13 files changed, 217 insertions, 37 deletions
diff --git a/doc/source/reference/simd/build-options.rst b/doc/source/reference/simd/build-options.rst
index 80ef2c639..0a40d3ff5 100644
--- a/doc/source/reference/simd/build-options.rst
+++ b/doc/source/reference/simd/build-options.rst
@@ -165,6 +165,7 @@ Special Options
ARMHF ``NONE``
ARM64 A.K. AARCH64 ``NEON`` ``NEON_FP16`` ``NEON_VFPV4``
``ASIMD``
+ IBM/ZSYSTEM(S390X) ``NONE``
====================================== =======================================
- ``MAX``: Enables all supported CPU features by the compiler and platform.
@@ -338,7 +339,7 @@ that includes several sections, and each section has several values, representin
**Platform**:
- :enabled:`Architecture`: The architecture name of target CPU. It should be one of
- ``x86``, ``x64``, ``ppc64``, ``ppc64le``, ``armhf``, ``aarch64`` or ``unknown``.
+ ``x86``, ``x64``, ``ppc64``, ``ppc64le``, ``armhf``, ``aarch64``, ``s390x`` or ``unknown``.
- :enabled:`Compiler`: The compiler name. It should be one of
gcc, clang, msvc, icc, iccw or unix-like.
diff --git a/doc/source/reference/simd/gen_features.py b/doc/source/reference/simd/gen_features.py
index d74d54016..9a38ef5c9 100644
--- a/doc/source/reference/simd/gen_features.py
+++ b/doc/source/reference/simd/gen_features.py
@@ -158,6 +158,7 @@ if __name__ == '__main__':
pretty_names = {
"PPC64": "IBM/POWER big-endian",
"PPC64LE": "IBM/POWER little-endian",
+ "S390X": "IBM/ZSYSTEM(S390X)",
"ARMHF": "ARMv7/A32",
"AARCH64": "ARMv8/A64",
"ICC": "Intel Compiler",
@@ -170,7 +171,7 @@ if __name__ == '__main__':
with open(path.join(gen_path, 'cpu_features.inc'), 'wt') as fd:
fd.write(f'.. generated via {__file__}\n\n')
for arch in (
- ("x86", "PPC64", "PPC64LE", "ARMHF", "AARCH64")
+ ("x86", "PPC64", "PPC64LE", "ARMHF", "AARCH64", "S390X")
):
title = "On " + pretty_names.get(arch, arch)
table = Features(arch, 'gcc').table()
@@ -183,7 +184,8 @@ if __name__ == '__main__':
("PPC64", ("clang",)),
("PPC64LE", ("clang",)),
("ARMHF", ("clang",)),
- ("AARCH64", ("clang",))
+ ("AARCH64", ("clang",)),
+ ("S390X", ("clang",))
):
arch_pname = pretty_names.get(arch, arch)
for cc in cc_names:
diff --git a/doc/source/reference/simd/generated_tables/cpu_features.inc b/doc/source/reference/simd/generated_tables/cpu_features.inc
index a7eae5652..17d1b4951 100644
--- a/doc/source/reference/simd/generated_tables/cpu_features.inc
+++ b/doc/source/reference/simd/generated_tables/cpu_features.inc
@@ -1,4 +1,4 @@
-.. generated via /home/seiko/work/repos/numpy/doc/source/reference/simd/./gen_features.py
+.. generated via /home/seiko/work/repos/review/numpy/doc/source/reference/simd/gen_features.py
On x86
~~~~~~
@@ -91,3 +91,16 @@ On ARMv8/A64
``ASIMDFHM`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP``
============== ===========================================================
+On IBM/ZSYSTEM(S390X)
+~~~~~~~~~~~~~~~~~~~~~
+.. table::
+ :align: left
+
+ ======== ==============
+ Name Implies
+ ======== ==============
+ ``VX``
+ ``VXE`` ``VX``
+ ``VXE2`` ``VX`` ``VXE``
+ ======== ==============
+
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index a2383c45f..1385220f9 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -62,6 +62,7 @@ npy_cpu_features_dict(void)
* AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG,
* AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL,
* VSX, VSX2, VSX3,
+ * VX, VXE, VXE2,
* NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM#
*/
if (PyDict_SetItemString(dict, "@feature@",
@@ -509,6 +510,42 @@ npy__cpu_init_features(void)
#endif
}
+/***************** ZARCH ******************/
+
+#elif defined(__s390x__)
+
+#include <sys/auxv.h>
+#ifndef HWCAP_S390_VXE
+ #define HWCAP_S390_VXE 8192
+#endif
+
+#ifndef HWCAP_S390_VXRS_EXT2
+ #define HWCAP_S390_VXRS_EXT2 32768
+#endif
+
+static void
+npy__cpu_init_features(void)
+{
+ memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
+
+ unsigned int hwcap = getauxval(AT_HWCAP);
+ if ((hwcap & HWCAP_S390_VX) == 0) {
+ return;
+ }
+
+ if (hwcap & HWCAP_S390_VXRS_EXT2) {
+ npy__cpu_have[NPY_CPU_FEATURE_VX] =
+ npy__cpu_have[NPY_CPU_FEATURE_VXE] =
+ npy__cpu_have[NPY_CPU_FEATURE_VXE2] = 1;
+ return;
+ }
+
+ npy__cpu_have[NPY_CPU_FEATURE_VXE] = (hwcap & HWCAP_S390_VXE) != 0;
+
+ npy__cpu_have[NPY_CPU_FEATURE_VX] = 1;
+}
+
+
/***************** ARM ******************/
#elif defined(__arm__) || defined(__aarch64__)
diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h
index ce1fc822a..1f52a445d 100644
--- a/numpy/core/src/common/npy_cpu_features.h
+++ b/numpy/core/src/common/npy_cpu_features.h
@@ -82,6 +82,15 @@ enum npy_cpu_features
// ARMv8.2 single&half-precision multiply
NPY_CPU_FEATURE_ASIMDFHM = 307,
+ // IBM/ZARCH
+ NPY_CPU_FEATURE_VX = 350,
+
+ // Vector-Enhancements Facility 1
+ NPY_CPU_FEATURE_VXE = 351,
+
+ // Vector-Enhancements Facility 2
+ NPY_CPU_FEATURE_VXE2 = 352,
+
NPY_CPU_FEATURE_MAX
};
@@ -138,6 +147,7 @@ npy_cpu_features_dict(void);
* On aarch64: ['NEON', 'NEON_FP16', 'NEON_VPFV4', 'ASIMD']
* On ppc64: []
* On ppc64le: ['VSX', 'VSX2']
+ * On s390x: []
* On any other arch or if the optimization is disabled: []
*/
NPY_VISIBILITY_HIDDEN PyObject *
@@ -159,6 +169,7 @@ npy_cpu_baseline_list(void);
* On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
* On ppc64: ['VSX', 'VSX2', 'VSX3']
* On ppc64le: ['VSX3']
+ * On s390x: ['VX', 'VXE', 'VXE2']
* On any other arch or if the optimization is disabled: []
*/
NPY_VISIBILITY_HIDDEN PyObject *
diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py
index 2ccbff41c..706cf7a7e 100644
--- a/numpy/core/tests/test_cpu_features.py
+++ b/numpy/core/tests/test_cpu_features.py
@@ -146,6 +146,17 @@ class Test_POWER_Features(AbstractTest):
def load_flags(self):
self.load_flags_auxv()
+
+is_zarch = re.match("^(s390x)", machine, re.IGNORECASE)
+@pytest.mark.skipif(not is_linux or not is_zarch,
+ reason="Only for Linux and IBM Z")
+class Test_ZARCH_Features(AbstractTest):
+ features = ["VX", "VXE", "VXE2"]
+
+ def load_flags(self):
+ self.load_flags_auxv()
+
+
is_arm = re.match("^(arm|aarch64)", machine, re.IGNORECASE)
@pytest.mark.skipif(not is_linux or not is_arm, reason="Only for Linux and ARM")
class Test_ARM_Features(AbstractTest):
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index b38e47c13..e020d96ee 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -228,6 +228,7 @@ class _Config:
x64 = "SSE SSE2 SSE3",
ppc64 = '', # play it safe
ppc64le = "VSX VSX2",
+ s390x = '',
armhf = '', # play it safe
aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD"
)
@@ -293,6 +294,13 @@ class _Config:
VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
## Power9/ISA 3.00
VSX3 = dict(interest=3, implies="VSX2", implies_detect=False),
+ # IBM/Z
+ ## VX(z13) support
+ VX = dict(interest=1, headers="vecintrin.h"),
+ ## Vector-Enhancements Facility
+ VXE = dict(interest=2, implies="VX", implies_detect=False),
+ ## Vector-Enhancements Facility 2
+ VXE2 = dict(interest=3, implies="VXE", implies_detect=False),
# ARM
NEON = dict(interest=1, headers="arm_neon.h"),
NEON_FP16 = dict(interest=2, implies="NEON"),
@@ -472,6 +480,23 @@ class _Config:
return partial
+ on_zarch = self.cc_on_s390x
+ if on_zarch:
+ partial = dict(
+ VX = dict(
+ flags="-march=arch11 -mzvector"
+ ),
+ VXE = dict(
+ flags="-march=arch12", implies_detect=False
+ ),
+ VXE2 = dict(
+ flags="-march=arch13", implies_detect=False
+ )
+ )
+
+ return partial
+
+
if self.cc_on_aarch64 and is_unix: return dict(
NEON = dict(
implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True
@@ -919,6 +944,7 @@ class _CCompiler:
("cc_on_ppc64", ".*(powerpc|ppc)64.*"),
("cc_on_aarch64", ".*(aarch64|arm64).*"),
("cc_on_armhf", ".*arm.*"),
+ ("cc_on_s390x", ".*s390x.*"),
# undefined platform
("cc_on_noarch", ""),
)
@@ -983,7 +1009,8 @@ class _CCompiler:
self.cc_is_gcc = True
self.cc_march = "unknown"
- for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64"):
+ for arch in ("x86", "x64", "ppc64", "ppc64le",
+ "armhf", "aarch64", "s390x"):
if getattr(self, "cc_on_" + arch):
self.cc_march = arch
break
diff --git a/numpy/distutils/checks/cpu_vx.c b/numpy/distutils/checks/cpu_vx.c
new file mode 100644
index 000000000..18fb7ef94
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vx.c
@@ -0,0 +1,16 @@
+#if (__VEC__ < 10301) || (__ARCH__ < 11)
+ #error VX not supported
+#endif
+
+#include <vecintrin.h>
+int main(int argc, char **argv)
+{
+ __vector double x = vec_abs(vec_xl(argc, (double*)argv));
+ __vector double y = vec_load_len((double*)argv, (unsigned int)argc);
+
+ x = vec_round(vec_ceil(x) + vec_floor(y));
+ __vector bool long long m = vec_cmpge(x, y);
+ __vector long long i = vec_signed(vec_sel(x, y, m));
+
+ return (int)vec_extract(i, 0);
+}
diff --git a/numpy/distutils/checks/cpu_vxe.c b/numpy/distutils/checks/cpu_vxe.c
new file mode 100644
index 000000000..ca41f8434
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vxe.c
@@ -0,0 +1,25 @@
+#if (__VEC__ < 10302) || (__ARCH__ < 12)
+ #error VXE not supported
+#endif
+
+#include <vecintrin.h>
+int main(int argc, char **argv)
+{
+ __vector float x = vec_nabs(vec_xl(argc, (float*)argv));
+ __vector float y = vec_load_len((float*)argv, (unsigned int)argc);
+
+ x = vec_round(vec_ceil(x) + vec_floor(y));
+ __vector bool int m = vec_cmpge(x, y);
+ x = vec_sel(x, y, m);
+
+ // need to test the existence of intrin "vflls" since vec_doublee
+ // maps to the wrong intrin "vfll".
+ // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100871
+#if defined(__GNUC__) && !defined(__clang__)
+ __vector long long i = vec_signed(__builtin_s390_vflls(x));
+#else
+ __vector long long i = vec_signed(vec_doublee(x));
+#endif
+
+ return (int)vec_extract(i, 0);
+}
diff --git a/numpy/distutils/checks/cpu_vxe2.c b/numpy/distutils/checks/cpu_vxe2.c
new file mode 100644
index 000000000..f36d57129
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vxe2.c
@@ -0,0 +1,21 @@
+#if (__VEC__ < 10303) || (__ARCH__ < 13)
+ #error VXE2 not supported
+#endif
+
+#include <vecintrin.h>
+
+int main(int argc, char **argv)
+{
+ int val;
+ __vector signed short large = { 'a', 'b', 'c', 'a', 'g', 'h', 'g', 'o' };
+ __vector signed short search = { 'g', 'h', 'g', 'o' };
+ __vector unsigned char len = { 0 };
+ __vector unsigned char res = vec_search_string_cc(large, search, len, &val);
+ __vector float x = vec_xl(argc, (float*)argv);
+ __vector int i = vec_signed(x);
+
+ i = vec_srdb(vec_sldb(i, i, 2), i, 3);
+ val += (int)vec_extract(res, 1);
+ val += vec_extract(i, 0);
+ return val;
+}
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index a4fda537d..dc1ab3b9b 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -47,7 +47,8 @@ class build(old_build):
- not part of dispatch-able features(--cpu-dispatch)
- not supported by compiler or platform
"""
- self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD"
+ self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F" \
+ " AVX512_SKX VSX VSX2 VSX3 NEON ASIMD VX VXE VXE2"
def finalize_options(self):
build_scripts = self.build_scripts
diff --git a/numpy/distutils/fcompiler/gnu.py b/numpy/distutils/fcompiler/gnu.py
index 39178071d..d8143328e 100644
--- a/numpy/distutils/fcompiler/gnu.py
+++ b/numpy/distutils/fcompiler/gnu.py
@@ -324,7 +324,7 @@ class Gnu95FCompiler(GnuFCompiler):
c_archs[c_archs.index("i386")] = "i686"
# check the arches the Fortran compiler supports, and compare with
# arch flags from C compiler
- for arch in ["ppc", "i686", "x86_64", "ppc64"]:
+ for arch in ["ppc", "i686", "x86_64", "ppc64", "s390x"]:
if _can_target(cmd, arch) and arch in c_archs:
arch_flags.extend(["-arch", arch])
return arch_flags
diff --git a/numpy/distutils/tests/test_ccompiler_opt.py b/numpy/distutils/tests/test_ccompiler_opt.py
index 1b27ab07c..6d42cc172 100644
--- a/numpy/distutils/tests/test_ccompiler_opt.py
+++ b/numpy/distutils/tests/test_ccompiler_opt.py
@@ -32,6 +32,7 @@ arch_compilers = dict(
ppc64le = ("gcc", "clang"),
armhf = ("gcc", "clang"),
aarch64 = ("gcc", "clang"),
+ s390x = ("gcc", "clang"),
noarch = ("gcc",)
)
@@ -382,18 +383,19 @@ class _Test_CCompilerOpt:
if o == "native" and self.cc_name() == "msvc":
continue
self.expect(o,
- trap_files=".*cpu_(sse|vsx|neon).c",
- x86="", ppc64="", armhf=""
+ trap_files=".*cpu_(sse|vsx|neon|vx).c",
+ x86="", ppc64="", armhf="", s390x=""
)
self.expect(o,
- trap_files=".*cpu_(sse3|vsx2|neon_vfpv4).c",
+ trap_files=".*cpu_(sse3|vsx2|neon_vfpv4|vxe).c",
x86="sse sse2", ppc64="vsx", armhf="neon neon_fp16",
- aarch64="", ppc64le=""
+ aarch64="", ppc64le="", s390x="vx"
)
self.expect(o,
trap_files=".*cpu_(popcnt|vsx3).c",
x86="sse .* sse41", ppc64="vsx vsx2",
- armhf="neon neon_fp16 .* asimd .*"
+ armhf="neon neon_fp16 .* asimd .*",
+ s390x="vx vxe vxe2"
)
self.expect(o,
x86_gcc=".* xop fma4 .* avx512f .* avx512_knl avx512_knm avx512_skx .*",
@@ -403,13 +405,14 @@ class _Test_CCompilerOpt:
# in msvc, avx512_knl avx512_knm aren't supported
x86_msvc=".* xop fma4 .* avx512f .* avx512_skx .*",
armhf=".* asimd asimdhp asimddp .*",
- ppc64="vsx vsx2 vsx3.*"
+ ppc64="vsx vsx2 vsx3.*",
+ s390x="vx vxe vxe2.*"
)
# min
self.expect("min",
x86="sse sse2", x64="sse sse2 sse3",
armhf="", aarch64="neon neon_fp16 .* asimd",
- ppc64="", ppc64le="vsx vsx2"
+ ppc64="", ppc64le="vsx vsx2", s390x=""
)
self.expect(
"min", trap_files=".*cpu_(sse2|vsx2).c",
@@ -420,7 +423,7 @@ class _Test_CCompilerOpt:
try:
self.expect("native",
trap_flags=".*(-march=native|-xHost|/QxHost).*",
- x86=".*", ppc64=".*", armhf=".*"
+ x86=".*", ppc64=".*", armhf=".*", s390x=".*"
)
if self.march() != "unknown":
raise AssertionError(
@@ -432,14 +435,15 @@ class _Test_CCompilerOpt:
def test_flags(self):
self.expect_flags(
- "sse sse2 vsx vsx2 neon neon_fp16",
+ "sse sse2 vsx vsx2 neon neon_fp16 vx vxe",
x86_gcc="-msse -msse2", x86_icc="-msse -msse2",
x86_iccw="/arch:SSE2",
x86_msvc="/arch:SSE2" if self.march() == "x86" else "",
ppc64_gcc= "-mcpu=power8",
ppc64_clang="-maltivec -mvsx -mpower8-vector",
armhf_gcc="-mfpu=neon-fp16 -mfp16-format=ieee",
- aarch64=""
+ aarch64="",
+ s390x="-mzvector -march=arch12"
)
# testing normalize -march
self.expect_flags(
@@ -484,7 +488,7 @@ class _Test_CCompilerOpt:
try:
self.expect_targets(
targets,
- x86="", armhf="", ppc64=""
+ x86="", armhf="", ppc64="", s390x=""
)
if self.march() != "unknown":
raise AssertionError(
@@ -496,26 +500,26 @@ class _Test_CCompilerOpt:
def test_targets_syntax(self):
for targets in (
- "/*@targets $keep_baseline sse vsx neon*/",
- "/*@targets,$keep_baseline,sse,vsx,neon*/",
- "/*@targets*$keep_baseline*sse*vsx*neon*/",
+ "/*@targets $keep_baseline sse vsx neon vx*/",
+ "/*@targets,$keep_baseline,sse,vsx,neon vx*/",
+ "/*@targets*$keep_baseline*sse*vsx*neon*vx*/",
"""
/*
** @targets
- ** $keep_baseline, sse vsx,neon
+ ** $keep_baseline, sse vsx,neon, vx
*/
""",
"""
/*
- ************@targets*************
- ** $keep_baseline, sse vsx, neon
- *********************************
+ ************@targets****************
+ ** $keep_baseline, sse vsx, neon, vx
+ ************************************
*/
""",
"""
/*
/////////////@targets/////////////////
- //$keep_baseline//sse//vsx//neon
+ //$keep_baseline//sse//vsx//neon//vx
/////////////////////////////////////
*/
""",
@@ -523,11 +527,11 @@ class _Test_CCompilerOpt:
/*
@targets
$keep_baseline
- SSE VSX NEON*/
+ SSE VSX NEON VX*/
"""
) :
self.expect_targets(targets,
- x86="sse", ppc64="vsx", armhf="neon", unknown=""
+ x86="sse", ppc64="vsx", armhf="neon", s390x="vx", unknown=""
)
def test_targets(self):
@@ -538,10 +542,12 @@ class _Test_CCompilerOpt:
sse sse2 sse41 avx avx2 avx512f
vsx vsx2 vsx3
neon neon_fp16 asimdhp asimddp
+ vx vxe vxe2
*/
""",
- baseline="avx vsx2 asimd",
- x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3"
+ baseline="avx vsx2 asimd vx vxe",
+ x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3",
+ s390x="vxe2"
)
# test skipping non-dispatch features
self.expect_targets(
@@ -550,10 +556,11 @@ class _Test_CCompilerOpt:
sse41 avx avx2 avx512f
vsx2 vsx3
asimd asimdhp asimddp
+ vx vxe vxe2
*/
""",
- baseline="", dispatch="sse41 avx2 vsx2 asimd asimddp",
- x86="avx2 sse41", armhf="asimddp asimd", ppc64="vsx2"
+ baseline="", dispatch="sse41 avx2 vsx2 asimd asimddp vxe2",
+ x86="avx2 sse41", armhf="asimddp asimd", ppc64="vsx2", s390x="vxe2"
)
# test skipping features that not supported
self.expect_targets(
@@ -562,11 +569,13 @@ class _Test_CCompilerOpt:
sse2 sse41 avx2 avx512f
vsx2 vsx3
neon asimdhp asimddp
+ vx vxe vxe2
*/
""",
baseline="",
- trap_files=".*(avx2|avx512f|vsx3|asimddp).c",
- x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon"
+ trap_files=".*(avx2|avx512f|vsx3|asimddp|vxe2).c",
+ x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon",
+ s390x="vxe vx"
)
# test skipping features that implies each other
self.expect_targets(
@@ -598,14 +607,16 @@ class _Test_CCompilerOpt:
sse2 sse42 avx2 avx512f
vsx2 vsx3
neon neon_vfpv4 asimd asimddp
+ vx vxe vxe2
*/
""",
- baseline="sse41 avx2 vsx2 asimd vsx3",
+ baseline="sse41 avx2 vsx2 asimd vsx3 vxe",
x86="avx512f avx2 sse42 sse2",
ppc64="vsx3 vsx2",
armhf="asimddp asimd neon_vfpv4 neon",
# neon, neon_vfpv4, asimd implies each other
- aarch64="asimddp asimd"
+ aarch64="asimddp asimd",
+ s390x="vxe2 vxe vx"
)
# 'keep_sort', leave the sort as-is
self.expect_targets(
@@ -615,13 +626,15 @@ class _Test_CCompilerOpt:
avx512f sse42 avx2 sse2
vsx2 vsx3
asimd neon neon_vfpv4 asimddp
+ vxe vxe2
*/
""",
x86="avx512f sse42 avx2 sse2",
ppc64="vsx2 vsx3",
armhf="asimd neon neon_vfpv4 asimddp",
# neon, neon_vfpv4, asimd implies each other
- aarch64="asimd asimddp"
+ aarch64="asimd asimddp",
+ s390x="vxe vxe2"
)
# 'autovec', skipping features that can't be
# vectorized by the compiler
@@ -736,11 +749,13 @@ class _Test_CCompilerOpt:
(sse41 avx sse42) (sse3 avx2 avx512f)
(vsx vsx3 vsx2)
(asimddp neon neon_vfpv4 asimd asimdhp)
+ (vx vxe vxe2)
*/
""",
x86="avx avx512f",
ppc64="vsx3",
armhf=r"\(asimdhp asimddp\)",
+ s390x="vxe2"
)
# test compiler variety and avoiding duplicating
self.expect_targets(