Merge pull request #20552 from pradghos/s390x_cpu_feature

ENH: Extending CPU feature detection framework to support IBM Z SIMD
author: Matti Picus <matti.picus@gmail.com> 2021-12-14 09:16:46 -0700
committer: GitHub <noreply@github.com> 2021-12-14 09:16:46 -0700
commit: 8111d51af2c743c1ff90dc014ede49564a8f22d9 (patch)
tree: bb66fbc530f98bd21bff75a0c2e5fab24663ec73
parent: 77c3254f2aac861fc4da75fd37e193ed73b42d47 (diff)
parent: 8032ff73544ad4bb405c4a0f3bd983f86b273e2d (diff)
download: numpy-8111d51af2c743c1ff90dc014ede49564a8f22d9.tar.gz
13 files changed, 217 insertions, 37 deletions
diff --git a/doc/source/reference/simd/build-options.rst b/doc/source/reference/simd/build-options.rst
index 80ef2c639..0a40d3ff5 100644
--- a/doc/source/reference/simd/build-options.rst
+++ b/doc/source/reference/simd/build-options.rst
@@ -165,6 +165,7 @@ Special Options
        ARMHF                                  ``NONE``
        ARM64 A.K. AARCH64                     ``NEON`` ``NEON_FP16`` ``NEON_VFPV4``
                                               ``ASIMD``
+       IBM/ZSYSTEM(S390X)                     ``NONE``
       ======================================  =======================================
 
 - ``MAX``: Enables all supported CPU features by the compiler and platform.
@@ -338,7 +339,7 @@ that includes several sections, and each section has several values, representin
 **Platform**:
 
 - :enabled:`Architecture`: The architecture name of target CPU. It should be one of
-  ``x86``, ``x64``, ``ppc64``, ``ppc64le``, ``armhf``, ``aarch64`` or ``unknown``.
+  ``x86``, ``x64``, ``ppc64``, ``ppc64le``, ``armhf``, ``aarch64``, ``s390x`` or ``unknown``.
 
 - :enabled:`Compiler`: The compiler name. It should be one of
   gcc, clang, msvc, icc, iccw or unix-like.
diff --git a/doc/source/reference/simd/gen_features.py b/doc/source/reference/simd/gen_features.py
index d74d54016..9a38ef5c9 100644
--- a/doc/source/reference/simd/gen_features.py
+++ b/doc/source/reference/simd/gen_features.py
@@ -158,6 +158,7 @@ if __name__ == '__main__':
     pretty_names = {
         "PPC64": "IBM/POWER big-endian",
         "PPC64LE": "IBM/POWER little-endian",
+        "S390X": "IBM/ZSYSTEM(S390X)",
         "ARMHF": "ARMv7/A32",
         "AARCH64": "ARMv8/A64",
         "ICC": "Intel Compiler",
@@ -170,7 +171,7 @@ if __name__ == '__main__':
     with open(path.join(gen_path, 'cpu_features.inc'), 'wt') as fd:
         fd.write(f'.. generated via {__file__}\n\n')
         for arch in (
-            ("x86", "PPC64", "PPC64LE", "ARMHF", "AARCH64")
+            ("x86", "PPC64", "PPC64LE", "ARMHF", "AARCH64", "S390X")
         ):
             title = "On " + pretty_names.get(arch, arch)
             table = Features(arch, 'gcc').table()
@@ -183,7 +184,8 @@ if __name__ == '__main__':
             ("PPC64", ("clang",)),
             ("PPC64LE", ("clang",)),
             ("ARMHF", ("clang",)),
-            ("AARCH64", ("clang",))
+            ("AARCH64", ("clang",)),
+            ("S390X", ("clang",))
         ):
             arch_pname = pretty_names.get(arch, arch)
             for cc in cc_names:
diff --git a/doc/source/reference/simd/generated_tables/cpu_features.inc b/doc/source/reference/simd/generated_tables/cpu_features.inc
index a7eae5652..17d1b4951 100644
--- a/doc/source/reference/simd/generated_tables/cpu_features.inc
+++ b/doc/source/reference/simd/generated_tables/cpu_features.inc
@@ -1,4 +1,4 @@
-.. generated via /home/seiko/work/repos/numpy/doc/source/reference/simd/./gen_features.py
+.. generated via /home/seiko/work/repos/review/numpy/doc/source/reference/simd/gen_features.py
 
 On x86
 ~~~~~~
@@ -91,3 +91,16 @@ On ARMv8/A64
     ``ASIMDFHM``   ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP``
     ============== ===========================================================
 
+On IBM/ZSYSTEM(S390X)
+~~~~~~~~~~~~~~~~~~~~~
+.. table::
+    :align: left
+
+    ======== ==============
+    Name     Implies       
+    ======== ==============
+    ``VX``                 
+    ``VXE``  ``VX``        
+    ``VXE2`` ``VX`` ``VXE``
+    ======== ==============
+
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index a2383c45f..1385220f9 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -62,6 +62,7 @@ npy_cpu_features_dict(void)
      *            AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG,
      *            AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL,
      *            VSX, VSX2, VSX3,
+     *            VX, VXE, VXE2,
      *            NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM#
     */
         if (PyDict_SetItemString(dict, "@feature@",
@@ -509,6 +510,42 @@ npy__cpu_init_features(void)
 #endif
 }
 
+/***************** ZARCH ******************/
+
+#elif defined(__s390x__)
+
+#include <sys/auxv.h>
+#ifndef HWCAP_S390_VXE
+    #define HWCAP_S390_VXE 8192
+#endif
+
+#ifndef HWCAP_S390_VXRS_EXT2
+    #define HWCAP_S390_VXRS_EXT2 32768
+#endif
+
+static void
+npy__cpu_init_features(void)
+{
+    memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
+    
+    unsigned int hwcap = getauxval(AT_HWCAP);
+    if ((hwcap & HWCAP_S390_VX) == 0) {
+        return;
+    }
+
+    if (hwcap & HWCAP_S390_VXRS_EXT2) {
+       npy__cpu_have[NPY_CPU_FEATURE_VX]  =
+       npy__cpu_have[NPY_CPU_FEATURE_VXE] =
+       npy__cpu_have[NPY_CPU_FEATURE_VXE2] = 1;
+       return;
+    }
+    
+    npy__cpu_have[NPY_CPU_FEATURE_VXE] = (hwcap & HWCAP_S390_VXE) != 0;
+
+    npy__cpu_have[NPY_CPU_FEATURE_VX]  = 1;
+}
+
+
 /***************** ARM ******************/
 
 #elif defined(__arm__) || defined(__aarch64__)
diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h
index ce1fc822a..1f52a445d 100644
--- a/numpy/core/src/common/npy_cpu_features.h
+++ b/numpy/core/src/common/npy_cpu_features.h
@@ -82,6 +82,15 @@ enum npy_cpu_features
     // ARMv8.2 single&half-precision multiply
     NPY_CPU_FEATURE_ASIMDFHM          = 307,
 
+    // IBM/ZARCH
+    NPY_CPU_FEATURE_VX                = 350,
+ 
+    // Vector-Enhancements Facility 1
+    NPY_CPU_FEATURE_VXE               = 351,
+
+    // Vector-Enhancements Facility 2
+    NPY_CPU_FEATURE_VXE2              = 352,
+
     NPY_CPU_FEATURE_MAX
 };
 
@@ -138,6 +147,7 @@ npy_cpu_features_dict(void);
  * On aarch64: ['NEON', 'NEON_FP16', 'NEON_VPFV4', 'ASIMD']
  * On ppc64: []
  * On ppc64le: ['VSX', 'VSX2']
+ * On s390x: []
  * On any other arch or if the optimization is disabled: []
  */
 NPY_VISIBILITY_HIDDEN PyObject *
@@ -159,6 +169,7 @@ npy_cpu_baseline_list(void);
  * On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
  * On ppc64:  ['VSX', 'VSX2', 'VSX3']
  * On ppc64le: ['VSX3']
+ * On s390x: ['VX', 'VXE', 'VXE2']
  * On any other arch or if the optimization is disabled: []
  */
 NPY_VISIBILITY_HIDDEN PyObject *
diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py
index 2ccbff41c..706cf7a7e 100644
--- a/numpy/core/tests/test_cpu_features.py
+++ b/numpy/core/tests/test_cpu_features.py
@@ -146,6 +146,17 @@ class Test_POWER_Features(AbstractTest):
     def load_flags(self):
         self.load_flags_auxv()
 
+
+is_zarch = re.match("^(s390x)", machine, re.IGNORECASE)
+@pytest.mark.skipif(not is_linux or not is_zarch,
+                    reason="Only for Linux and IBM Z")
+class Test_ZARCH_Features(AbstractTest):
+    features = ["VX", "VXE", "VXE2"]
+
+    def load_flags(self):
+        self.load_flags_auxv()
+
+
 is_arm = re.match("^(arm|aarch64)", machine, re.IGNORECASE)
 @pytest.mark.skipif(not is_linux or not is_arm, reason="Only for Linux and ARM")
 class Test_ARM_Features(AbstractTest):
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index b38e47c13..e020d96ee 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -228,6 +228,7 @@ class _Config:
         x64 = "SSE SSE2 SSE3",
         ppc64 = '', # play it safe
         ppc64le = "VSX VSX2",
+        s390x = '',
         armhf = '', # play it safe
         aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD"
     )
@@ -293,6 +294,13 @@ class _Config:
         VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
         ## Power9/ISA 3.00
         VSX3 = dict(interest=3, implies="VSX2", implies_detect=False),
+        # IBM/Z
+        ## VX(z13) support
+        VX = dict(interest=1, headers="vecintrin.h"),
+        ## Vector-Enhancements Facility 1
+        VXE = dict(interest=2, implies="VX", implies_detect=False),
+        ## Vector-Enhancements Facility 2
+        VXE2 = dict(interest=3, implies="VXE", implies_detect=False),
         # ARM
         NEON  = dict(interest=1, headers="arm_neon.h"),
         NEON_FP16 = dict(interest=2, implies="NEON"),
@@ -472,6 +480,23 @@ class _Config:
 
             return partial
 
+        on_zarch = self.cc_on_s390x
+        if on_zarch:
+            partial = dict(
+                VX = dict(
+                    flags="-march=arch11 -mzvector"
+                ),
+                VXE = dict(
+                    flags="-march=arch12", implies_detect=False
+                ),
+                VXE2 = dict(
+                    flags="-march=arch13", implies_detect=False
+                )
+            )
+
+            return partial
+
+
         if self.cc_on_aarch64 and is_unix: return dict(
             NEON = dict(
                 implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True
@@ -919,6 +944,7 @@ class _CCompiler:
             ("cc_on_ppc64",    ".*(powerpc|ppc)64.*"),
             ("cc_on_aarch64",  ".*(aarch64|arm64).*"),
             ("cc_on_armhf",    ".*arm.*"),
+            ("cc_on_s390x",    ".*s390x.*"),
             # undefined platform
             ("cc_on_noarch",    ""),
         )
@@ -983,7 +1009,8 @@ class _CCompiler:
             self.cc_is_gcc = True
 
         self.cc_march = "unknown"
-        for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64"):
+        for arch in ("x86", "x64", "ppc64", "ppc64le", 
+                     "armhf", "aarch64", "s390x"):
             if getattr(self, "cc_on_" + arch):
                 self.cc_march = arch
                 break
diff --git a/numpy/distutils/checks/cpu_vx.c b/numpy/distutils/checks/cpu_vx.c
new file mode 100644
index 000000000..18fb7ef94
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vx.c
@@ -0,0 +1,16 @@
+#if (__VEC__ < 10301) || (__ARCH__ < 11)
+    #error VX not supported
+#endif
+
+#include <vecintrin.h>
+int main(int argc, char **argv)
+{
+    __vector double x = vec_abs(vec_xl(argc, (double*)argv));
+    __vector double y = vec_load_len((double*)argv, (unsigned int)argc);
+
+    x = vec_round(vec_ceil(x) + vec_floor(y));
+    __vector bool long long m = vec_cmpge(x, y);
+    __vector long long i = vec_signed(vec_sel(x, y, m));
+
+    return (int)vec_extract(i, 0);
+}
diff --git a/numpy/distutils/checks/cpu_vxe.c b/numpy/distutils/checks/cpu_vxe.c
new file mode 100644
index 000000000..ca41f8434
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vxe.c
@@ -0,0 +1,25 @@
+#if (__VEC__ < 10302) || (__ARCH__ < 12)
+    #error VXE not supported
+#endif
+
+#include <vecintrin.h>
+int main(int argc, char **argv)
+{
+    __vector float x = vec_nabs(vec_xl(argc, (float*)argv));
+    __vector float y = vec_load_len((float*)argv, (unsigned int)argc);
+    
+    x = vec_round(vec_ceil(x) + vec_floor(y));
+    __vector bool int m = vec_cmpge(x, y);
+    x = vec_sel(x, y, m);
+
+    // need to test the existence of intrin "vflls" since vec_doublee
+    // maps to wrong intrin "vfll".
+    // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100871
+#if defined(__GNUC__) && !defined(__clang__)
+    __vector long long i = vec_signed(__builtin_s390_vflls(x));
+#else
+    __vector long long i = vec_signed(vec_doublee(x));
+#endif
+
+    return (int)vec_extract(i, 0);
+}
diff --git a/numpy/distutils/checks/cpu_vxe2.c b/numpy/distutils/checks/cpu_vxe2.c
new file mode 100644
index 000000000..f36d57129
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vxe2.c
@@ -0,0 +1,21 @@
+#if (__VEC__ < 10303) || (__ARCH__ < 13)
+    #error VXE2 not supported
+#endif
+
+#include <vecintrin.h>
+
+int main(int argc, char **argv)
+{
+    int val;
+    __vector signed short large = { 'a', 'b', 'c', 'a', 'g', 'h', 'g', 'o' };
+    __vector signed short search = { 'g', 'h', 'g', 'o' };
+    __vector unsigned char len = { 0 };
+    __vector unsigned char res = vec_search_string_cc(large, search, len, &val);
+    __vector float x = vec_xl(argc, (float*)argv);
+    __vector int i = vec_signed(x);
+
+    i = vec_srdb(vec_sldb(i, i, 2), i, 3);
+    val += (int)vec_extract(res, 1);
+    val += vec_extract(i, 0);
+    return val;
+}
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index a4fda537d..dc1ab3b9b 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -47,7 +47,8 @@ class build(old_build):
             - not part of dispatch-able features(--cpu-dispatch)
             - not supported by compiler or platform
         """
-        self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD"
+        self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F" \
+                         " AVX512_SKX VSX VSX2 VSX3 NEON ASIMD VX VXE VXE2"
 
     def finalize_options(self):
         build_scripts = self.build_scripts
diff --git a/numpy/distutils/fcompiler/gnu.py b/numpy/distutils/fcompiler/gnu.py
index 39178071d..d8143328e 100644
--- a/numpy/distutils/fcompiler/gnu.py
+++ b/numpy/distutils/fcompiler/gnu.py
@@ -324,7 +324,7 @@ class Gnu95FCompiler(GnuFCompiler):
             c_archs[c_archs.index("i386")] = "i686"
         # check the arches the Fortran compiler supports, and compare with
         # arch flags from C compiler
-        for arch in ["ppc", "i686", "x86_64", "ppc64"]:
+        for arch in ["ppc", "i686", "x86_64", "ppc64", "s390x"]:
             if _can_target(cmd, arch) and arch in c_archs:
                 arch_flags.extend(["-arch", arch])
         return arch_flags
diff --git a/numpy/distutils/tests/test_ccompiler_opt.py b/numpy/distutils/tests/test_ccompiler_opt.py
index 1b27ab07c..6d42cc172 100644
--- a/numpy/distutils/tests/test_ccompiler_opt.py
+++ b/numpy/distutils/tests/test_ccompiler_opt.py
@@ -32,6 +32,7 @@ arch_compilers = dict(
     ppc64le = ("gcc", "clang"),
     armhf = ("gcc", "clang"),
     aarch64 = ("gcc", "clang"),
+    s390x = ("gcc", "clang"),
     noarch = ("gcc",)
 )
 
@@ -382,18 +383,19 @@ class _Test_CCompilerOpt:
             if o == "native" and self.cc_name() == "msvc":
                 continue
             self.expect(o,
-                trap_files=".*cpu_(sse|vsx|neon).c",
-                x86="", ppc64="", armhf=""
+                trap_files=".*cpu_(sse|vsx|neon|vx).c",
+                x86="", ppc64="", armhf="", s390x=""
             )
             self.expect(o,
-                trap_files=".*cpu_(sse3|vsx2|neon_vfpv4).c",
+                trap_files=".*cpu_(sse3|vsx2|neon_vfpv4|vxe).c",
                 x86="sse sse2", ppc64="vsx", armhf="neon neon_fp16",
-                aarch64="", ppc64le=""
+                aarch64="", ppc64le="", s390x="vx"
             )
             self.expect(o,
                 trap_files=".*cpu_(popcnt|vsx3).c",
                 x86="sse .* sse41", ppc64="vsx vsx2",
-                armhf="neon neon_fp16 .* asimd .*"
+                armhf="neon neon_fp16 .* asimd .*",
+                s390x="vx vxe vxe2"
             )
             self.expect(o,
                 x86_gcc=".* xop fma4 .* avx512f .* avx512_knl avx512_knm avx512_skx .*",
@@ -403,13 +405,14 @@ class _Test_CCompilerOpt:
                 # in msvc, avx512_knl avx512_knm aren't supported
                 x86_msvc=".* xop fma4 .* avx512f .* avx512_skx .*",
                 armhf=".* asimd asimdhp asimddp .*",
-                ppc64="vsx vsx2 vsx3.*"
+                ppc64="vsx vsx2 vsx3.*",
+                s390x="vx vxe vxe2.*"
             )
         # min
         self.expect("min",
             x86="sse sse2", x64="sse sse2 sse3",
             armhf="", aarch64="neon neon_fp16 .* asimd",
-            ppc64="", ppc64le="vsx vsx2"
+            ppc64="", ppc64le="vsx vsx2", s390x=""
         )
         self.expect(
             "min", trap_files=".*cpu_(sse2|vsx2).c",
@@ -420,7 +423,7 @@ class _Test_CCompilerOpt:
         try:
             self.expect("native",
                 trap_flags=".*(-march=native|-xHost|/QxHost).*",
-                x86=".*", ppc64=".*", armhf=".*"
+                x86=".*", ppc64=".*", armhf=".*", s390x=".*"
             )
             if self.march() != "unknown":
                 raise AssertionError(
@@ -432,14 +435,15 @@ class _Test_CCompilerOpt:
 
     def test_flags(self):
         self.expect_flags(
-            "sse sse2 vsx vsx2 neon neon_fp16",
+            "sse sse2 vsx vsx2 neon neon_fp16 vx vxe",
             x86_gcc="-msse -msse2", x86_icc="-msse -msse2",
             x86_iccw="/arch:SSE2",
             x86_msvc="/arch:SSE2" if self.march() == "x86" else "",
             ppc64_gcc= "-mcpu=power8",
             ppc64_clang="-maltivec -mvsx -mpower8-vector",
             armhf_gcc="-mfpu=neon-fp16 -mfp16-format=ieee",
-            aarch64=""
+            aarch64="",
+            s390x="-mzvector -march=arch12"
         )
         # testing normalize -march
         self.expect_flags(
@@ -484,7 +488,7 @@ class _Test_CCompilerOpt:
             try:
                 self.expect_targets(
                     targets,
-                    x86="", armhf="", ppc64=""
+                    x86="", armhf="", ppc64="", s390x=""
                 )
                 if self.march() != "unknown":
                     raise AssertionError(
@@ -496,26 +500,26 @@ class _Test_CCompilerOpt:
 
     def test_targets_syntax(self):
         for targets in (
-            "/*@targets $keep_baseline sse vsx neon*/",
-            "/*@targets,$keep_baseline,sse,vsx,neon*/",
-            "/*@targets*$keep_baseline*sse*vsx*neon*/",
+            "/*@targets $keep_baseline sse vsx neon vx*/",
+            "/*@targets,$keep_baseline,sse,vsx,neon vx*/",
+            "/*@targets*$keep_baseline*sse*vsx*neon*vx*/",
             """
             /*
             ** @targets
-            ** $keep_baseline, sse vsx,neon
+            ** $keep_baseline, sse vsx,neon, vx
             */
             """,
             """
             /*
-            ************@targets*************
-            ** $keep_baseline, sse vsx, neon
-            *********************************
+            ************@targets****************
+            ** $keep_baseline, sse vsx, neon, vx
+            ************************************
             */
             """,
             """
             /*
             /////////////@targets/////////////////
-            //$keep_baseline//sse//vsx//neon
+            //$keep_baseline//sse//vsx//neon//vx
             /////////////////////////////////////
             */
             """,
@@ -523,11 +527,11 @@ class _Test_CCompilerOpt:
             /*
             @targets
             $keep_baseline
-            SSE VSX NEON*/
+            SSE VSX NEON VX*/
             """
         ) :
             self.expect_targets(targets,
-                x86="sse", ppc64="vsx", armhf="neon", unknown=""
+                x86="sse", ppc64="vsx", armhf="neon", s390x="vx", unknown=""
             )
 
     def test_targets(self):
@@ -538,10 +542,12 @@ class _Test_CCompilerOpt:
                 sse sse2 sse41 avx avx2 avx512f
                 vsx vsx2 vsx3
                 neon neon_fp16 asimdhp asimddp
+                vx vxe vxe2
             */
             """,
-            baseline="avx vsx2 asimd",
-            x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3"
+            baseline="avx vsx2 asimd vx vxe",
+            x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3",
+            s390x="vxe2"
         )
         # test skipping non-dispatch features
         self.expect_targets(
@@ -550,10 +556,11 @@ class _Test_CCompilerOpt:
                 sse41 avx avx2 avx512f
                 vsx2 vsx3
                 asimd asimdhp asimddp
+                vx vxe vxe2
             */
             """,
-            baseline="", dispatch="sse41 avx2 vsx2 asimd asimddp",
-            x86="avx2 sse41", armhf="asimddp asimd", ppc64="vsx2"
+            baseline="", dispatch="sse41 avx2 vsx2 asimd asimddp vxe2",
+            x86="avx2 sse41", armhf="asimddp asimd", ppc64="vsx2", s390x="vxe2"
         )
         # test skipping features that not supported
         self.expect_targets(
@@ -562,11 +569,13 @@ class _Test_CCompilerOpt:
                 sse2 sse41 avx2 avx512f
                 vsx2 vsx3
                 neon asimdhp asimddp
+                vx vxe vxe2
             */
             """,
             baseline="",
-            trap_files=".*(avx2|avx512f|vsx3|asimddp).c",
-            x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon"
+            trap_files=".*(avx2|avx512f|vsx3|asimddp|vxe2).c",
+            x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon",
+            s390x="vxe vx"
         )
         # test skipping features that implies each other
         self.expect_targets(
@@ -598,14 +607,16 @@ class _Test_CCompilerOpt:
                 sse2 sse42 avx2 avx512f
                 vsx2 vsx3
                 neon neon_vfpv4 asimd asimddp
+                vx vxe vxe2
             */
             """,
-            baseline="sse41 avx2 vsx2 asimd vsx3",
+            baseline="sse41 avx2 vsx2 asimd vsx3 vxe",
             x86="avx512f avx2 sse42 sse2",
             ppc64="vsx3 vsx2",
             armhf="asimddp asimd neon_vfpv4 neon",
             # neon, neon_vfpv4, asimd implies each other
-            aarch64="asimddp asimd"
+            aarch64="asimddp asimd",
+            s390x="vxe2 vxe vx"
         )
         # 'keep_sort', leave the sort as-is
         self.expect_targets(
@@ -615,13 +626,15 @@ class _Test_CCompilerOpt:
                 avx512f sse42 avx2 sse2
                 vsx2 vsx3
                 asimd neon neon_vfpv4 asimddp
+                vxe vxe2
             */
             """,
             x86="avx512f sse42 avx2 sse2",
             ppc64="vsx2 vsx3",
             armhf="asimd neon neon_vfpv4 asimddp",
             # neon, neon_vfpv4, asimd implies each other
-            aarch64="asimd asimddp"
+            aarch64="asimd asimddp",
+            s390x="vxe vxe2"
         )
         # 'autovec', skipping features that can't be
         # vectorized by the compiler
@@ -736,11 +749,13 @@ class _Test_CCompilerOpt:
                 (sse41 avx sse42) (sse3 avx2 avx512f)
                 (vsx vsx3 vsx2)
                 (asimddp neon neon_vfpv4 asimd asimdhp)
+                (vx vxe vxe2)
             */
             """,
             x86="avx avx512f",
             ppc64="vsx3",
             armhf=r"\(asimdhp asimddp\)",
+            s390x="vxe2"
         )
         # test compiler variety and avoiding duplicating
         self.expect_targets(
author	Matti Picus <matti.picus@gmail.com>	2021-12-14 09:16:46 -0700
committer	GitHub <noreply@github.com>	2021-12-14 09:16:46 -0700
commit	8111d51af2c743c1ff90dc014ede49564a8f22d9 (patch)
tree	bb66fbc530f98bd21bff75a0c2e5fab24663ec73
parent	77c3254f2aac861fc4da75fd37e193ed73b42d47 (diff)
parent	8032ff73544ad4bb405c4a0f3bd983f86b273e2d (diff)
download	numpy-8111d51af2c743c1ff90dc014ede49564a8f22d9.tar.gz