8 files changed, 134 insertions, 3 deletions
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index a2383c45f..a7cc7b36a 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -62,6 +62,7 @@ npy_cpu_features_dict(void)
      *            AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG,
      *            AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL,
      *            VSX, VSX2, VSX3,
+     *            VX, VXE, VXE2,
      *            NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM#
     */
         if (PyDict_SetItemString(dict, "@feature@",
@@ -509,6 +510,42 @@ npy__cpu_init_features(void)
 #endif
 }
 
+/***************** ZARCH ******************/
+
+#elif defined(NPY_CPU_S390) || defined(__s390x__)
+
+#include <sys/auxv.h>
+/* Fallback values in case <sys/auxv.h> does not define these HWCAP bits. */
+#ifndef HWCAP_S390_VX
+    #define HWCAP_S390_VX 2048
+#endif
+#ifndef HWCAP_S390_VXE
+    #define HWCAP_S390_VXE 8192
+#endif
+#ifndef HWCAP_S390_VXRS_EXT2
+    #define HWCAP_S390_VXRS_EXT2 32768
+#endif
+
+/* Fill npy__cpu_have[] for s390x from the AT_HWCAP auxiliary-vector entry. */
+static void
+npy__cpu_init_features(void)
+{
+    memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
+
+    /* getauxval() returns unsigned long; keep the full width. */
+    unsigned long hwcap = getauxval(AT_HWCAP);
+    if ((hwcap & HWCAP_S390_VX) == 0) {
+        return;  /* no VX bit: every feature stays cleared */
+    }
+
+    npy__cpu_have[NPY_CPU_FEATURE_VX] = 1;
+    /* The EXT2 bit enables VXE as well as VXE2, even without the VXE bit. */
+    npy__cpu_have[NPY_CPU_FEATURE_VXE2] = (hwcap & HWCAP_S390_VXRS_EXT2) != 0;
+    npy__cpu_have[NPY_CPU_FEATURE_VXE] =
+        (hwcap & (HWCAP_S390_VXE | HWCAP_S390_VXRS_EXT2)) != 0;
+}
+
+
 /***************** ARM ******************/
 
 #elif defined(__arm__) || defined(__aarch64__)
diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h
index ce1fc822a..c63e594b0 100644
--- a/numpy/core/src/common/npy_cpu_features.h
+++ b/numpy/core/src/common/npy_cpu_features.h
@@ -82,6 +82,15 @@ enum npy_cpu_features
     // ARMv8.2 single&half-precision multiply
     NPY_CPU_FEATURE_ASIMDFHM          = 307,
 
+    // IBM/ZARCH
+    NPY_CPU_FEATURE_VX                = 308,
+ 
+    // Vector-Enhancements Facility 1
+    NPY_CPU_FEATURE_VXE               = 309,
+
+    // Vector-Enhancements Facility 2
+    NPY_CPU_FEATURE_VXE2              = 310,
+
     NPY_CPU_FEATURE_MAX
 };
 
@@ -138,6 +147,7 @@ npy_cpu_features_dict(void);
  * On aarch64: ['NEON', 'NEON_FP16', 'NEON_VPFV4', 'ASIMD']
  * On ppc64: []
  * On ppc64le: ['VSX', 'VSX2']
+ * On s390x: ['VX', 'VXE', 'VXE2']
  * On any other arch or if the optimization is disabled: []
  */
 NPY_VISIBILITY_HIDDEN PyObject *
@@ -159,6 +169,7 @@ npy_cpu_baseline_list(void);
  * On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
  * On ppc64:  ['VSX', 'VSX2', 'VSX3']
  * On ppc64le: ['VSX3']
+ * On s390x: ['VX', 'VXE', 'VXE2']
  * On any other arch or if the optimization is disabled: []
  */
 NPY_VISIBILITY_HIDDEN PyObject *
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index b38e47c13..834ade16c 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -228,6 +228,7 @@ class _Config:
         x64 = "SSE SSE2 SSE3",
         ppc64 = '', # play it safe
         ppc64le = "VSX VSX2",
+        s390x = "VX",
         armhf = '', # play it safe
         aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD"
     )
@@ -293,6 +294,13 @@ class _Config:
         VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
         ## Power9/ISA 3.00
         VSX3 = dict(interest=3, implies="VSX2", implies_detect=False),
+        # IBM/Z
+        ## VX(z13) support
+        VX  = dict(interest=1, headers="vecintrin.h"),
+        ## Vector-Enhancements Facility 1
+        VXE = dict(interest=2, implies="VX",implies_detect=False),
+        ## Vector-Enhancements Facility 2
+        VXE2 = dict(interest=3, implies="VXE",implies_detect=False),
         # ARM
         NEON  = dict(interest=1, headers="arm_neon.h"),
         NEON_FP16 = dict(interest=2, implies="NEON"),
@@ -472,6 +480,28 @@ class _Config:
 
             return partial
 
+
+        on_zarch = self.cc_on_s390x
+        if on_zarch:
+            # One entry per IBM/Z vector level, each pairing -mzvector
+            # with its own -march value. The identical flag strings are
+            # used for every compiler (Clang included), so there is no
+            # per-compiler override below.
+            partial = dict(
+                VX = dict(
+                    flags="-march=arch11 -mzvector"
+                ),
+                VXE = dict(
+                    flags="-march=arch12 -mzvector", implies_detect=False
+                ),
+                VXE2 = dict(
+                    flags="-march=arch13 -mzvector", implies_detect=False
+                )
+            )
+
+            return partial
+
+
         if self.cc_on_aarch64 and is_unix: return dict(
             NEON = dict(
                 implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True
@@ -919,6 +949,7 @@ class _CCompiler:
             ("cc_on_ppc64",    ".*(powerpc|ppc)64.*"),
             ("cc_on_aarch64",  ".*(aarch64|arm64).*"),
             ("cc_on_armhf",    ".*arm.*"),
+            ("cc_on_s390x",    ".*s390x.*"),
             # undefined platform
             ("cc_on_noarch",    ""),
         )
@@ -983,7 +1014,7 @@ class _CCompiler:
             self.cc_is_gcc = True
 
         self.cc_march = "unknown"
-        for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64"):
+        for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64", "s390x"):
             if getattr(self, "cc_on_" + arch):
                 self.cc_march = arch
                 break
diff --git a/numpy/distutils/checks/cpu_vx.c b/numpy/distutils/checks/cpu_vx.c
new file mode 100644
index 000000000..be854ccb9
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vx.c
@@ -0,0 +1,15 @@
+#if (__VEC__ < 10301) || (__ARCH__ < 11)
+    #error VX not supported
+#endif
+
+#include <vecintrin.h>
+#include<stdio.h>
+
+/* Operand kept at file scope; main() passes it to vec_abs(). */
+__vector int input = { 1, 2, 4, 5 };
+
+int main(void)
+{
+    __vector int magnitude = vec_abs(input);
+    return magnitude[0];  /* lane 0 of the result is the exit status */
+}
diff --git a/numpy/distutils/checks/cpu_vxe.c b/numpy/distutils/checks/cpu_vxe.c
new file mode 100644
index 000000000..e1d1d5fae
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vxe.c
@@ -0,0 +1,18 @@
+#if (__VEC__ < 10302) || (__ARCH__ < 12)
+    #error VXE not supported
+#endif
+
+#include <vecintrin.h>
+#include <stdio.h>
+
+/*
+ * VXE check program: applies vec_nabs() to a __vector float and returns
+ * lane 0 of the result converted to int.
+ */
+int main(void)
+{
+    __vector float operand = { 25.0, 36.0, 81.0, 100.0 };
+    __vector float negated = vec_nabs(operand);
+
+    return (int)negated[0];
+}
diff --git a/numpy/distutils/checks/cpu_vxe2.c b/numpy/distutils/checks/cpu_vxe2.c
new file mode 100644
index 000000000..9a7c8c872
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vxe2.c
@@ -0,0 +1,19 @@
+#if (__VEC__ < 10303) || (__ARCH__ < 13)
+    #error VXE2 not supported
+#endif
+
+#include <vecintrin.h>
+#include <stdio.h>
+
+/* VXE2 check program: calls vec_search_string_cc() and returns its out value. */
+int main(void)
+{
+    vector signed short haystack = { 'a', 'b', 'c', 'a', 'g', 'h', 'g', 'o' };
+    vector signed short needle = { 'g', 'h', 'g', 'o' };
+    vector unsigned char length = { 0 };
+    int status;
+    vector unsigned char found =
+        vec_search_string_cc(haystack, needle, length, &status);
+    if (length[7] == 0 && found[7] != 0) __builtin_abort();
+    return status;
+}
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index a4fda537d..e5c507772 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -47,7 +47,7 @@ class build(old_build):
             - not part of dispatch-able features(--cpu-dispatch)
             - not supported by compiler or platform
         """
-        self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD"
+        self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD VX VXE VXE2"
 
     def finalize_options(self):
         build_scripts = self.build_scripts
diff --git a/numpy/distutils/fcompiler/gnu.py b/numpy/distutils/fcompiler/gnu.py
index 39178071d..d8143328e 100644
--- a/numpy/distutils/fcompiler/gnu.py
+++ b/numpy/distutils/fcompiler/gnu.py
@@ -324,7 +324,7 @@ class Gnu95FCompiler(GnuFCompiler):
             c_archs[c_archs.index("i386")] = "i686"
         # check the arches the Fortran compiler supports, and compare with
         # arch flags from C compiler
-        for arch in ["ppc", "i686", "x86_64", "ppc64"]:
+        for arch in ["ppc", "i686", "x86_64", "ppc64", "s390x"]:
             if _can_target(cmd, arch) and arch in c_archs:
                 arch_flags.extend(["-arch", arch])
         return arch_flags