summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/src/common/npy_cpu_features.c.src37
-rw-r--r--numpy/core/src/common/npy_cpu_features.h11
-rw-r--r--numpy/distutils/ccompiler_opt.py33
-rw-r--r--numpy/distutils/checks/cpu_vx.c15
-rw-r--r--numpy/distutils/checks/cpu_vxe.c18
-rw-r--r--numpy/distutils/checks/cpu_vxe2.c19
-rw-r--r--numpy/distutils/command/build.py2
-rw-r--r--numpy/distutils/fcompiler/gnu.py2
8 files changed, 134 insertions, 3 deletions
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index a2383c45f..a7cc7b36a 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -62,6 +62,7 @@ npy_cpu_features_dict(void)
* AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG,
* AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL,
* VSX, VSX2, VSX3,
+ * VX, VXE, VXE2,
* NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM#
*/
if (PyDict_SetItemString(dict, "@feature@",
@@ -509,6 +510,42 @@ npy__cpu_init_features(void)
#endif
}
+/***************** ZARCH ******************/
+
+#elif defined(NPY_CPU_S390) || defined(__s390x__)
+
+#include <sys/auxv.h>
+#ifndef HWCAP_S390_VXE
+ #define HWCAP_S390_VXE 8192
+#endif
+
+#ifndef HWCAP_S390_VXRS_EXT2
+ #define HWCAP_S390_VXRS_EXT2 32768
+#endif
+
+static void
+npy__cpu_init_features(void)
+{
+    /* Start with every feature off; s390x bits are then set from AT_HWCAP. */
+    memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
+    /* getauxval() returns unsigned long; keep the full width (no narrowing). */
+    unsigned long hwcap = getauxval(AT_HWCAP);
+    if ((hwcap & HWCAP_S390_VX) == 0) {
+        /* VX bit not reported: leave VX, VXE and VXE2 all cleared. */
+        return;
+    }
+    npy__cpu_have[NPY_CPU_FEATURE_VX] = 1;
+    if (hwcap & HWCAP_S390_VXRS_EXT2) {
+        /* The VXE2 bit alone also marks VXE as available (VXE bit not read). */
+        npy__cpu_have[NPY_CPU_FEATURE_VXE] = 1;
+        npy__cpu_have[NPY_CPU_FEATURE_VXE2] = 1;
+        return;
+    }
+    /* Without VXE2, VXE follows its own HWCAP bit. */
+    npy__cpu_have[NPY_CPU_FEATURE_VXE] = (hwcap & HWCAP_S390_VXE) != 0;
+}
+
+
/***************** ARM ******************/
#elif defined(__arm__) || defined(__aarch64__)
diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h
index ce1fc822a..c63e594b0 100644
--- a/numpy/core/src/common/npy_cpu_features.h
+++ b/numpy/core/src/common/npy_cpu_features.h
@@ -82,6 +82,15 @@ enum npy_cpu_features
// ARMv8.2 single&half-precision multiply
NPY_CPU_FEATURE_ASIMDFHM = 307,
+ // IBM/ZARCH
+ NPY_CPU_FEATURE_VX = 308,
+
+ // Vector-Enhancements Facility 1
+ NPY_CPU_FEATURE_VXE = 309,
+
+ // Vector-Enhancements Facility 2
+ NPY_CPU_FEATURE_VXE2 = 310,
+
NPY_CPU_FEATURE_MAX
};
@@ -138,6 +147,7 @@ npy_cpu_features_dict(void);
* On aarch64: ['NEON', 'NEON_FP16', 'NEON_VPFV4', 'ASIMD']
* On ppc64: []
* On ppc64le: ['VSX', 'VSX2']
+ * On s390x: ['VX', 'VXE', 'VXE2']
* On any other arch or if the optimization is disabled: []
*/
NPY_VISIBILITY_HIDDEN PyObject *
@@ -159,6 +169,7 @@ npy_cpu_baseline_list(void);
* On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
* On ppc64: ['VSX', 'VSX2', 'VSX3']
* On ppc64le: ['VSX3']
+ * On s390x: ['VX', 'VXE', 'VXE2']
* On any other arch or if the optimization is disabled: []
*/
NPY_VISIBILITY_HIDDEN PyObject *
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index b38e47c13..834ade16c 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -228,6 +228,7 @@ class _Config:
x64 = "SSE SSE2 SSE3",
ppc64 = '', # play it safe
ppc64le = "VSX VSX2",
+ s390x = "VX",
armhf = '', # play it safe
aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD"
)
@@ -293,6 +294,13 @@ class _Config:
VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
## Power9/ISA 3.00
VSX3 = dict(interest=3, implies="VSX2", implies_detect=False),
+ # IBM/Z
+ ## VX(z13) support
+ VX = dict(interest=1, headers="vecintrin.h"),
+ ## Vector-Enhancements Facility
+ VXE = dict(interest=2, implies="VX",implies_detect=False),
+ ## Vector-Enhancements Facility 2
+ VXE2 = dict(interest=3, implies="VXE",implies_detect=False),
# ARM
NEON = dict(interest=1, headers="arm_neon.h"),
NEON_FP16 = dict(interest=2, implies="NEON"),
@@ -472,6 +480,28 @@ class _Config:
return partial
+
+ on_zarch = self.cc_on_s390x
+ if on_zarch:
+ partial = dict(
+ VX = dict(
+ flags="-march=arch11 -mzvector"
+ ),
+ VXE = dict(
+ flags="-march=arch12 -mzvector", implies_detect=False
+ ),
+ VXE2 = dict(
+ flags="-march=arch13 -mzvector", implies_detect=False
+ )
+ )
+ if self.cc_is_clang:
+ partial["VX"]["flags"] = "-march=arch11 -mzvector"
+ partial["VXE"]["flags"] = "-march=arch12 -mzvector"
+ partial["VXE2"]["flags"] = "-march=arch13 -mzvector"
+
+ return partial
+
+
if self.cc_on_aarch64 and is_unix: return dict(
NEON = dict(
implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True
@@ -919,6 +949,7 @@ class _CCompiler:
("cc_on_ppc64", ".*(powerpc|ppc)64.*"),
("cc_on_aarch64", ".*(aarch64|arm64).*"),
("cc_on_armhf", ".*arm.*"),
+ ("cc_on_s390x", ".*s390x.*"),
# undefined platform
("cc_on_noarch", ""),
)
@@ -983,7 +1014,7 @@ class _CCompiler:
self.cc_is_gcc = True
self.cc_march = "unknown"
- for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64"):
+ for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64", "s390x"):
if getattr(self, "cc_on_" + arch):
self.cc_march = arch
break
diff --git a/numpy/distutils/checks/cpu_vx.c b/numpy/distutils/checks/cpu_vx.c
new file mode 100644
index 000000000..be854ccb9
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vx.c
@@ -0,0 +1,15 @@
+#if (__VEC__ < 10301) || (__ARCH__ < 11)
+ #error VX not supported
+#endif
+
+#include <vecintrin.h>
+#include<stdio.h>
+
+__vector int input= {1, 2, 4, 5 };
+
+int main(void)
+{
+    /* Presumably a compile/link probe for VX: one vec_abs() on `input`. */
+    __vector int absolute = vec_abs(input);
+    return absolute[0];
+}
diff --git a/numpy/distutils/checks/cpu_vxe.c b/numpy/distutils/checks/cpu_vxe.c
new file mode 100644
index 000000000..e1d1d5fae
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vxe.c
@@ -0,0 +1,18 @@
+#if (__VEC__ < 10302) || (__ARCH__ < 12)
+ #error VXE not supported
+#endif
+
+#include <vecintrin.h>
+#include <stdio.h>
+
+int main(void) {
+    /*
+     * Presumably a compile/link probe for VXE: vec_nabs() on a float
+     * vector is the only operation performed.
+     */
+    __vector float values = { 25.0, 36.0, 81.0, 100.0 };
+    __vector float nabs = vec_nabs(values);
+    int first_lane = (int)nabs[0];
+
+    return first_lane;
+}
diff --git a/numpy/distutils/checks/cpu_vxe2.c b/numpy/distutils/checks/cpu_vxe2.c
new file mode 100644
index 000000000..9a7c8c872
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vxe2.c
@@ -0,0 +1,19 @@
+#if (__VEC__ < 10303) || (__ARCH__ < 13)
+ #error VXE2 not supported
+#endif
+
+#include <vecintrin.h>
+#include <stdio.h>
+
+int main(void) {
+    /* Presumably a compile/link probe for VXE2 via vec_search_string_cc(). */
+    vector signed short haystack = { 'a', 'b', 'c', 'a', 'g', 'h', 'g', 'o' };
+    vector signed short needle = { 'g', 'h', 'g', 'o' };
+    vector unsigned char needle_len = { 0 };
+    int cc;
+    vector unsigned char found = vec_search_string_cc (haystack, needle, needle_len, &cc);
+    if (needle_len[7] == 0 && found[7] != 0) {
+        __builtin_abort ();
+    }
+    return cc;
+}
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index a4fda537d..e5c507772 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -47,7 +47,7 @@ class build(old_build):
- not part of dispatch-able features(--cpu-dispatch)
- not supported by compiler or platform
"""
- self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD"
+ self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD VX VXE VXE2"
def finalize_options(self):
build_scripts = self.build_scripts
diff --git a/numpy/distutils/fcompiler/gnu.py b/numpy/distutils/fcompiler/gnu.py
index 39178071d..d8143328e 100644
--- a/numpy/distutils/fcompiler/gnu.py
+++ b/numpy/distutils/fcompiler/gnu.py
@@ -324,7 +324,7 @@ class Gnu95FCompiler(GnuFCompiler):
c_archs[c_archs.index("i386")] = "i686"
# check the arches the Fortran compiler supports, and compare with
# arch flags from C compiler
- for arch in ["ppc", "i686", "x86_64", "ppc64"]:
+ for arch in ["ppc", "i686", "x86_64", "ppc64", "s390x"]:
if _can_target(cmd, arch) and arch in c_archs:
arch_flags.extend(["-arch", arch])
return arch_flags