diff options
Diffstat (limited to 'numpy')
| -rw-r--r-- | numpy/core/src/common/npy_cpu_features.c.src | 37 | ||||
| -rw-r--r-- | numpy/core/src/common/npy_cpu_features.h | 11 | ||||
| -rw-r--r-- | numpy/distutils/ccompiler_opt.py | 33 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_vx.c | 15 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_vxe.c | 18 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_vxe2.c | 19 | ||||
| -rw-r--r-- | numpy/distutils/command/build.py | 2 | ||||
| -rw-r--r-- | numpy/distutils/fcompiler/gnu.py | 2 |
8 files changed, 134 insertions, 3 deletions
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src index a2383c45f..a7cc7b36a 100644 --- a/numpy/core/src/common/npy_cpu_features.c.src +++ b/numpy/core/src/common/npy_cpu_features.c.src @@ -62,6 +62,7 @@ npy_cpu_features_dict(void) * AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG, * AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL, * VSX, VSX2, VSX3, + * VX, VXE, VXE2, * NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM# */ if (PyDict_SetItemString(dict, "@feature@", @@ -509,6 +510,42 @@ npy__cpu_init_features(void) #endif } +/***************** ZARCH ******************/ + +#elif defined(NPY_CPU_S390) || defined(__s390x__) + +#include <sys/auxv.h> +#ifndef HWCAP_S390_VXE + #define HWCAP_S390_VXE 8192 +#endif + +#ifndef HWCAP_S390_VXRS_EXT2 + #define HWCAP_S390_VXRS_EXT2 32768 +#endif + +static void +npy__cpu_init_features(void) +{ + memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX); + + unsigned int hwcap = getauxval(AT_HWCAP); + if ((hwcap & HWCAP_S390_VX) == 0) + return ; + + if (hwcap & HWCAP_S390_VXRS_EXT2) + { + npy__cpu_have[NPY_CPU_FEATURE_VX] = + npy__cpu_have[NPY_CPU_FEATURE_VXE] = + npy__cpu_have[NPY_CPU_FEATURE_VXE2] = 1; + return ; + } + + npy__cpu_have[NPY_CPU_FEATURE_VXE] = (hwcap & HWCAP_S390_VXE) != 0; + + npy__cpu_have[NPY_CPU_FEATURE_VX] = 1; +} + + /***************** ARM ******************/ #elif defined(__arm__) || defined(__aarch64__) diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h index ce1fc822a..c63e594b0 100644 --- a/numpy/core/src/common/npy_cpu_features.h +++ b/numpy/core/src/common/npy_cpu_features.h @@ -82,6 +82,15 @@ enum npy_cpu_features // ARMv8.2 single&half-precision multiply NPY_CPU_FEATURE_ASIMDFHM = 307, + // IBM/ZARCH + NPY_CPU_FEATURE_VX = 308, + + // Vector-Enhancements Facility 1 + NPY_CPU_FEATURE_VXE = 309, + + // Vector-Enhancements Facility 2 + 
NPY_CPU_FEATURE_VXE2 = 310, + NPY_CPU_FEATURE_MAX }; @@ -138,6 +147,7 @@ npy_cpu_features_dict(void); * On aarch64: ['NEON', 'NEON_FP16', 'NEON_VPFV4', 'ASIMD'] * On ppc64: [] * On ppc64le: ['VSX', 'VSX2'] + * On s390x: ['VX', 'VXE', 'VXE2'] * On any other arch or if the optimization is disabled: [] */ NPY_VISIBILITY_HIDDEN PyObject * @@ -159,6 +169,7 @@ npy_cpu_baseline_list(void); * On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM'] * On ppc64: ['VSX', 'VSX2', 'VSX3'] * On ppc64le: ['VSX3'] + * On s390x: ['VX', 'VXE', 'VXE2'] * On any other arch or if the optimization is disabled: [] */ NPY_VISIBILITY_HIDDEN PyObject * diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py index b38e47c13..834ade16c 100644 --- a/numpy/distutils/ccompiler_opt.py +++ b/numpy/distutils/ccompiler_opt.py @@ -228,6 +228,7 @@ class _Config: x64 = "SSE SSE2 SSE3", ppc64 = '', # play it safe ppc64le = "VSX VSX2", + s390x = "VX", armhf = '', # play it safe aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD" ) @@ -293,6 +294,13 @@ class _Config: VSX2 = dict(interest=2, implies="VSX", implies_detect=False), ## Power9/ISA 3.00 VSX3 = dict(interest=3, implies="VSX2", implies_detect=False), + # IBM/Z + ## VX(z13) support + VX = dict(interest=1, headers="vecintrin.h"), + ## Vector-Enhancements Facility + VXE = dict(interest=2, implies="VX",implies_detect=False), + ## Vector-Enhancements Facility 2 + VXE2 = dict(interest=3, implies="VXE",implies_detect=False), # ARM NEON = dict(interest=1, headers="arm_neon.h"), NEON_FP16 = dict(interest=2, implies="NEON"), @@ -472,6 +480,28 @@ class _Config: return partial + + on_zarch = self.cc_on_s390x + if on_zarch: + partial = dict( + VX = dict( + flags="-march=arch11 -mzvector" + ), + VXE = dict( + flags="-march=arch12 -mzvector", implies_detect=False + ), + VXE2 = dict( + flags="-march=arch13 -mzvector", implies_detect=False + ) + ) + if self.cc_is_clang: + partial["VX"]["flags"] = "-march=arch11 -mzvector" + partial["VXE"]["flags"] = 
"-march=arch12 -mzvector" + partial["VXE2"]["flags"] = "-march=arch13 -mzvector" + + return partial + + if self.cc_on_aarch64 and is_unix: return dict( NEON = dict( implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True @@ -919,6 +949,7 @@ class _CCompiler: ("cc_on_ppc64", ".*(powerpc|ppc)64.*"), ("cc_on_aarch64", ".*(aarch64|arm64).*"), ("cc_on_armhf", ".*arm.*"), + ("cc_on_s390x", ".*s390x.*"), # undefined platform ("cc_on_noarch", ""), ) @@ -983,7 +1014,7 @@ class _CCompiler: self.cc_is_gcc = True self.cc_march = "unknown" - for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64"): + for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64", "s390x"): if getattr(self, "cc_on_" + arch): self.cc_march = arch break diff --git a/numpy/distutils/checks/cpu_vx.c b/numpy/distutils/checks/cpu_vx.c new file mode 100644 index 000000000..be854ccb9 --- /dev/null +++ b/numpy/distutils/checks/cpu_vx.c @@ -0,0 +1,15 @@ +#if (__VEC__ < 10301) || (__ARCH__ < 11) + #error VX not supported +#endif + +#include <vecintrin.h> +#include<stdio.h> + +__vector int input= {1, 2, 4, 5 }; + +int main(void) +{ + __vector int out; + out = vec_abs(input); + return out[0]; +} diff --git a/numpy/distutils/checks/cpu_vxe.c b/numpy/distutils/checks/cpu_vxe.c new file mode 100644 index 000000000..e1d1d5fae --- /dev/null +++ b/numpy/distutils/checks/cpu_vxe.c @@ -0,0 +1,18 @@ +#if (__VEC__ < 10302) || (__ARCH__ < 12) + #error VXE not supported +#endif + +#include <vecintrin.h> +#include <stdio.h> + +int main(void) { + __vector float a = { + 25.0, 36.0, 81.0, 100.0 + }; + + __vector float d; + + d = vec_nabs(a); + + return (int)d[0]; +} diff --git a/numpy/distutils/checks/cpu_vxe2.c b/numpy/distutils/checks/cpu_vxe2.c new file mode 100644 index 000000000..9a7c8c872 --- /dev/null +++ b/numpy/distutils/checks/cpu_vxe2.c @@ -0,0 +1,19 @@ +#if (__VEC__ < 10303) || (__ARCH__ < 13) + #error VXE2 not supported +#endif + +#include <vecintrin.h> +#include <stdio.h> + +int main(void) { + int val; 
+ vector signed short large = { 'a', 'b', 'c', 'a', 'g', 'h', 'g', 'o' }; + vector signed short search = { 'g', 'h', 'g', 'o' }; + vector unsigned char len = { 0 }; + vector unsigned char res = vec_search_string_cc (large, search, + len, &val); + if (len[7] == 0 && res[7] != 0) + __builtin_abort (); + + return val; +} diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py index a4fda537d..e5c507772 100644 --- a/numpy/distutils/command/build.py +++ b/numpy/distutils/command/build.py @@ -47,7 +47,7 @@ class build(old_build): - not part of dispatch-able features(--cpu-dispatch) - not supported by compiler or platform """ - self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD" + self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD VX VXE VXE2" def finalize_options(self): build_scripts = self.build_scripts diff --git a/numpy/distutils/fcompiler/gnu.py b/numpy/distutils/fcompiler/gnu.py index 39178071d..d8143328e 100644 --- a/numpy/distutils/fcompiler/gnu.py +++ b/numpy/distutils/fcompiler/gnu.py @@ -324,7 +324,7 @@ class Gnu95FCompiler(GnuFCompiler): c_archs[c_archs.index("i386")] = "i686" # check the arches the Fortran compiler supports, and compare with # arch flags from C compiler - for arch in ["ppc", "i686", "x86_64", "ppc64"]: + for arch in ["ppc", "i686", "x86_64", "ppc64", "s390x"]: if _can_target(cmd, arch) and arch in c_archs: arch_flags.extend(["-arch", arch]) return arch_flags |
