diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/common/npy_cpu_features.c.src | 20 | ||||
-rw-r--r-- | numpy/core/src/common/npy_cpu_features.h | 6 | ||||
-rw-r--r-- | numpy/core/tests/test_cpu_features.py | 4 | ||||
-rw-r--r-- | numpy/distutils/ccompiler_opt.py | 7 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_vsx4.c | 14 | ||||
-rw-r--r-- | numpy/distutils/checks/extra_vsx4_mma.c | 21 | ||||
-rw-r--r-- | numpy/distutils/command/build.py | 4 | ||||
-rw-r--r-- | numpy/distutils/tests/test_ccompiler_opt.py | 12 |
8 files changed, 72 insertions, 16 deletions
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src index 1385220f9..ff4f9f60a 100644 --- a/numpy/core/src/common/npy_cpu_features.c.src +++ b/numpy/core/src/common/npy_cpu_features.c.src @@ -61,7 +61,7 @@ npy_cpu_features_dict(void) * AVX512VPOPCNTDQ, AVX512VL, AVX512BW, AVX512DQ, AVX512VNNI, * AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG, * AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL, - * VSX, VSX2, VSX3, + * VSX, VSX2, VSX3, VSX4, * VX, VXE, VXE2, * NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM# */ @@ -474,9 +474,15 @@ npy__cpu_init_features(void) #ifndef AT_HWCAP2 #define AT_HWCAP2 26 #endif + #ifndef PPC_FEATURE2_ARCH_2_07 + #define PPC_FEATURE2_ARCH_2_07 0x80000000 + #endif #ifndef PPC_FEATURE2_ARCH_3_00 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #endif + #ifndef PPC_FEATURE2_ARCH_3_1 + #define PPC_FEATURE2_ARCH_3_1 0x00040000 + #endif #endif static void @@ -489,15 +495,18 @@ npy__cpu_init_features(void) return; hwcap = getauxval(AT_HWCAP2); - if (hwcap & PPC_FEATURE2_ARCH_3_00) + if (hwcap & PPC_FEATURE2_ARCH_3_1) { npy__cpu_have[NPY_CPU_FEATURE_VSX] = npy__cpu_have[NPY_CPU_FEATURE_VSX2] = - npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1; + npy__cpu_have[NPY_CPU_FEATURE_VSX3] = + npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1; return; } - npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0; npy__cpu_have[NPY_CPU_FEATURE_VSX] = 1; + npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0; + npy__cpu_have[NPY_CPU_FEATURE_VSX3] = (hwcap & PPC_FEATURE2_ARCH_3_00) != 0; + npy__cpu_have[NPY_CPU_FEATURE_VSX4] = (hwcap & PPC_FEATURE2_ARCH_3_1) != 0; // TODO: AIX, FreeBSD #else npy__cpu_have[NPY_CPU_FEATURE_VSX] = 1; @@ -507,6 +516,9 @@ npy__cpu_init_features(void) #ifdef NPY_HAVE_VSX3 npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1; #endif + #ifdef NPY_HAVE_VSX4 + npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1; + #endif #endif } diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h index 1f52a445d..3d5f2e75c 100644 --- a/numpy/core/src/common/npy_cpu_features.h +++ b/numpy/core/src/common/npy_cpu_features.h @@ -65,6 +65,8 @@ enum npy_cpu_features NPY_CPU_FEATURE_VSX2 = 201, // POWER9 NPY_CPU_FEATURE_VSX3 = 202, + // POWER10 + NPY_CPU_FEATURE_VSX4 = 203, // ARM NPY_CPU_FEATURE_NEON = 300, @@ -167,8 +169,8 @@ npy_cpu_baseline_list(void); * On x64: ['SSSE3', 'SSE41', 'POPCNT', 'SSE42', 'AVX', 'F16C', 'FMA3', 'AVX2', 'AVX512F', ...] * On armhf: ['NEON', 'NEON_FP16', 'NEON_VPFV4', 'ASIMD', 'ASIMDHP', 'ASIMDDP', 'ASIMDFHM'] * On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM'] - * On ppc64: ['VSX', 'VSX2', 'VSX3'] - * On ppc64le: ['VSX3'] + * On ppc64: ['VSX', 'VSX2', 'VSX3', 'VSX4'] + * On ppc64le: ['VSX3', 'VSX4'] * On s390x: ['VX', 'VXE', VXE2] * On any other arch or if the optimization is disabled: [] */ diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py index 706cf7a7e..1a76897e2 100644 --- a/numpy/core/tests/test_cpu_features.py +++ b/numpy/core/tests/test_cpu_features.py @@ -140,8 +140,8 @@ class Test_X86_Features(AbstractTest): is_power = re.match("^(powerpc|ppc)64", machine, re.IGNORECASE) @pytest.mark.skipif(not is_linux or not is_power, reason="Only for Linux and Power") class Test_POWER_Features(AbstractTest): - features = ["VSX", "VSX2", "VSX3"] - features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00") + features = ["VSX", "VSX2", "VSX3", "VSX4"] + features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00", VSX4="ARCH_3_1") def load_flags(self): self.load_flags_auxv() diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py index f1d024b94..854584998 100644 --- a/numpy/distutils/ccompiler_opt.py +++ b/numpy/distutils/ccompiler_opt.py @@ -294,6 +294,9 @@ class _Config: VSX2 = dict(interest=2, implies="VSX", implies_detect=False), ## Power9/ISA 3.00 VSX3 = dict(interest=3, implies="VSX2", implies_detect=False), + ## Power10/ISA 3.1 + VSX4 = dict(interest=4, implies="VSX3", implies_detect=False, + extra_checks="VSX4_MMA"), # IBM/Z ## VX(z13) support VX = dict(interest=1, headers="vecintrin.h"), @@ -471,12 +474,16 @@ class _Config: ), VSX3 = dict( flags="-mcpu=power9 -mtune=power9", implies_detect=False + ), + VSX4 = dict( + flags="-mcpu=power10 -mtune=power10", implies_detect=False ) ) if self.cc_is_clang: partial["VSX"]["flags"] = "-maltivec -mvsx" partial["VSX2"]["flags"] = "-mpower8-vector" partial["VSX3"]["flags"] = "-mpower9-vector" + partial["VSX4"]["flags"] = "-mpower10-vector" return partial diff --git a/numpy/distutils/checks/cpu_vsx4.c b/numpy/distutils/checks/cpu_vsx4.c new file mode 100644 index 000000000..a6acc7384 --- /dev/null +++ b/numpy/distutils/checks/cpu_vsx4.c @@ -0,0 +1,14 @@ +#ifndef __VSX__ + #error "VSX is not supported" +#endif +#include <altivec.h> + +typedef __vector unsigned int v_uint32x4; + +int main(void) +{ + v_uint32x4 v1 = (v_uint32x4){2, 4, 8, 16}; + v_uint32x4 v2 = (v_uint32x4){2, 2, 2, 2}; + v_uint32x4 v3 = vec_mod(v1, v2); + return (int)vec_extractm(v3); +} diff --git a/numpy/distutils/checks/extra_vsx4_mma.c b/numpy/distutils/checks/extra_vsx4_mma.c new file mode 100644 index 000000000..a70b2a9f6 --- /dev/null +++ b/numpy/distutils/checks/extra_vsx4_mma.c @@ -0,0 +1,21 @@ +#ifndef __VSX__ + #error "VSX is not supported" +#endif +#include <altivec.h> + +typedef __vector float fv4sf_t; +typedef __vector unsigned char vec_t; + +int main(void) +{ + __vector_quad acc0; + float a[4] = {0,1,2,3}; + float b[4] = {0,1,2,3}; + vec_t *va = (vec_t *) a; + vec_t *vb = (vec_t *) b; + __builtin_mma_xvf32ger(&acc0, va[0], vb[0]); + fv4sf_t result[4]; + __builtin_mma_disassemble_acc((void *)result, &acc0); + fv4sf_t c0 = result[0]; + return (int)((float*)&c0)[0]; +} diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py index dc1ab3b9b..80830d559 100644 --- a/numpy/distutils/command/build.py +++ b/numpy/distutils/command/build.py @@ -47,8 +47,8 @@ class build(old_build): - not part of dispatch-able features(--cpu-dispatch) - not supported by compiler or platform """ - self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F" \ - " AVX512_SKX VSX VSX2 VSX3 NEON ASIMD VX VXE VXE2" + self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F " \ + "AVX512_SKX VSX VSX2 VSX3 VSX4 NEON ASIMD VX VXE VXE2" def finalize_options(self): build_scripts = self.build_scripts diff --git a/numpy/distutils/tests/test_ccompiler_opt.py b/numpy/distutils/tests/test_ccompiler_opt.py index 6f9970c75..1ca8bc09b 100644 --- a/numpy/distutils/tests/test_ccompiler_opt.py +++ b/numpy/distutils/tests/test_ccompiler_opt.py @@ -405,7 +405,7 @@ class _Test_CCompilerOpt: # in msvc, avx512_knl avx512_knm aren't supported x86_msvc=".* xop fma4 .* avx512f .* avx512_skx .*", armhf=".* asimd asimdhp asimddp .*", - ppc64="vsx vsx2 vsx3.*", + ppc64="vsx vsx2 vsx3 vsx4.*", s390x="vx vxe vxe2.*" ) # min @@ -544,13 +544,13 @@ class _Test_CCompilerOpt: """ /*@targets sse sse2 sse41 avx avx2 avx512f - vsx vsx2 vsx3 + vsx vsx2 vsx3 vsx4 neon neon_fp16 asimdhp asimddp vx vxe vxe2 */ """, baseline="avx vsx2 asimd vx vxe", - x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3", + x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx4 vsx3", s390x="vxe2" ) # test skipping non-dispatch features @@ -558,7 +558,7 @@ class _Test_CCompilerOpt: """ /*@targets sse41 avx avx2 avx512f - vsx2 vsx3 + vsx2 vsx3 vsx4 asimd asimdhp asimddp vx vxe vxe2 */ @@ -571,13 +571,13 @@ class _Test_CCompilerOpt: """ /*@targets sse2 sse41 avx2 avx512f - vsx2 vsx3 + vsx2 vsx3 vsx4 neon asimdhp asimddp vx vxe vxe2 */ """, baseline="", - trap_files=".*(avx2|avx512f|vsx3|asimddp|vxe2).c", + trap_files=".*(avx2|avx512f|vsx3|vsx4|asimddp|vxe2).c", x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon", s390x="vxe vx" ) |