9 files changed, 88 insertions, 30 deletions
diff --git a/doc/source/reference/simd/generated_tables/cpu_features.inc b/doc/source/reference/simd/generated_tables/cpu_features.inc
index 17d1b4951..7782172d2 100644
--- a/doc/source/reference/simd/generated_tables/cpu_features.inc
+++ b/doc/source/reference/simd/generated_tables/cpu_features.inc
@@ -36,26 +36,28 @@ On IBM/POWER big-endian
 .. table::
     :align: left
 
-    ======== ================
-    Name     Implies         
-    ======== ================
-    ``VSX``                  
-    ``VSX2`` ``VSX``         
-    ``VSX3`` ``VSX`` ``VSX2``
-    ======== ================
+    ======== =========================
+    Name     Implies                  
+    ======== =========================
+    ``VSX``                           
+    ``VSX2`` ``VSX``                  
+    ``VSX3`` ``VSX`` ``VSX2``         
+    ``VSX4`` ``VSX`` ``VSX2`` ``VSX3``
+    ======== =========================
 
 On IBM/POWER little-endian
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. table::
     :align: left
 
-    ======== ================
-    Name     Implies         
-    ======== ================
-    ``VSX``  ``VSX2``        
-    ``VSX2`` ``VSX``         
-    ``VSX3`` ``VSX`` ``VSX2``
-    ======== ================
+    ======== =========================
+    Name     Implies                  
+    ======== =========================
+    ``VSX``  ``VSX2``                 
+    ``VSX2`` ``VSX``                  
+    ``VSX3`` ``VSX`` ``VSX2``         
+    ``VSX4`` ``VSX`` ``VSX2`` ``VSX3``
+    ======== =========================
 
 On ARMv7/A32
 ~~~~~~~~~~~~
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index 1385220f9..ff4f9f60a 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -61,7 +61,7 @@ npy_cpu_features_dict(void)
      *            AVX512VPOPCNTDQ, AVX512VL, AVX512BW, AVX512DQ, AVX512VNNI,
      *            AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG,
      *            AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL,
-     *            VSX, VSX2, VSX3,
+     *            VSX, VSX2, VSX3, VSX4,
      *            VX, VXE, VXE2,
      *            NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM#
     */
@@ -474,9 +474,15 @@ npy__cpu_init_features(void)
     #ifndef AT_HWCAP2
         #define AT_HWCAP2 26
     #endif
+    #ifndef PPC_FEATURE2_ARCH_2_07
+        #define PPC_FEATURE2_ARCH_2_07 0x80000000
+    #endif
     #ifndef PPC_FEATURE2_ARCH_3_00
         #define PPC_FEATURE2_ARCH_3_00 0x00800000
     #endif
+    #ifndef PPC_FEATURE2_ARCH_3_1
+        #define PPC_FEATURE2_ARCH_3_1  0x00040000
+    #endif
 #endif
 
 static void
@@ -489,15 +495,18 @@ npy__cpu_init_features(void)
         return;
 
     hwcap = getauxval(AT_HWCAP2);
-    if (hwcap & PPC_FEATURE2_ARCH_3_00)
+    if (hwcap & PPC_FEATURE2_ARCH_3_1)
     {
         npy__cpu_have[NPY_CPU_FEATURE_VSX]  =
         npy__cpu_have[NPY_CPU_FEATURE_VSX2] =
-        npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1;
+        npy__cpu_have[NPY_CPU_FEATURE_VSX3] =
+        npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1;
         return;
     }
-    npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0;
     npy__cpu_have[NPY_CPU_FEATURE_VSX]  = 1;
+    npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0;
+    npy__cpu_have[NPY_CPU_FEATURE_VSX3] = (hwcap & PPC_FEATURE2_ARCH_3_00) != 0;
+    npy__cpu_have[NPY_CPU_FEATURE_VSX4] = (hwcap & PPC_FEATURE2_ARCH_3_1) != 0;
 // TODO: AIX, FreeBSD
 #else
     npy__cpu_have[NPY_CPU_FEATURE_VSX]  = 1;
@@ -507,6 +516,9 @@ npy__cpu_init_features(void)
     #ifdef NPY_HAVE_VSX3
     npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1;
     #endif
+    #ifdef NPY_HAVE_VSX4
+    npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1;
+    #endif
 #endif
 }
 
diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h
index 1f52a445d..3d5f2e75c 100644
--- a/numpy/core/src/common/npy_cpu_features.h
+++ b/numpy/core/src/common/npy_cpu_features.h
@@ -65,6 +65,8 @@ enum npy_cpu_features
     NPY_CPU_FEATURE_VSX2              = 201,
     // POWER9
     NPY_CPU_FEATURE_VSX3              = 202,
+    // POWER10
+    NPY_CPU_FEATURE_VSX4              = 203,
 
     // ARM
     NPY_CPU_FEATURE_NEON              = 300,
@@ -167,8 +169,8 @@ npy_cpu_baseline_list(void);
  * On x64: ['SSSE3', 'SSE41', 'POPCNT', 'SSE42', 'AVX', 'F16C', 'FMA3', 'AVX2', 'AVX512F', ...]
  * On armhf: ['NEON', 'NEON_FP16', 'NEON_VPFV4', 'ASIMD', 'ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
  * On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
- * On ppc64:  ['VSX', 'VSX2', 'VSX3']
- * On ppc64le: ['VSX3']
+ * On ppc64:  ['VSX', 'VSX2', 'VSX3', 'VSX4']
+ * On ppc64le: ['VSX3', 'VSX4']
  * On s390x: ['VX', 'VXE', VXE2]
  * On any other arch or if the optimization is disabled: []
  */
diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py
index 706cf7a7e..1a76897e2 100644
--- a/numpy/core/tests/test_cpu_features.py
+++ b/numpy/core/tests/test_cpu_features.py
@@ -140,8 +140,8 @@ class Test_X86_Features(AbstractTest):
 is_power = re.match("^(powerpc|ppc)64", machine, re.IGNORECASE)
 @pytest.mark.skipif(not is_linux or not is_power, reason="Only for Linux and Power")
 class Test_POWER_Features(AbstractTest):
-    features = ["VSX", "VSX2", "VSX3"]
-    features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00")
+    features = ["VSX", "VSX2", "VSX3", "VSX4"]
+    features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00", VSX4="ARCH_3_1")
 
     def load_flags(self):
         self.load_flags_auxv()
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index f1d024b94..854584998 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -294,6 +294,9 @@ class _Config:
         VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
         ## Power9/ISA 3.00
         VSX3 = dict(interest=3, implies="VSX2", implies_detect=False),
+        ## Power10/ISA 3.1
+        VSX4 = dict(interest=4, implies="VSX3", implies_detect=False,
+                    extra_checks="VSX4_MMA"),
         # IBM/Z
         ## VX(z13) support
         VX = dict(interest=1, headers="vecintrin.h"),
@@ -471,12 +474,16 @@ class _Config:
                 ),
                 VSX3 = dict(
                     flags="-mcpu=power9 -mtune=power9", implies_detect=False
+                ),
+                VSX4 = dict(
+                    flags="-mcpu=power10 -mtune=power10", implies_detect=False
                 )
             )
             if self.cc_is_clang:
                 partial["VSX"]["flags"]  = "-maltivec -mvsx"
                 partial["VSX2"]["flags"] = "-mpower8-vector"
                 partial["VSX3"]["flags"] = "-mpower9-vector"
+                partial["VSX4"]["flags"] = "-mpower10-vector"
 
             return partial
 
diff --git a/numpy/distutils/checks/cpu_vsx4.c b/numpy/distutils/checks/cpu_vsx4.c
new file mode 100644
index 000000000..a6acc7384
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vsx4.c
@@ -0,0 +1,14 @@
+#ifndef __VSX__  /* build-time check source for the "VSX4" feature */
+    #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+typedef __vector unsigned int v_uint32x4;  /* four unsigned 32-bit lanes */
+
+int main(void)
+{
+    v_uint32x4 v1 = (v_uint32x4){2, 4, 8, 16};
+    v_uint32x4 v2 = (v_uint32x4){2, 2, 2, 2};
+    v_uint32x4 v3 = vec_mod(v1, v2);  /* NOTE(review): presumably an ISA 3.1-only intrinsic; confirm */
+    return (int)vec_extractm(v3);  /* result depends on v3, so both intrinsics stay in use */
+}
diff --git a/numpy/distutils/checks/extra_vsx4_mma.c b/numpy/distutils/checks/extra_vsx4_mma.c
new file mode 100644
index 000000000..a70b2a9f6
--- /dev/null
+++ b/numpy/distutils/checks/extra_vsx4_mma.c
@@ -0,0 +1,21 @@
+#ifndef __VSX__  /* build-time check source; presumably backs extra_checks="VSX4_MMA" */
+    #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+typedef __vector float fv4sf_t;  /* vector of floats, used to read the result rows */
+typedef __vector unsigned char vec_t;  /* byte vector type passed to the MMA builtin */
+
+int main(void)
+{
+    __vector_quad acc0;  /* operand written by xvf32ger and read by disassemble_acc */
+    float a[4] = {0,1,2,3};
+    float b[4] = {0,1,2,3};
+    vec_t *va = (vec_t *) a;  /* view the float array as a byte vector */
+    vec_t *vb = (vec_t *) b;
+    __builtin_mma_xvf32ger(&acc0, va[0], vb[0]);  /* the MMA builtin being probed */
+    fv4sf_t result[4];
+    __builtin_mma_disassemble_acc((void *)result, &acc0);  /* presumably unpacks acc0 into result; confirm */
+    fv4sf_t c0 = result[0];
+    return (int)((float*)&c0)[0];  /* first float of result[0], converted to int */
+}
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index dc1ab3b9b..80830d559 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -47,8 +47,8 @@ class build(old_build):
             - not part of dispatch-able features(--cpu-dispatch)
             - not supported by compiler or platform
         """
-        self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F" \
-                         " AVX512_SKX VSX VSX2 VSX3 NEON ASIMD VX VXE VXE2"
+        self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F " \
+                         "AVX512_SKX VSX VSX2 VSX3 VSX4 NEON ASIMD VX VXE VXE2"
 
     def finalize_options(self):
         build_scripts = self.build_scripts
diff --git a/numpy/distutils/tests/test_ccompiler_opt.py b/numpy/distutils/tests/test_ccompiler_opt.py
index 6f9970c75..1ca8bc09b 100644
--- a/numpy/distutils/tests/test_ccompiler_opt.py
+++ b/numpy/distutils/tests/test_ccompiler_opt.py
@@ -405,7 +405,7 @@ class _Test_CCompilerOpt:
                 # in msvc, avx512_knl avx512_knm aren't supported
                 x86_msvc=".* xop fma4 .* avx512f .* avx512_skx .*",
                 armhf=".* asimd asimdhp asimddp .*",
-                ppc64="vsx vsx2 vsx3.*",
+                ppc64="vsx vsx2 vsx3 vsx4.*",
                 s390x="vx vxe vxe2.*"
             )
         # min
@@ -544,13 +544,13 @@ class _Test_CCompilerOpt:
             """
             /*@targets
                 sse sse2 sse41 avx avx2 avx512f
-                vsx vsx2 vsx3
+                vsx vsx2 vsx3 vsx4
                 neon neon_fp16 asimdhp asimddp
                 vx vxe vxe2
             */
             """,
             baseline="avx vsx2 asimd vx vxe",
-            x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3",
+            x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx4 vsx3",
             s390x="vxe2"
         )
         # test skipping non-dispatch features
@@ -558,7 +558,7 @@ class _Test_CCompilerOpt:
             """
             /*@targets
                 sse41 avx avx2 avx512f
-                vsx2 vsx3
+                vsx2 vsx3 vsx4
                 asimd asimdhp asimddp
                 vx vxe vxe2
             */
@@ -571,13 +571,13 @@ class _Test_CCompilerOpt:
             """
             /*@targets
                 sse2 sse41 avx2 avx512f
-                vsx2 vsx3
+                vsx2 vsx3 vsx4
                 neon asimdhp asimddp
                 vx vxe vxe2
             */
             """,
             baseline="",
-            trap_files=".*(avx2|avx512f|vsx3|asimddp|vxe2).c",
+            trap_files=".*(avx2|avx512f|vsx3|vsx4|asimddp|vxe2).c",
             x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon",
             s390x="vxe vx"
         )