summary | refs | log | tree | commit | diff
path: root/numpy/core
diff options
context:
space:
mode:
author: Rafael Cardoso Fernandes Sousa <rafaelcfsousa@ibm.com> 2022-01-07 10:48:36 -0600
committer: Rafael Cardoso Fernandes Sousa <rafaelcfsousa@ibm.com> 2022-01-13 14:47:51 -0600
commit: d88104fd0e2f85ac4eb667fcd7161aaa1d41e11a (patch)
tree: dfcfeafda07a79d06d75bbaa104ebb7e2d51ced0 /numpy/core
parent: 7191d9a4773d77205349ac151f84b72c0ffcf848 (diff)
download: numpy-d88104fd0e2f85ac4eb667fcd7161aaa1d41e11a.tar.gz
ENH: Add CPU feature detection for VSX4 (Power10)
Diffstat (limited to 'numpy/core')
-rw-r--r-- numpy/core/src/common/npy_cpu_features.c.src | 20
-rw-r--r-- numpy/core/src/common/npy_cpu_features.h | 6
-rw-r--r-- numpy/core/tests/test_cpu_features.py | 4
3 files changed, 22 insertions, 8 deletions
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index 1385220f9..ff4f9f60a 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -61,7 +61,7 @@ npy_cpu_features_dict(void)
* AVX512VPOPCNTDQ, AVX512VL, AVX512BW, AVX512DQ, AVX512VNNI,
* AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG,
* AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL,
- * VSX, VSX2, VSX3,
+ * VSX, VSX2, VSX3, VSX4,
* VX, VXE, VXE2,
* NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM#
*/
@@ -474,9 +474,15 @@ npy__cpu_init_features(void)
#ifndef AT_HWCAP2
#define AT_HWCAP2 26
#endif
+ #ifndef PPC_FEATURE2_ARCH_2_07
+ #define PPC_FEATURE2_ARCH_2_07 0x80000000
+ #endif
#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
+ #ifndef PPC_FEATURE2_ARCH_3_1
+ #define PPC_FEATURE2_ARCH_3_1 0x00040000
+ #endif
#endif
static void
@@ -489,15 +495,18 @@ npy__cpu_init_features(void)
return;
hwcap = getauxval(AT_HWCAP2);
- if (hwcap & PPC_FEATURE2_ARCH_3_00)
+ if (hwcap & PPC_FEATURE2_ARCH_3_1)
{
npy__cpu_have[NPY_CPU_FEATURE_VSX] =
npy__cpu_have[NPY_CPU_FEATURE_VSX2] =
- npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1;
+ npy__cpu_have[NPY_CPU_FEATURE_VSX3] =
+ npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1;
return;
}
- npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0;
npy__cpu_have[NPY_CPU_FEATURE_VSX] = 1;
+ npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0;
+ npy__cpu_have[NPY_CPU_FEATURE_VSX3] = (hwcap & PPC_FEATURE2_ARCH_3_00) != 0;
+ npy__cpu_have[NPY_CPU_FEATURE_VSX4] = (hwcap & PPC_FEATURE2_ARCH_3_1) != 0;
// TODO: AIX, FreeBSD
#else
npy__cpu_have[NPY_CPU_FEATURE_VSX] = 1;
@@ -507,6 +516,9 @@ npy__cpu_init_features(void)
#ifdef NPY_HAVE_VSX3
npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1;
#endif
+ #ifdef NPY_HAVE_VSX4
+ npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1;
+ #endif
#endif
}
diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h
index 1f52a445d..3d5f2e75c 100644
--- a/numpy/core/src/common/npy_cpu_features.h
+++ b/numpy/core/src/common/npy_cpu_features.h
@@ -65,6 +65,8 @@ enum npy_cpu_features
NPY_CPU_FEATURE_VSX2 = 201,
// POWER9
NPY_CPU_FEATURE_VSX3 = 202,
+ // POWER10
+ NPY_CPU_FEATURE_VSX4 = 203,
// ARM
NPY_CPU_FEATURE_NEON = 300,
@@ -167,8 +169,8 @@ npy_cpu_baseline_list(void);
* On x64: ['SSSE3', 'SSE41', 'POPCNT', 'SSE42', 'AVX', 'F16C', 'FMA3', 'AVX2', 'AVX512F', ...]
* On armhf: ['NEON', 'NEON_FP16', 'NEON_VFPV4', 'ASIMD', 'ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
* On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
- * On ppc64: ['VSX', 'VSX2', 'VSX3']
- * On ppc64le: ['VSX3']
+ * On ppc64: ['VSX', 'VSX2', 'VSX3', 'VSX4']
+ * On ppc64le: ['VSX3', 'VSX4']
* On s390x: ['VX', 'VXE', 'VXE2']
* On any other arch or if the optimization is disabled: []
*/
diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py
index 706cf7a7e..1a76897e2 100644
--- a/numpy/core/tests/test_cpu_features.py
+++ b/numpy/core/tests/test_cpu_features.py
@@ -140,8 +140,8 @@ class Test_X86_Features(AbstractTest):
is_power = re.match("^(powerpc|ppc)64", machine, re.IGNORECASE)
@pytest.mark.skipif(not is_linux or not is_power, reason="Only for Linux and Power")
class Test_POWER_Features(AbstractTest):
- features = ["VSX", "VSX2", "VSX3"]
- features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00")
+ features = ["VSX", "VSX2", "VSX3", "VSX4"]
+ features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00", VSX4="ARCH_3_1")
def load_flags(self):
self.load_flags_auxv()