summary refs log tree commit diff
path: root/numpy/core/tests
diff options
context:
space:
mode:
author Sayed Adel <seiko@imavr.com> 2020-01-21 11:56:33 +0200
committer Sayed Adel <seiko@imavr.com> 2020-02-05 05:09:21 +0200
commit ad174001a869f42bb89ccff77ac3eec04a9d71e8 (patch)
tree 3dbcad1b3e121d499ec2f072b7e80b32bb9a62ef /numpy/core/tests
parent f71d9937d1e8a1e709f325f689f1e971e64c26a7 (diff)
download numpy-ad174001a869f42bb89ccff77ac3eec04a9d71e8.tar.gz
ENH: improve runtime detection of CPU features
- Put the old CPU detection code to rest. The current CPU detection code only supports x86 and relies on compiler built-in functions that are not widely supported by other compilers or platforms. NOTE: `npy_cpu_supports` is removed rather than deprecated; use the macro `NPY_CPU_HAVE(FEATURE_NAME_WITHOUT_QUOTES)` instead.
- Initialize the new CPU features runtime detector. It is almost the same as the GCC built-in functions: instead of `__builtin_cpu_init` and `__builtin_cpu_supports`, it provides `npy_cpu_init`, `npy_cpu_have` and `NPY_CPU_HAVE`. NOTE: `npy_cpu_init` must be called before any use of `npy_cpu_have` and `NPY_CPU_HAVE`; however, `npy_cpu_init` is already called while the `umath` module is loaded, so in most cases there is no reason to call it again.
- Add X86 support: detects almost all x86 features and also provides CPU feature groups that gather several features, e.g. `AVX512_KNM` detects Knights Mill's `AVX512` features.
- Add IBM/Power support: only Linux is supported; it relies on `glibc(getauxval)` to detect VSX support and falls back to the compiler definitions on other platforms.
- Add ARM support: same as IBM/Power, but it parses `/proc/self/auxv` if `glibc(getauxval)` isn't available.
- Update the umath generator.
- Add a unit test, for Linux only.
- Add a new attribute `__cpu_features__` to the umath module: `__cpu_features__` is a dictionary containing all supported CPU feature names with their runtime availability.
Diffstat (limited to 'numpy/core/tests')
-rw-r--r-- numpy/core/tests/test_cpu_features.py 104
1 file changed, 104 insertions, 0 deletions
diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py
new file mode 100644
index 000000000..3b5cb3157
--- /dev/null
+++ b/numpy/core/tests/test_cpu_features.py
@@ -0,0 +1,104 @@
+import sys, platform, re, pytest
+
+from numpy.testing import assert_equal
+from numpy.core._multiarray_umath import __cpu_features__
+
+class AbstractTest(object):
+    """
+    Compare the entries of `__cpu_features__` with the CPU flags collected
+    by `load_flags`.
+
+    Subclasses set the class attributes below and override `load_flags`
+    to fill `features_flags`.
+    """
+    # feature names looked up in `__cpu_features__`
+    features = []
+    # group name -> feature names; a group is expected only if all are present
+    features_groups = {}
+    # feature name -> flag name to search for, when the two differ
+    features_map = {}
+    # upper-cased flag names gathered by the `load_flags_*` helpers
+    features_flags = set()
+
+    def load_flags(self):
+        # a hook
+        pass
+
+    def test_features(self):
+        """Assert that every group and feature agrees with the loaded flags."""
+        self.load_flags()
+        for gname, features in self.features_groups.items():
+            test_features = [
+                self.features_map.get(f, f) in self.features_flags
+                for f in features
+            ]
+            # name the group so a failure is attributable
+            assert_equal(
+                __cpu_features__.get(gname), all(test_features),
+                err_msg="CPU feature group %r" % (gname,)
+            )
+
+        for feature_name in self.features:
+            map_name = self.features_map.get(feature_name, feature_name)
+            cpu_have = map_name in self.features_flags
+            npy_have = __cpu_features__.get(feature_name)
+            # name the feature and the flag it was matched against
+            assert_equal(
+                npy_have, cpu_have,
+                err_msg="CPU feature %r (flag %r)" % (feature_name, map_name)
+            )
+
+    def load_flags_proc(self, magic_key):
+        """
+        Add to `features_flags` the upper-cased, whitespace-separated values
+        of every `/proc/cpuinfo` line that starts with `magic_key`.
+        """
+        with open('/proc/cpuinfo') as fd:
+            for line in fd:
+                if not line.startswith(magic_key):
+                    continue
+                flags_value = [s.strip() for s in line.split(':', 1)]
+                if len(flags_value) == 2:
+                    # `union` returns a new set, so the class-level default
+                    # set is never mutated in place
+                    self.features_flags = self.features_flags.union(
+                        flags_value[1].upper().split()
+                    )
+
+    def load_flags_auxv(self):
+        """
+        Add to `features_flags` the upper-cased values of the `AT_HWCAP*`
+        lines printed by `/bin/true` when run with `LD_SHOW_AUXV=1`.
+        """
+        import subprocess
+        # the dynamic loader is expected to print the auxiliary vector when
+        # LD_SHOW_AUXV is set (see ld.so(8))
+        auxv = subprocess.check_output(['/bin/true'], env=dict(LD_SHOW_AUXV="1"))
+        for at in auxv.split(b'\n'):
+            if not at.startswith(b"AT_HWCAP"):
+                continue
+            hwcap_value = [s.strip() for s in at.split(b':', 1)]
+            if len(hwcap_value) == 2:
+                self.features_flags = self.features_flags.union(
+                    hwcap_value[1].upper().decode().split()
+                )
+
+
+# Platform probes used by the skip conditions of the test classes below.
+machine = platform.machine()
+is_linux = sys.platform.startswith('linux')
+# A match object (truthy) when the machine name starts with an x86 family
+# prefix, e.g. "x86_64" or "i686"; otherwise None.
+is_x86 = re.match("^(amd64|x86|i386|i686)", machine, flags=re.IGNORECASE)
+@pytest.mark.skipif(not is_linux or not is_x86, reason="Only for Linux and x86")
+class Test_X86_Features(AbstractTest):
+    # Individual features, compared with the "flags" lines of /proc/cpuinfo.
+    features = [
+        "MMX", "SSE", "SSE2", "SSE3", "SSSE3", "SSE41", "POPCNT", "SSE42", "AVX",
+        "F16C", "XOP", "FMA4", "FMA3", "AVX2",
+        "AVX512F", "AVX512CD", "AVX512ER", "AVX512PF",
+        "AVX5124FMAPS", "AVX5124VNNIW", "AVX512VPOPCNTDQ",
+        "AVX512VL", "AVX512BW", "AVX512DQ",
+        "AVX512VNNI", "AVX512IFMA", "AVX512VBMI", "AVX512VBMI2", "AVX512BITALG",
+    ]
+    # Group name -> the features that must all be present for the group.
+    features_groups = {
+        "AVX512_KNL": ["AVX512F", "AVX512CD", "AVX512ER", "AVX512PF"],
+        "AVX512_KNM": [
+            "AVX512F", "AVX512CD", "AVX512ER", "AVX512PF",
+            "AVX5124FMAPS", "AVX5124VNNIW", "AVX512VPOPCNTDQ",
+        ],
+        "AVX512_SKX": ["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL"],
+        "AVX512_CLX": [
+            "AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL",
+            "AVX512VNNI",
+        ],
+        "AVX512_CNL": [
+            "AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL",
+            "AVX512IFMA", "AVX512VBMI",
+        ],
+        "AVX512_ICL": [
+            "AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL",
+            "AVX512IFMA", "AVX512VBMI", "AVX512VNNI", "AVX512VBMI2",
+            "AVX512BITALG", "AVX512VPOPCNTDQ",
+        ],
+    }
+    # Feature name -> flag name to search for, where the spellings differ.
+    features_map = {
+        "SSE3": "PNI",
+        "SSE41": "SSE4_1",
+        "SSE42": "SSE4_2",
+        "FMA3": "FMA",
+        "AVX512VNNI": "AVX512_VNNI",
+        "AVX512BITALG": "AVX512_BITALG",
+        "AVX512VBMI2": "AVX512_VBMI2",
+        "AVX5124FMAPS": "AVX512_4FMAPS",
+        "AVX5124VNNIW": "AVX512_4VNNIW",
+        "AVX512VPOPCNTDQ": "AVX512_VPOPCNTDQ",
+    }
+
+    def load_flags(self):
+        # collect the flags from the /proc/cpuinfo lines starting with "flags"
+        self.load_flags_proc("flags")
+
+
+# Truthy when the machine name starts with "powerpc64" or "ppc64" (any case).
+is_power = re.match("^(powerpc|ppc)64", machine, flags=re.IGNORECASE)
+@pytest.mark.skipif(not is_linux or not is_power, reason="Only for Linux and Power")
+class Test_POWER_Features(AbstractTest):
+    # Features compared with the AT_HWCAP* values loaded by `load_flags_auxv`.
+    features = ["VSX", "VSX2", "VSX3"]
+    # VSX2 and VSX3 are searched for under these flag names instead.
+    features_map = {"VSX2": "ARCH_2_07", "VSX3": "ARCH_3_00"}
+
+    def load_flags(self):
+        # flags come from the auxiliary vector rather than /proc/cpuinfo
+        self.load_flags_auxv()
+
+
+# Truthy when the machine name starts with "arm" or "aarch64" (any case).
+is_arm = re.match("^(arm|aarch64)", machine, re.IGNORECASE)
+@pytest.mark.skipif(not is_linux or not is_arm, reason="Only for Linux and ARM")
+class Test_ARM_Features(AbstractTest):
+    # Features compared with the "Features" lines of /proc/cpuinfo.
+    features = [
+        "NEON", "ASIMD", "FPHP", "ASIMDHP", "ASIMDDP", "ASIMDFHM"
+    ]
+    # Group name -> the features that must all be present for the group.
+    features_groups = dict(
+        NEON_FP16 = ["NEON", "HALF"],
+        NEON_VFPV4 = ["NEON", "VFPV4"],
+    )
+    def load_flags(self):
+        self.load_flags_proc("Features")
+        # Reuse the module-level `machine` and the case-insensitive matching
+        # of the class guard, so both decisions rest on the same probe.
+        if re.match("^aarch64", machine, re.IGNORECASE):
+            # on aarch64, NEON, HALF and VFPV4 are all looked up as ASIMD
+            self.features_map = dict(
+                NEON="ASIMD", HALF="ASIMD", VFPV4="ASIMD"
+            )