diff options
author | Sayed Adel <seiko@imavr.com> | 2020-01-21 11:56:33 +0200 |
---|---|---|
committer | Sayed Adel <seiko@imavr.com> | 2020-02-05 05:09:21 +0200 |
commit | ad174001a869f42bb89ccff77ac3eec04a9d71e8 (patch) | |
tree | 3dbcad1b3e121d499ec2f072b7e80b32bb9a62ef /numpy/core/tests | |
parent | f71d9937d1e8a1e709f325f689f1e971e64c26a7 (diff) | |
download | numpy-ad174001a869f42bb89ccff77ac3eec04a9d71e8.tar.gz |
ENH: improve runtime detection of CPU features
- Put the old CPU detection code to rest
The current CPU detection code only supports x86, and
it relies on compiler built-in functions that are not widely supported
by other compilers or platforms.
NOTE: `npy_cpu_supports` is removed rather than deprecated,
use the macro `NPY_CPU_HAVE(FEATURE_NAME_WITHOUT_QUOTES)` instead.
- Initialize the new CPU features runtime detector
It closely mirrors the GCC built-in functions:
instead of `__builtin_cpu_init` and `__builtin_cpu_supports`,
it provides `npy_cpu_init`, `npy_cpu_have` and `NPY_CPU_HAVE`.
NOTE: `npy_cpu_init` must be called before any use of
`npy_cpu_have` or `NPY_CPU_HAVE`; however, `npy_cpu_init`
is already called when the `umath` module is loaded,
so in most cases there is no reason to call it again.
- Add X86 support
Detects almost all x86 features and also provides
CPU feature groups that gather several features,
e.g. `AVX512_KNM` detects Knights Mill's `AVX512` features.
- Add IBM/Power support
Runtime detection only supports Linux, where it relies on `glibc(getauxval)`
to detect VSX support; it falls back to the compiler definitions
on other platforms.
- Add ARM support
Same as IBM/Power, but it parses `/proc/self/auxv`
if `glibc(getauxval)` isn't available.
- Update umath generator
- Add unit tests (Linux only)
- Add new attribute `__cpu_features__` to umath module
`__cpu_features__` is a dictionary that contains all supported
CPU feature names along with their runtime availability.
Diffstat (limited to 'numpy/core/tests')
-rw-r--r-- | numpy/core/tests/test_cpu_features.py | 104 |
1 files changed, 104 insertions, 0 deletions
# numpy/core/tests/test_cpu_features.py (new file added by this commit)
"""Compare NumPy's runtime CPU feature detection with what Linux reports.

Each concrete test class names the features (and feature groups) it expects
to find as keys of ``__cpu_features__`` and supplies a ``load_flags`` hook
that fills ``features_flags`` from the operating system.
"""
import platform
import re
import subprocess
import sys

import pytest

from numpy.testing import assert_equal
from numpy.core._multiarray_umath import __cpu_features__


class AbstractTest:
    """Shared logic for the per-architecture CPU feature tests.

    Subclasses override the class attributes below and ``load_flags``.
    """
    # Feature names looked up one by one in ``__cpu_features__``.
    features = []
    # Group name -> list of features; the group is expected to be reported
    # only when every listed feature is present in ``features_flags``.
    features_groups = {}
    # NumPy feature name -> name used by the OS (identity when missing).
    features_map = {}
    # Upper-cased flag names collected by ``load_flags``.
    features_flags = set()

    def load_flags(self):
        """Hook: populate ``self.features_flags``; does nothing by default."""
        pass

    def test_features(self):
        """Check every group and feature against the flags the OS reports."""
        self.load_flags()
        for gname, features in self.features_groups.items():
            expected = all(
                self.features_map.get(f, f) in self.features_flags
                for f in features
            )
            assert_equal(
                __cpu_features__.get(gname), expected,
                err_msg="CPU feature group %r mismatch" % gname
            )

        for feature_name in self.features:
            map_name = self.features_map.get(feature_name, feature_name)
            cpu_have = map_name in self.features_flags
            npy_have = __cpu_features__.get(feature_name)
            assert_equal(
                npy_have, cpu_have,
                err_msg="CPU feature %r (looked up as %r) mismatch"
                        % (feature_name, map_name)
            )

    def load_flags_proc(self, magic_key):
        """Collect flags from the ``/proc/cpuinfo`` lines starting with
        `magic_key`.

        Everything after the first ``:`` on a matching line is upper-cased,
        split on whitespace and merged into ``self.features_flags``.
        """
        with open('/proc/cpuinfo') as fd:
            for line in fd:
                if not line.startswith(magic_key):
                    continue
                _, colon, value = line.partition(':')
                if colon:
                    # Rebind rather than update in place: the default
                    # ``features_flags`` is a set shared on the class.
                    self.features_flags = self.features_flags.union(
                        value.upper().split()
                    )

    def load_flags_auxv(self):
        """Collect flags from the ``AT_HWCAP*`` lines printed when
        ``/bin/true`` is run with ``LD_SHOW_AUXV=1`` in its environment.
        """
        auxv = subprocess.check_output(
            ['/bin/true'], env=dict(LD_SHOW_AUXV="1")
        )
        for at in auxv.split(b'\n'):
            if not at.startswith(b"AT_HWCAP"):
                continue
            _, colon, value = at.partition(b':')
            if colon:
                # Rebind rather than update in place (see load_flags_proc).
                self.features_flags = self.features_flags.union(
                    value.upper().decode().split()
                )


is_linux = sys.platform.startswith('linux')
machine = platform.machine()

is_x86 = re.match("^(amd64|x86|i386|i686)", machine, re.IGNORECASE)
@pytest.mark.skipif(not is_linux or not is_x86,
                    reason="Only for Linux and x86")
class Test_X86_Features(AbstractTest):
    """x86 features, checked against the ``flags`` lines of /proc/cpuinfo."""
    features = [
        "MMX", "SSE", "SSE2", "SSE3", "SSSE3", "SSE41", "POPCNT", "SSE42",
        "AVX", "F16C", "XOP", "FMA4", "FMA3", "AVX2", "AVX512F", "AVX512CD",
        "AVX512ER", "AVX512PF", "AVX5124FMAPS", "AVX5124VNNIW",
        "AVX512VPOPCNTDQ", "AVX512VL", "AVX512BW", "AVX512DQ", "AVX512VNNI",
        "AVX512IFMA", "AVX512VBMI", "AVX512VBMI2", "AVX512BITALG",
    ]
    features_groups = dict(
        AVX512_KNL=["AVX512F", "AVX512CD", "AVX512ER", "AVX512PF"],
        AVX512_KNM=["AVX512F", "AVX512CD", "AVX512ER", "AVX512PF",
                    "AVX5124FMAPS", "AVX5124VNNIW", "AVX512VPOPCNTDQ"],
        AVX512_SKX=["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ",
                    "AVX512VL"],
        AVX512_CLX=["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ",
                    "AVX512VL", "AVX512VNNI"],
        AVX512_CNL=["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ",
                    "AVX512VL", "AVX512IFMA", "AVX512VBMI"],
        AVX512_ICL=["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ",
                    "AVX512VL", "AVX512IFMA", "AVX512VBMI", "AVX512VNNI",
                    "AVX512VBMI2", "AVX512BITALG", "AVX512VPOPCNTDQ"],
    )
    # NumPy name -> upper-cased spelling expected in /proc/cpuinfo.
    features_map = dict(
        SSE3="PNI", SSE41="SSE4_1", SSE42="SSE4_2", FMA3="FMA",
        AVX512VNNI="AVX512_VNNI", AVX512BITALG="AVX512_BITALG",
        AVX512VBMI2="AVX512_VBMI2", AVX5124FMAPS="AVX512_4FMAPS",
        AVX5124VNNIW="AVX512_4VNNIW", AVX512VPOPCNTDQ="AVX512_VPOPCNTDQ",
    )

    def load_flags(self):
        self.load_flags_proc("flags")


is_power = re.match("^(powerpc|ppc)64", machine, re.IGNORECASE)
@pytest.mark.skipif(not is_linux or not is_power,
                    reason="Only for Linux and Power")
class Test_POWER_Features(AbstractTest):
    """IBM/Power VSX features, checked against the auxiliary vector."""
    features = ["VSX", "VSX2", "VSX3"]
    features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00")

    def load_flags(self):
        self.load_flags_auxv()


is_arm = re.match("^(arm|aarch64)", machine, re.IGNORECASE)
@pytest.mark.skipif(not is_linux or not is_arm,
                    reason="Only for Linux and ARM")
class Test_ARM_Features(AbstractTest):
    """ARM features, checked against the ``Features`` lines of
    /proc/cpuinfo."""
    features = [
        "NEON", "ASIMD", "FPHP", "ASIMDHP", "ASIMDDP", "ASIMDFHM"
    ]
    features_groups = dict(
        NEON_FP16=["NEON", "HALF"],
        NEON_VFPV4=["NEON", "VFPV4"],
    )

    def load_flags(self):
        self.load_flags_proc("Features")
        # Same machine string and case-insensitive matching as the
        # module-level probes above. On aarch64, NEON, HALF and VFPV4 are
        # all looked up under the ASIMD flag.
        if re.match("^aarch64", machine, re.IGNORECASE):
            self.features_map = dict(
                NEON="ASIMD", HALF="ASIMD", VFPV4="ASIMD"
            )