diff options
author | Matti Picus <matti.picus@gmail.com> | 2020-09-10 12:23:39 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-10 12:23:39 +0300 |
commit | 74712a53df240f1661fbced15ae984888fd9afa6 (patch) | |
tree | 2952220bfde815a673f99382a2cac849a51e781c | |
parent | 3a9588ae8041d611cb1df4be849db88fefc5ea46 (diff) | |
parent | 6138616f5dcb573056984597c99085faf5ae7fe7 (diff) | |
download | numpy-74712a53df240f1661fbced15ae984888fd9afa6.tar.gz |
Merge pull request #17129 from seiko2plus/distutils_avx512bw_mask
BLD: Check for reduce intrinsics and AVX512BW mask operations
-rw-r--r-- | numpy/distutils/ccompiler_opt.py | 92 | ||||
-rw-r--r-- | numpy/distutils/checks/extra_avx512bw_mask.c | 18 | ||||
-rw-r--r-- | numpy/distutils/checks/extra_avx512f_reduce.c | 41 | ||||
-rw-r--r-- | numpy/distutils/tests/test_ccompiler_opt_conf.py | 51 |
4 files changed, 170 insertions, 32 deletions
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py index 85dc2f1e8..9d6bfcbd4 100644 --- a/numpy/distutils/ccompiler_opt.py +++ b/numpy/distutils/ccompiler_opt.py @@ -152,6 +152,18 @@ class _Config: By default(None), treated as True if the feature contains at least one applicable flag. see `feature_can_autovec()` + "extra_checks": str or list, optional + Extra test case names for the CPU feature that need to be tested + against the compiler. + + Each test case must have a C file named ``extra_xxxx.c``, where + ``xxxx`` is the case name in lower case, under 'conf_check_path'. + It should contain at least one intrinsic or function related to the test case. + + If the compiler able to successfully compile the C file then `CCompilerOpt` + will add a C ``#define`` for it into the main dispatch header, e.g. + ```#define {conf_c_prefix}_XXXX`` where ``XXXX`` is the case name in upper case. + **NOTES**: * space can be used as separator with options that supports "str or list" * case-sensitive for all values and feature name must be in upper-case. @@ -230,7 +242,10 @@ class _Config: F16C = dict(interest=11, implies="AVX"), FMA3 = dict(interest=12, implies="F16C"), AVX2 = dict(interest=13, implies="F16C"), - AVX512F = dict(interest=20, implies="FMA3 AVX2", implies_detect=False), + AVX512F = dict( + interest=20, implies="FMA3 AVX2", implies_detect=False, + extra_checks="AVX512F_REDUCE" + ), AVX512CD = dict(interest=21, implies="AVX512F"), AVX512_KNL = dict( interest=40, implies="AVX512CD", group="AVX512ER AVX512PF", @@ -243,7 +258,8 @@ class _Config: ), AVX512_SKX = dict( interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ", - detect="AVX512_SKX", implies_detect=False + detect="AVX512_SKX", implies_detect=False, + extra_checks="AVX512BW_MASK" ), AVX512_CLX = dict( interest=43, implies="AVX512_SKX", group="AVX512VNNI", @@ -673,7 +689,7 @@ class _Distutils: # intel and msvc compilers don't raise # fatal errors when flags are wrong or unsupported ".*(" - "warning D9002|" # msvc, it should be work with any language. + "warning D9002|" # msvc, it should be work with any language. "invalid argument for option" # intel ").*" ) @@ -1137,7 +1153,7 @@ class _Feature: continue # list is used internally for these options for option in ( - "implies", "group", "detect", "headers", "flags" + "implies", "group", "detect", "headers", "flags", "extra_checks" ) : oval = feature.get(option) if isinstance(oval, str): @@ -1439,7 +1455,7 @@ class _Feature: self.conf_check_path, "cpu_%s.c" % name.lower() ) if not os.path.exists(test_path): - self.dist_fatal("feature test file is not exist", path) + self.dist_fatal("feature test file is not exist", test_path) test = self.dist_test(test_path, force_flags + self.cc_flags["werror"]) if not test: @@ -1487,6 +1503,45 @@ class _Feature: can = valid_flags and any(valid_flags) return can + @_Cache.me + def feature_extra_checks(self, name): + """ + Return a list of supported extra checks after testing them against + the compiler. + + Parameters + ---------- + names: str + CPU feature name in uppercase. + """ + assert isinstance(name, str) + d = self.feature_supported[name] + extra_checks = d.get("extra_checks", []) + if not extra_checks: + return [] + + self.dist_log("Testing extra checks for feature '%s'" % name, extra_checks) + flags = self.feature_flags(name) + available = [] + not_available = [] + for chk in extra_checks: + test_path = os.path.join( + self.conf_check_path, "extra_%s.c" % chk.lower() + ) + if not os.path.exists(test_path): + self.dist_fatal("extra check file does not exist", test_path) + + is_supported = self.dist_test(test_path, flags + self.cc_flags["werror"]) + if is_supported: + available.append(chk) + else: + not_available.append(chk) + + if not_available: + self.dist_log("testing failed for checks", not_available, stderr=True) + return available + + def feature_c_preprocessor(self, feature_name, tabs=0): """ Generate C preprocessor definitions and include headers of a CPU feature. @@ -1520,14 +1575,18 @@ class _Feature: prepr += [ "#include <%s>" % h for h in feature.get("headers", []) ] - group = feature.get("group", []) - for f in group: - # Guard features in case of duplicate definitions + + extra_defs = feature.get("group", []) + extra_defs += self.feature_extra_checks(feature_name) + for edef in extra_defs: + # Guard extra definitions in case of duplicate with + # another feature prepr += [ - "#ifndef %sHAVE_%s" % (self.conf_c_prefix, f), - "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, f), + "#ifndef %sHAVE_%s" % (self.conf_c_prefix, edef), + "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, edef), "#endif", ] + if tabs > 0: prepr = [('\t'*tabs) + l for l in prepr] return '\n'.join(prepr) @@ -2269,6 +2328,12 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): baseline_rows.append(( "Flags", (' '.join(baseline_flags) if baseline_flags else "none") )) + extra_checks = [] + for name in baseline_names: + extra_checks += self.feature_extra_checks(name) + baseline_rows.append(( + "Extra checks", (' '.join(extra_checks) if extra_checks else "none") + )) ########## dispatch ########## if self.cc_noopt: @@ -2307,14 +2372,21 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): else: dispatch_rows.append(("Generated", '')) for tar in self.feature_sorted(target_sources): + tar_as_seq = [tar] if isinstance(tar, str) else tar sources = target_sources[tar] name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar) flags = ' '.join(self.feature_flags(tar)) implies = ' '.join(self.feature_sorted(self.feature_implies(tar))) detect = ' '.join(self.feature_detect(tar)) + extra_checks = [] + for name in tar_as_seq: + extra_checks += self.feature_extra_checks(name) + extra_checks = (' '.join(extra_checks) if extra_checks else "none") + dispatch_rows.append(('', '')) dispatch_rows.append((name, implies)) dispatch_rows.append(("Flags", flags)) + dispatch_rows.append(("Extra checks", extra_checks)) dispatch_rows.append(("Detect", detect)) for src in sources: dispatch_rows.append(("", src)) diff --git a/numpy/distutils/checks/extra_avx512bw_mask.c b/numpy/distutils/checks/extra_avx512bw_mask.c new file mode 100644 index 000000000..9cfd0c2a5 --- /dev/null +++ b/numpy/distutils/checks/extra_avx512bw_mask.c @@ -0,0 +1,18 @@ +#include <immintrin.h> +/** + * Test BW mask operations due to: + * - MSVC has supported it since vs2019 see, + * https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html + * - Clang >= v8.0 + * - GCC >= v7.1 + */ +int main(void) +{ + __mmask64 m64 = _mm512_cmpeq_epi8_mask(_mm512_set1_epi8((char)1), _mm512_set1_epi8((char)1)); + m64 = _kor_mask64(m64, m64); + m64 = _kxor_mask64(m64, m64); + m64 = _cvtu64_mask64(_cvtmask64_u64(m64)); + m64 = _mm512_kunpackd(m64, m64); + m64 = (__mmask64)_mm512_kunpackw((__mmask32)m64, (__mmask32)m64); + return (int)_cvtmask64_u64(m64); +} diff --git a/numpy/distutils/checks/extra_avx512f_reduce.c b/numpy/distutils/checks/extra_avx512f_reduce.c new file mode 100644 index 000000000..f979d504e --- /dev/null +++ b/numpy/distutils/checks/extra_avx512f_reduce.c @@ -0,0 +1,41 @@ +#include <immintrin.h> +/** + * The following intrinsics don't have direct native support but compilers + * tend to emulate them. + * They're usually supported by gcc >= 7.1, clang >= 4 and icc >= 19 + */ +int main(void) +{ + __m512 one_ps = _mm512_set1_ps(1.0f); + __m512d one_pd = _mm512_set1_pd(1.0); + __m512i one_i64 = _mm512_set1_epi64(1.0); + // add + float sum_ps = _mm512_reduce_add_ps(one_ps); + double sum_pd = _mm512_reduce_add_pd(one_pd); + int sum_int = (int)_mm512_reduce_add_epi64(one_i64); + sum_int += (int)_mm512_reduce_add_epi32(one_i64); + // mul + sum_ps += _mm512_reduce_mul_ps(one_ps); + sum_pd += _mm512_reduce_mul_pd(one_pd); + sum_int += (int)_mm512_reduce_mul_epi64(one_i64); + sum_int += (int)_mm512_reduce_mul_epi32(one_i64); + // min + sum_ps += _mm512_reduce_min_ps(one_ps); + sum_pd += _mm512_reduce_min_pd(one_pd); + sum_int += (int)_mm512_reduce_min_epi32(one_i64); + sum_int += (int)_mm512_reduce_min_epu32(one_i64); + sum_int += (int)_mm512_reduce_min_epi64(one_i64); + // max + sum_ps += _mm512_reduce_max_ps(one_ps); + sum_pd += _mm512_reduce_max_pd(one_pd); + sum_int += (int)_mm512_reduce_max_epi32(one_i64); + sum_int += (int)_mm512_reduce_max_epu32(one_i64); + sum_int += (int)_mm512_reduce_max_epi64(one_i64); + // and + sum_int += (int)_mm512_reduce_and_epi32(one_i64); + sum_int += (int)_mm512_reduce_and_epi64(one_i64); + // or + sum_int += (int)_mm512_reduce_or_epi32(one_i64); + sum_int += (int)_mm512_reduce_or_epi64(one_i64); + return (int)sum_ps + (int)sum_pd + sum_int; +} diff --git a/numpy/distutils/tests/test_ccompiler_opt_conf.py b/numpy/distutils/tests/test_ccompiler_opt_conf.py index 2f83a59e0..244748e58 100644 --- a/numpy/distutils/tests/test_ccompiler_opt_conf.py +++ b/numpy/distutils/tests/test_ccompiler_opt_conf.py @@ -66,11 +66,12 @@ class _TestConfFeatures(FakeCCompilerOpt): self.test_implies(error_msg, search_in, feature_name, feature_dict) self.test_group(error_msg, search_in, feature_name, feature_dict) + self.test_extra_checks(error_msg, search_in, feature_name, feature_dict) def test_option_types(self, error_msg, option, val): for tp, available in ( ((str, list), ( - "implies", "headers", "flags", "group", "detect" + "implies", "headers", "flags", "group", "detect", "extra_checks" )), ((str,), ("disable",)), ((int,), ("interest",)), @@ -83,29 +84,25 @@ class _TestConfFeatures(FakeCCompilerOpt): if not isinstance(val, tp): error_tp = [t.__name__ for t in (*tp,)] error_tp = ' or '.join(error_tp) - raise AssertionError(error_msg + \ + raise AssertionError(error_msg + "expected '%s' type for option '%s' not '%s'" % ( error_tp, option, type(val).__name__ )) break if not found_it: - raise AssertionError(error_msg + \ - "invalid option name '%s'" % option - ) + raise AssertionError(error_msg + "invalid option name '%s'" % option) def test_duplicates(self, error_msg, option, val): if option not in ( - "implies", "headers", "flags", "group", "detect" + "implies", "headers", "flags", "group", "detect", "extra_checks" ) : return if isinstance(val, str): val = val.split() if len(val) != len(set(val)): - raise AssertionError(error_msg + \ - "duplicated values in option '%s'" % option - ) + raise AssertionError(error_msg + "duplicated values in option '%s'" % option) def test_implies(self, error_msg, search_in, feature_name, feature_dict): if feature_dict.get("disabled") is not None: @@ -117,21 +114,15 @@ class _TestConfFeatures(FakeCCompilerOpt): implies = implies.split() if feature_name in implies: - raise AssertionError(error_msg + \ - "feature implies itself" - ) + raise AssertionError(error_msg + "feature implies itself") for impl in implies: impl_dict = search_in.get(impl) if impl_dict is not None: if "disable" in impl_dict: - raise AssertionError(error_msg + \ - "implies disabled feature '%s'" % impl - ) + raise AssertionError(error_msg + "implies disabled feature '%s'" % impl) continue - raise AssertionError(error_msg + \ - "implies non-exist feature '%s'" % impl - ) + raise AssertionError(error_msg + "implies non-exist feature '%s'" % impl) def test_group(self, error_msg, search_in, feature_name, feature_dict): if feature_dict.get("disabled") is not None: @@ -146,10 +137,26 @@ class _TestConfFeatures(FakeCCompilerOpt): impl_dict = search_in.get(f) if not impl_dict or "disable" in impl_dict: continue - raise AssertionError(error_msg + \ - "in option '%s', '%s' already exists as a feature name" % ( - option, f - )) + raise AssertionError(error_msg + + "in option 'group', '%s' already exists as a feature name" % f + ) + + def test_extra_checks(self, error_msg, search_in, feature_name, feature_dict): + if feature_dict.get("disabled") is not None: + return + extra_checks = feature_dict.get("extra_checks", "") + if not extra_checks: + return + if isinstance(extra_checks, str): + extra_checks = extra_checks.split() + + for f in extra_checks: + impl_dict = search_in.get(f) + if not impl_dict or "disable" in impl_dict: + continue + raise AssertionError(error_msg + + "in option 'extra_checks', extra test case '%s' already exists as a feature name" % f + ) class TestConfFeatures(unittest.TestCase): def __init__(self, methodName="runTest"): |