summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2020-09-10 12:23:39 +0300
committerGitHub <noreply@github.com>2020-09-10 12:23:39 +0300
commit74712a53df240f1661fbced15ae984888fd9afa6 (patch)
tree2952220bfde815a673f99382a2cac849a51e781c
parent3a9588ae8041d611cb1df4be849db88fefc5ea46 (diff)
parent6138616f5dcb573056984597c99085faf5ae7fe7 (diff)
downloadnumpy-74712a53df240f1661fbced15ae984888fd9afa6.tar.gz
Merge pull request #17129 from seiko2plus/distutils_avx512bw_mask
BLD: Check for reduce intrinsics and AVX512BW mask operations
-rw-r--r--numpy/distutils/ccompiler_opt.py92
-rw-r--r--numpy/distutils/checks/extra_avx512bw_mask.c18
-rw-r--r--numpy/distutils/checks/extra_avx512f_reduce.c41
-rw-r--r--numpy/distutils/tests/test_ccompiler_opt_conf.py51
4 files changed, 170 insertions, 32 deletions
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index 85dc2f1e8..9d6bfcbd4 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -152,6 +152,18 @@ class _Config:
By default(None), treated as True if the feature contains at
least one applicable flag. see `feature_can_autovec()`
+ "extra_checks": str or list, optional
+ Extra test case names for the CPU feature that need to be tested
+ against the compiler.
+
+ Each test case must have a C file named ``extra_xxxx.c``, where
+ ``xxxx`` is the case name in lower case, under 'conf_check_path'.
+ It should contain at least one intrinsic or function related to the test case.
+
+ If the compiler is able to successfully compile the C file, then `CCompilerOpt`
+ will add a C ``#define`` for it into the main dispatch header, e.g.
+ ``#define {conf_c_prefix}HAVE_XXXX`` where ``XXXX`` is the case name in upper case.
+
**NOTES**:
* space can be used as separator with options that supports "str or list"
* case-sensitive for all values and feature name must be in upper-case.
@@ -230,7 +242,10 @@ class _Config:
F16C = dict(interest=11, implies="AVX"),
FMA3 = dict(interest=12, implies="F16C"),
AVX2 = dict(interest=13, implies="F16C"),
- AVX512F = dict(interest=20, implies="FMA3 AVX2", implies_detect=False),
+ AVX512F = dict(
+ interest=20, implies="FMA3 AVX2", implies_detect=False,
+ extra_checks="AVX512F_REDUCE"
+ ),
AVX512CD = dict(interest=21, implies="AVX512F"),
AVX512_KNL = dict(
interest=40, implies="AVX512CD", group="AVX512ER AVX512PF",
@@ -243,7 +258,8 @@ class _Config:
),
AVX512_SKX = dict(
interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ",
- detect="AVX512_SKX", implies_detect=False
+ detect="AVX512_SKX", implies_detect=False,
+ extra_checks="AVX512BW_MASK"
),
AVX512_CLX = dict(
interest=43, implies="AVX512_SKX", group="AVX512VNNI",
@@ -673,7 +689,7 @@ class _Distutils:
# intel and msvc compilers don't raise
# fatal errors when flags are wrong or unsupported
".*("
- "warning D9002|" # msvc, it should be work with any language.
+ "warning D9002|" # msvc, it should be work with any language.
"invalid argument for option" # intel
").*"
)
@@ -1137,7 +1153,7 @@ class _Feature:
continue
# list is used internally for these options
for option in (
- "implies", "group", "detect", "headers", "flags"
+ "implies", "group", "detect", "headers", "flags", "extra_checks"
) :
oval = feature.get(option)
if isinstance(oval, str):
@@ -1439,7 +1455,7 @@ class _Feature:
self.conf_check_path, "cpu_%s.c" % name.lower()
)
if not os.path.exists(test_path):
- self.dist_fatal("feature test file is not exist", path)
+ self.dist_fatal("feature test file is not exist", test_path)
test = self.dist_test(test_path, force_flags + self.cc_flags["werror"])
if not test:
@@ -1487,6 +1503,45 @@ class _Feature:
can = valid_flags and any(valid_flags)
return can
@_Cache.me
def feature_extra_checks(self, name):
    """
    Compile the extra test cases attached to a CPU feature and report
    which of them the compiler accepts.

    Parameters
    ----------
    name : str
        CPU feature name in uppercase; must be a key of
        ``self.feature_supported``.

    Returns
    -------
    list
        Names of the extra checks whose ``extra_<name>.c`` file compiled
        successfully, in the order they are configured. Empty when the
        feature declares no "extra_checks".
    """
    assert isinstance(name, str)
    requested = self.feature_supported[name].get("extra_checks", [])
    if not requested:
        return []

    self.dist_log("Testing extra checks for feature '%s'" % name, requested)
    flags = self.feature_flags(name)
    passed, failed = [], []
    for check in requested:
        # Each check maps to a lower-cased C file under 'conf_check_path'.
        source = os.path.join(
            self.conf_check_path, "extra_%s.c" % check.lower()
        )
        if not os.path.exists(source):
            self.dist_fatal("extra check file does not exist", source)

        # The feature flags are combined with the compiler's "werror" flags
        # (presumably so that warnings also count as a failed check).
        compiled = self.dist_test(source, flags + self.cc_flags["werror"])
        (passed if compiled else failed).append(check)

    if failed:
        self.dist_log("testing failed for checks", failed, stderr=True)
    return passed
+
+
def feature_c_preprocessor(self, feature_name, tabs=0):
"""
Generate C preprocessor definitions and include headers of a CPU feature.
@@ -1520,14 +1575,18 @@ class _Feature:
prepr += [
"#include <%s>" % h for h in feature.get("headers", [])
]
- group = feature.get("group", [])
- for f in group:
- # Guard features in case of duplicate definitions
+
+ extra_defs = feature.get("group", [])
+ extra_defs += self.feature_extra_checks(feature_name)
+ for edef in extra_defs:
+ # Guard extra definitions in case of duplicate with
+ # another feature
prepr += [
- "#ifndef %sHAVE_%s" % (self.conf_c_prefix, f),
- "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, f),
+ "#ifndef %sHAVE_%s" % (self.conf_c_prefix, edef),
+ "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, edef),
"#endif",
]
+
if tabs > 0:
prepr = [('\t'*tabs) + l for l in prepr]
return '\n'.join(prepr)
@@ -2269,6 +2328,12 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse):
baseline_rows.append((
"Flags", (' '.join(baseline_flags) if baseline_flags else "none")
))
+ extra_checks = []
+ for name in baseline_names:
+ extra_checks += self.feature_extra_checks(name)
+ baseline_rows.append((
+ "Extra checks", (' '.join(extra_checks) if extra_checks else "none")
+ ))
########## dispatch ##########
if self.cc_noopt:
@@ -2307,14 +2372,21 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse):
else:
dispatch_rows.append(("Generated", ''))
for tar in self.feature_sorted(target_sources):
+ tar_as_seq = [tar] if isinstance(tar, str) else tar
sources = target_sources[tar]
name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar)
flags = ' '.join(self.feature_flags(tar))
implies = ' '.join(self.feature_sorted(self.feature_implies(tar)))
detect = ' '.join(self.feature_detect(tar))
+ extra_checks = []
+ for name in tar_as_seq:
+ extra_checks += self.feature_extra_checks(name)
+ extra_checks = (' '.join(extra_checks) if extra_checks else "none")
+
dispatch_rows.append(('', ''))
dispatch_rows.append((name, implies))
dispatch_rows.append(("Flags", flags))
+ dispatch_rows.append(("Extra checks", extra_checks))
dispatch_rows.append(("Detect", detect))
for src in sources:
dispatch_rows.append(("", src))
diff --git a/numpy/distutils/checks/extra_avx512bw_mask.c b/numpy/distutils/checks/extra_avx512bw_mask.c
new file mode 100644
index 000000000..9cfd0c2a5
--- /dev/null
+++ b/numpy/distutils/checks/extra_avx512bw_mask.c
@@ -0,0 +1,18 @@
+#include <immintrin.h>
+/**
+ * Test BW mask operations due to:
+ * - MSVC has supported it since vs2019 see,
+ * https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
+ * - Clang >= v8.0
+ * - GCC >= v7.1
+ */
int main(void)
{
    /* Comparing a byte vector with itself sets every bit of the 64-bit mask. */
    __m512i lanes = _mm512_set1_epi8((char)1);
    __mmask64 mask = _mm512_cmpeq_epi8_mask(lanes, lanes);
    __mmask32 half;

    /* 64-bit mask logic operations. */
    mask = _kor_mask64(mask, mask);
    mask = _kxor_mask64(mask, mask);

    /* Round trip between a mask register and a 64-bit integer. */
    mask = _cvtu64_mask64(_cvtmask64_u64(mask));

    /* Mask unpacking, 32-bit halves first and then 16-bit halves. */
    mask = _mm512_kunpackd(mask, mask);
    half = (__mmask32)mask;
    mask = (__mmask64)_mm512_kunpackw(half, half);

    return (int)_cvtmask64_u64(mask);
}
diff --git a/numpy/distutils/checks/extra_avx512f_reduce.c b/numpy/distutils/checks/extra_avx512f_reduce.c
new file mode 100644
index 000000000..f979d504e
--- /dev/null
+++ b/numpy/distutils/checks/extra_avx512f_reduce.c
@@ -0,0 +1,41 @@
+#include <immintrin.h>
+/**
+ * The following intrinsics don't have direct native support but compilers
+ * tend to emulate them.
+ * They're usually supported by gcc >= 7.1, clang >= 4 and icc >= 19
+ */
/*
 * Compile-time probe: calls each AVX512F reduce intrinsic once on constant
 * vectors. All results are folded into the return value, presumably so the
 * compiler cannot drop the calls as dead code.
 */
int main(void)
{
    __m512 one_ps = _mm512_set1_ps(1.0f);
    __m512d one_pd = _mm512_set1_pd(1.0);
    /* Integer broadcast takes an integer operand, not a floating-point literal. */
    __m512i one_i64 = _mm512_set1_epi64(1);
    // add
    float sum_ps = _mm512_reduce_add_ps(one_ps);
    double sum_pd = _mm512_reduce_add_pd(one_pd);
    int sum_int = (int)_mm512_reduce_add_epi64(one_i64);
    sum_int += (int)_mm512_reduce_add_epi32(one_i64);
    // mul
    sum_ps += _mm512_reduce_mul_ps(one_ps);
    sum_pd += _mm512_reduce_mul_pd(one_pd);
    sum_int += (int)_mm512_reduce_mul_epi64(one_i64);
    sum_int += (int)_mm512_reduce_mul_epi32(one_i64);
    // min
    sum_ps += _mm512_reduce_min_ps(one_ps);
    sum_pd += _mm512_reduce_min_pd(one_pd);
    sum_int += (int)_mm512_reduce_min_epi32(one_i64);
    sum_int += (int)_mm512_reduce_min_epu32(one_i64);
    sum_int += (int)_mm512_reduce_min_epi64(one_i64);
    // max
    sum_ps += _mm512_reduce_max_ps(one_ps);
    sum_pd += _mm512_reduce_max_pd(one_pd);
    sum_int += (int)_mm512_reduce_max_epi32(one_i64);
    sum_int += (int)_mm512_reduce_max_epu32(one_i64);
    sum_int += (int)_mm512_reduce_max_epi64(one_i64);
    // and
    sum_int += (int)_mm512_reduce_and_epi32(one_i64);
    sum_int += (int)_mm512_reduce_and_epi64(one_i64);
    // or
    sum_int += (int)_mm512_reduce_or_epi32(one_i64);
    sum_int += (int)_mm512_reduce_or_epi64(one_i64);
    return (int)sum_ps + (int)sum_pd + sum_int;
}
diff --git a/numpy/distutils/tests/test_ccompiler_opt_conf.py b/numpy/distutils/tests/test_ccompiler_opt_conf.py
index 2f83a59e0..244748e58 100644
--- a/numpy/distutils/tests/test_ccompiler_opt_conf.py
+++ b/numpy/distutils/tests/test_ccompiler_opt_conf.py
@@ -66,11 +66,12 @@ class _TestConfFeatures(FakeCCompilerOpt):
self.test_implies(error_msg, search_in, feature_name, feature_dict)
self.test_group(error_msg, search_in, feature_name, feature_dict)
+ self.test_extra_checks(error_msg, search_in, feature_name, feature_dict)
def test_option_types(self, error_msg, option, val):
for tp, available in (
((str, list), (
- "implies", "headers", "flags", "group", "detect"
+ "implies", "headers", "flags", "group", "detect", "extra_checks"
)),
((str,), ("disable",)),
((int,), ("interest",)),
@@ -83,29 +84,25 @@ class _TestConfFeatures(FakeCCompilerOpt):
if not isinstance(val, tp):
error_tp = [t.__name__ for t in (*tp,)]
error_tp = ' or '.join(error_tp)
- raise AssertionError(error_msg + \
+ raise AssertionError(error_msg +
"expected '%s' type for option '%s' not '%s'" % (
error_tp, option, type(val).__name__
))
break
if not found_it:
- raise AssertionError(error_msg + \
- "invalid option name '%s'" % option
- )
+ raise AssertionError(error_msg + "invalid option name '%s'" % option)
def test_duplicates(self, error_msg, option, val):
if option not in (
- "implies", "headers", "flags", "group", "detect"
+ "implies", "headers", "flags", "group", "detect", "extra_checks"
) : return
if isinstance(val, str):
val = val.split()
if len(val) != len(set(val)):
- raise AssertionError(error_msg + \
- "duplicated values in option '%s'" % option
- )
+ raise AssertionError(error_msg + "duplicated values in option '%s'" % option)
def test_implies(self, error_msg, search_in, feature_name, feature_dict):
if feature_dict.get("disabled") is not None:
@@ -117,21 +114,15 @@ class _TestConfFeatures(FakeCCompilerOpt):
implies = implies.split()
if feature_name in implies:
- raise AssertionError(error_msg + \
- "feature implies itself"
- )
+ raise AssertionError(error_msg + "feature implies itself")
for impl in implies:
impl_dict = search_in.get(impl)
if impl_dict is not None:
if "disable" in impl_dict:
- raise AssertionError(error_msg + \
- "implies disabled feature '%s'" % impl
- )
+ raise AssertionError(error_msg + "implies disabled feature '%s'" % impl)
continue
- raise AssertionError(error_msg + \
- "implies non-exist feature '%s'" % impl
- )
+ raise AssertionError(error_msg + "implies non-exist feature '%s'" % impl)
def test_group(self, error_msg, search_in, feature_name, feature_dict):
if feature_dict.get("disabled") is not None:
@@ -146,10 +137,26 @@ class _TestConfFeatures(FakeCCompilerOpt):
impl_dict = search_in.get(f)
if not impl_dict or "disable" in impl_dict:
continue
- raise AssertionError(error_msg + \
- "in option '%s', '%s' already exists as a feature name" % (
- option, f
- ))
+ raise AssertionError(error_msg +
+ "in option 'group', '%s' already exists as a feature name" % f
+ )
+
+ def test_extra_checks(self, error_msg, search_in, feature_name, feature_dict):
+ if feature_dict.get("disabled") is not None:
+ return
+ extra_checks = feature_dict.get("extra_checks", "")
+ if not extra_checks:
+ return
+ if isinstance(extra_checks, str):
+ extra_checks = extra_checks.split()
+
+ for f in extra_checks:
+ impl_dict = search_in.get(f)
+ if not impl_dict or "disable" in impl_dict:
+ continue
+ raise AssertionError(error_msg +
+ "in option 'extra_checks', extra test case '%s' already exists as a feature name" % f
+ )
class TestConfFeatures(unittest.TestCase):
def __init__(self, methodName="runTest"):