diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2021-05-28 11:09:19 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-28 11:09:19 -0600 |
commit | 05a5b96b96d6295ebe1278d124ed2e6fd6892755 (patch) | |
tree | 6a7a8d415f51dfce6c5a59117fce8e11a8c14b24 | |
parent | 0725c47df4388d0e806a2e81d5298973bb3da545 (diff) | |
parent | 0ec2c91dfdc6d62c7e2ebd0071a0dc990e1a84ec (diff) | |
download | numpy-05a5b96b96d6295ebe1278d124ed2e6fd6892755.tar.gz |
Merge pull request #19098 from seiko2plus/issue_19084
BUG, SIMD: Fix detect host/native CPU features on ICC during compile-time
-rw-r--r-- | numpy/distutils/ccompiler_opt.py | 57 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx2.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx512_clx.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx512_cnl.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx512_icl.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx512_knl.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx512_knm.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx512_skx.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx512cd.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx512f.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_f16c.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_fma3.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_popcnt.c | 15 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_sse.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_sse2.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_sse3.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_sse41.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_sse42.c | 13 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_ssse3.c | 13 |
20 files changed, 284 insertions, 22 deletions
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py index ae1e6a180..47d07ad4a 100644 --- a/numpy/distutils/ccompiler_opt.py +++ b/numpy/distutils/ccompiler_opt.py @@ -552,7 +552,7 @@ class _Distutils: ccompiler = self._ccompiler return ccompiler.compile(sources, extra_postargs=flags, **kwargs) - def dist_test(self, source, flags): + def dist_test(self, source, flags, macros=[]): """Return True if 'CCompiler.compile()' able to compile a source file with certain flags. """ @@ -569,7 +569,7 @@ class _Distutils: test = False try: self.dist_compile( - [source], flags, output_dir=self.conf_tmp_path + [source], flags, macros=macros, output_dir=self.conf_tmp_path ) test = True except CompileError as e: @@ -1172,20 +1172,23 @@ class _Feature: self.feature_is_cached = True - def feature_names(self, names=None, force_flags=None): + def feature_names(self, names=None, force_flags=None, macros=[]): """ Returns a set of CPU feature names that supported by platform and the **C** compiler. Parameters ---------- - 'names': sequence or None, optional + names: sequence or None, optional Specify certain CPU features to test it against the **C** compiler. if None(default), it will test all current supported features. **Note**: feature names must be in upper-case. - 'force_flags': list or None, optional - If None(default), default compiler flags for every CPU feature will be used - during the test. + force_flags: list or None, optional + If None(default), default compiler flags for every CPU feature will + be used during the test. + + macros : list of tuples, optional + A list of C macro definitions. 
""" assert( names is None or ( @@ -1198,7 +1201,9 @@ class _Feature: names = self.feature_supported.keys() supported_names = set() for f in names: - if self.feature_is_supported(f, force_flags=force_flags): + if self.feature_is_supported( + f, force_flags=force_flags, macros=macros + ): supported_names.add(f) return supported_names @@ -1433,20 +1438,23 @@ class _Feature: return self.cc_normalize_flags(flags) @_Cache.me - def feature_test(self, name, force_flags=None): + def feature_test(self, name, force_flags=None, macros=[]): """ Test a certain CPU feature against the compiler through its own check file. Parameters ---------- - 'name': str + name: str Supported CPU feature name. - 'force_flags': list or None, optional + force_flags: list or None, optional If None(default), the returned flags from `feature_flags()` will be used. - """ + + macros : list of tuples, optional + A list of C macro definitions. + """ if force_flags is None: force_flags = self.feature_flags(name) @@ -1462,24 +1470,29 @@ class _Feature: if not os.path.exists(test_path): self.dist_fatal("feature test file is not exist", test_path) - test = self.dist_test(test_path, force_flags + self.cc_flags["werror"]) + test = self.dist_test( + test_path, force_flags + self.cc_flags["werror"], macros=macros + ) if not test: self.dist_log("testing failed", stderr=True) return test @_Cache.me - def feature_is_supported(self, name, force_flags=None): + def feature_is_supported(self, name, force_flags=None, macros=[]): """ Check if a certain CPU feature is supported by the platform and compiler. Parameters ---------- - 'name': str + name: str CPU feature name in uppercase. - 'force_flags': list or None, optional - If None(default), default compiler flags for every CPU feature will be used - during test. + force_flags: list or None, optional + If None(default), default compiler flags for every CPU feature will + be used during test. + + macros : list of tuples, optional + A list of C macro definitions. 
""" assert(name.isupper()) assert(force_flags is None or isinstance(force_flags, list)) @@ -1487,9 +1500,9 @@ class _Feature: supported = name in self.feature_supported if supported: for impl in self.feature_implies(name): - if not self.feature_test(impl, force_flags): + if not self.feature_test(impl, force_flags, macros=macros): return False - if not self.feature_test(name, force_flags): + if not self.feature_test(name, force_flags, macros=macros): return False return supported @@ -1812,7 +1825,9 @@ class _Parse: self.dist_fatal(arg_name, "native option isn't supported by the compiler" ) - features_to = self.feature_names(force_flags=native) + features_to = self.feature_names( + force_flags=native, macros=[("DETECT_FEATURES", 1)] + ) elif TOK == "MAX": features_to = self.feature_supported.keys() elif TOK == "MIN": diff --git a/numpy/distutils/checks/cpu_avx.c b/numpy/distutils/checks/cpu_avx.c index cee4f36ab..26ae18466 100644 --- a/numpy/distutils/checks/cpu_avx.c +++ b/numpy/distutils/checks/cpu_avx.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. 
+ */ + #ifndef __AVX__ + #error "HOST/ARCH doesn't support AVX" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_avx2.c b/numpy/distutils/checks/cpu_avx2.c index 15b6c919b..ddde868f1 100644 --- a/numpy/distutils/checks/cpu_avx2.c +++ b/numpy/distutils/checks/cpu_avx2.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #ifndef __AVX2__ + #error "HOST/ARCH doesn't support AVX2" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_avx512_clx.c b/numpy/distutils/checks/cpu_avx512_clx.c index 4baa8fea0..81edcd067 100644 --- a/numpy/distutils/checks/cpu_avx512_clx.c +++ b/numpy/distutils/checks/cpu_avx512_clx.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. 
+ */ + #ifndef __AVX512VNNI__ + #error "HOST/ARCH doesn't support CascadeLake AVX512 features" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_avx512_cnl.c b/numpy/distutils/checks/cpu_avx512_cnl.c index f2ff3725e..5799f122b 100644 --- a/numpy/distutils/checks/cpu_avx512_cnl.c +++ b/numpy/distutils/checks/cpu_avx512_cnl.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #if !defined(__AVX512VBMI__) || !defined(__AVX512IFMA__) + #error "HOST/ARCH doesn't support CannonLake AVX512 features" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_avx512_icl.c b/numpy/distutils/checks/cpu_avx512_icl.c index 085b947e0..3cf44d731 100644 --- a/numpy/distutils/checks/cpu_avx512_icl.c +++ b/numpy/distutils/checks/cpu_avx512_icl.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. 
+ */ + #if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512BITALG__) || !defined(__AVX512VPOPCNTDQ__) + #error "HOST/ARCH doesn't support IceLake AVX512 features" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_avx512_knl.c b/numpy/distutils/checks/cpu_avx512_knl.c index 10ba52bcc..b3f4f6976 100644 --- a/numpy/distutils/checks/cpu_avx512_knl.c +++ b/numpy/distutils/checks/cpu_avx512_knl.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #if !defined(__AVX512ER__) || !defined(__AVX512PF__) + #error "HOST/ARCH doesn't support Knights Landing AVX512 features" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_avx512_knm.c b/numpy/distutils/checks/cpu_avx512_knm.c index d03b0fe8b..2c426462b 100644 --- a/numpy/distutils/checks/cpu_avx512_knm.c +++ b/numpy/distutils/checks/cpu_avx512_knm.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. 
+ */ + #if !defined(__AVX5124FMAPS__) || !defined(__AVX5124VNNIW__) || !defined(__AVX512VPOPCNTDQ__) + #error "HOST/ARCH doesn't support Knights Mill AVX512 features" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_avx512_skx.c b/numpy/distutils/checks/cpu_avx512_skx.c index 047618762..8840efb7e 100644 --- a/numpy/distutils/checks/cpu_avx512_skx.c +++ b/numpy/distutils/checks/cpu_avx512_skx.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #if !defined(__AVX512VL__) || !defined(__AVX512BW__) || !defined(__AVX512DQ__) + #error "HOST/ARCH doesn't support SkyLake AVX512 features" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_avx512cd.c b/numpy/distutils/checks/cpu_avx512cd.c index 52f4c7f8b..5e29c79e3 100644 --- a/numpy/distutils/checks/cpu_avx512cd.c +++ b/numpy/distutils/checks/cpu_avx512cd.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. 
+ */ + #ifndef __AVX512CD__ + #error "HOST/ARCH doesn't support AVX512CD" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_avx512f.c b/numpy/distutils/checks/cpu_avx512f.c index 22d861471..d0eb7b1ad 100644 --- a/numpy/distutils/checks/cpu_avx512f.c +++ b/numpy/distutils/checks/cpu_avx512f.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #ifndef __AVX512F__ + #error "HOST/ARCH doesn't support AVX512F" + #endif +#endif + #include <immintrin.h> int main(int argc, char **argv) diff --git a/numpy/distutils/checks/cpu_f16c.c b/numpy/distutils/checks/cpu_f16c.c index 678c582e4..fdf36cec5 100644 --- a/numpy/distutils/checks/cpu_f16c.c +++ b/numpy/distutils/checks/cpu_f16c.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. 
+ */ + #ifndef __F16C__ + #error "HOST/ARCH doesn't support F16C" + #endif +#endif + #include <emmintrin.h> #include <immintrin.h> diff --git a/numpy/distutils/checks/cpu_fma3.c b/numpy/distutils/checks/cpu_fma3.c index 2f879c3b3..bfeef22b5 100644 --- a/numpy/distutils/checks/cpu_fma3.c +++ b/numpy/distutils/checks/cpu_fma3.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #if !defined(__FMA__) && !defined(__AVX2__) + #error "HOST/ARCH doesn't support FMA3" + #endif +#endif + #include <xmmintrin.h> #include <immintrin.h> diff --git a/numpy/distutils/checks/cpu_popcnt.c b/numpy/distutils/checks/cpu_popcnt.c index 540c98dab..813c461f0 100644 --- a/numpy/distutils/checks/cpu_popcnt.c +++ b/numpy/distutils/checks/cpu_popcnt.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. 
+ */ + #if !defined(__SSE4_2__) && !defined(__POPCNT__) + #error "HOST/ARCH doesn't support POPCNT" + #endif +#endif + #ifdef _MSC_VER #include <nmmintrin.h> #else @@ -11,7 +24,7 @@ int main(int argc, char **argv) unsigned long long a = *((unsigned long long*)argv[argc-1]); unsigned int b = *((unsigned int*)argv[argc-2]); -#if defined(_M_X64) || defined(__x86_64__) +#if defined(_M_X64) || defined(__x86_64__) a = _mm_popcnt_u64(a); #endif b = _mm_popcnt_u32(b); diff --git a/numpy/distutils/checks/cpu_sse.c b/numpy/distutils/checks/cpu_sse.c index bb98bf63c..602b74e7b 100644 --- a/numpy/distutils/checks/cpu_sse.c +++ b/numpy/distutils/checks/cpu_sse.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #ifndef __SSE__ + #error "HOST/ARCH doesn't support SSE" + #endif +#endif + #include <xmmintrin.h> int main(void) diff --git a/numpy/distutils/checks/cpu_sse2.c b/numpy/distutils/checks/cpu_sse2.c index 658afc9b4..33826a9ed 100644 --- a/numpy/distutils/checks/cpu_sse2.c +++ b/numpy/distutils/checks/cpu_sse2.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. 
+ */ + #ifndef __SSE2__ + #error "HOST/ARCH doesn't support SSE2" + #endif +#endif + #include <emmintrin.h> int main(void) diff --git a/numpy/distutils/checks/cpu_sse3.c b/numpy/distutils/checks/cpu_sse3.c index aece1e601..d47c20f74 100644 --- a/numpy/distutils/checks/cpu_sse3.c +++ b/numpy/distutils/checks/cpu_sse3.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #ifndef __SSE3__ + #error "HOST/ARCH doesn't support SSE3" + #endif +#endif + #include <pmmintrin.h> int main(void) diff --git a/numpy/distutils/checks/cpu_sse41.c b/numpy/distutils/checks/cpu_sse41.c index bfdb9feac..7c80238a3 100644 --- a/numpy/distutils/checks/cpu_sse41.c +++ b/numpy/distutils/checks/cpu_sse41.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. 
+ */ + #ifndef __SSE4_1__ + #error "HOST/ARCH doesn't support SSE41" + #endif +#endif + #include <smmintrin.h> int main(void) diff --git a/numpy/distutils/checks/cpu_sse42.c b/numpy/distutils/checks/cpu_sse42.c index 24f5d93fe..f60e18f3c 100644 --- a/numpy/distutils/checks/cpu_sse42.c +++ b/numpy/distutils/checks/cpu_sse42.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #ifndef __SSE4_2__ + #error "HOST/ARCH doesn't support SSE42" + #endif +#endif + #include <smmintrin.h> int main(void) diff --git a/numpy/distutils/checks/cpu_ssse3.c b/numpy/distutils/checks/cpu_ssse3.c index ad0abc1e6..fde390d6a 100644 --- a/numpy/distutils/checks/cpu_ssse3.c +++ b/numpy/distutils/checks/cpu_ssse3.c @@ -1,3 +1,16 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #ifndef __SSSE3__ + #error "HOST/ARCH doesn't support SSSE3" + #endif +#endif + #include <tmmintrin.h> int main(void) |