diff options
Diffstat (limited to 'doc/source/reference')
6 files changed, 320 insertions, 330 deletions
# diff --git a/doc/source/reference/simd/gen_features.py b/doc/source/reference/simd/gen_features.py new file mode 100644 index 000000000..d74d54016 --- /dev/null +++ b/doc/source/reference/simd/gen_features.py @@ -0,0 +1,194 @@
"""
Generate CPU features tables from CCompilerOpt
"""
import os
import sys  # noqa: F401 -- kept: the original exposed `sys` via `from os import sys`
import textwrap
from os import path

from numpy.distutils.ccompiler_opt import CCompilerOpt


class FakeCCompilerOpt(CCompilerOpt):
    """CCompilerOpt that never compiles and pretends every feature test passes.

    The platform/compiler pair is supplied by the caller instead of being
    detected, so feature tables can be produced for any architecture.
    """

    # disable caching no need for it
    conf_nocache = True

    def __init__(self, arch, cc, *args, **kwargs):
        # `fake_info` is assigned before the base initializer runs so that
        # `dist_info()` already works if the base class calls it during
        # construction (presumably it does -- confirm against CCompilerOpt).
        self.fake_info = (arch, cc, '')
        # Extra positional arguments are accepted but intentionally dropped;
        # `None` is passed in place of a real compiler object.
        CCompilerOpt.__init__(self, None, **kwargs)

    def dist_compile(self, sources, flags, **kwargs):
        """Pretend to compile: hand back `sources` untouched."""
        return sources

    def dist_info(self):
        """Return the faked ``(arch, compiler, extra_args)`` tuple."""
        return self.fake_info

    @staticmethod
    def dist_log(*args, stderr=False):
        """Swallow all log output (avoid printing)."""

    def feature_test(self, name, force_flags=None, macros=None):
        """Report every feature as supported, skipping the real test.

        `macros` is ignored; its default is ``None`` rather than a shared
        mutable list.
        """
        # To speed up
        return True


class Features:
    """Feature tables for one ``(arch, cc)`` pair, rendered as reST."""

    def __init__(self, arch, cc):
        self.copt = FakeCCompilerOpt(arch, cc, cpu_baseline="max")

    def names(self):
        """Return the baseline feature names reported by the fake compiler."""
        return self.copt.cpu_baseline_names()

    def serialize(self, features_names):
        """Return ``[(name, implies, gather), ...]`` in `feature_sorted` order.

        ``implies`` is the sorted list of implied features; ``gather`` is the
        feature's ``"group"`` entry, or an empty list when it has none.
        """
        copt = self.copt
        return [
            (
                name,
                copt.feature_sorted(copt.feature_implies(name)),
                copt.feature_supported.get(name, {}).get("group", []),
            )
            for name in copt.feature_sorted(features_names)
        ]

    def table(self, **kwargs):
        """Render the full feature table; `kwargs` go to `gen_table`."""
        return self.gen_table(self.serialize(self.names()), **kwargs)

    def table_diff(self, vs, **kwargs):
        """Render only what differs between this compiler and `vs`.

        Parameters
        ----------
        vs : Features
            The reference compiler to compare against.
        **kwargs
            Forwarded to `gen_table`.

        Returns
        -------
        str
            A reST table, or ``''`` when there is no difference. Features or
            implied features present only here are wrapped in the
            ``:enabled:`` role, those present only in `vs` in ``:disabled:``.
        """
        fnames = set(self.names())
        fnames_vs = set(vs.names())
        common = fnames & fnames_vs
        extra = fnames - fnames_vs
        notavl = fnames_vs - fnames

        iextra = {}    # feature -> implied features only this compiler has
        inotavl = {}   # feature -> implied features only `vs` has
        idiff = set()  # common features whose implied sets differ
        for f in common:
            implies = self.copt.feature_implies(f)
            implies_vs = vs.copt.feature_implies(f)
            only_here = implies.difference(implies_vs)
            only_vs = implies_vs.difference(implies)
            if not only_here and not only_vs:
                continue
            if only_here:
                iextra[f] = only_here
            if only_vs:
                # The original stored `only_here` under this key by mistake.
                # Rendering was unaffected because rows for common features
                # list only this compiler's implied names.
                inotavl[f] = only_vs
            idiff.add(f)

        def fbold(f):
            if f in extra:
                return f':enabled:`{f}`'
            if f in notavl:
                return f':disabled:`{f}`'
            return f

        def fbold_implies(f, i):
            if i in iextra.get(f, ()):
                return f':enabled:`{i}`'
            if f in notavl or i in inotavl.get(f, ()):
                return f':disabled:`{i}`'
            return i

        # Rows for features missing here come from `vs`, the only side that
        # knows what they imply.
        diff_all = self.serialize(idiff.union(extra))
        diff_all += vs.serialize(notavl)
        return self.gen_table(
            diff_all, fstyle=fbold, fstyle_implies=fbold_implies, **kwargs
        )

    def gen_table(self, serialized_features, fstyle=None, fstyle_implies=None,
                  **kwargs):
        """Turn serialized features into a reST simple table.

        Parameters
        ----------
        serialized_features : iterable of (name, implies, gather)
            As produced by `serialize`.
        fstyle : callable, optional
            ``fstyle(name) -> str``; defaults to wrapping in double backticks.
        fstyle_implies : callable, optional
            ``fstyle_implies(origin, name) -> str``; defaults to `fstyle`.
        **kwargs
            Forwarded to `gen_rst_table`.

        Returns
        -------
        str
            The table, or ``''`` when there are no rows. The "Gathers" column
            is dropped when no feature has a gather list.
        """
        if fstyle is None:
            def fstyle(ft):
                return f'``{ft}``'
        if fstyle_implies is None:
            def fstyle_implies(origin, ft):
                return fstyle(ft)

        rows = []
        have_gather = False
        for f, implies, gather in serialized_features:
            if gather:
                have_gather = True
            rows.append((
                fstyle(f),
                ' '.join(fstyle_implies(f, i) for i in implies),
                ' '.join(fstyle_implies(f, i) for i in gather),
            ))
        if not rows:
            return ''
        fields = ["Name", "Implies", "Gathers"]
        if not have_gather:
            del fields[2]
            rows = [(name, implies) for name, implies, _ in rows]
        return self.gen_rst_table(fields, rows, **kwargs)

    def gen_rst_table(self, field_names, rows, tab_size=4):
        """Format `rows` under `field_names` as a reST simple table.

        Every line is left-padded with `tab_size` spaces. `rows` is not
        modified.
        """
        assert not rows or len(field_names) == len(rows[0])
        # Column width is the widest cell, the header included.
        widths = [
            max(len(cells[i]) for cells in (*rows, field_names))
            for i in range(len(field_names))
        ]
        cformat = ' '.join('{:<%d}' % w for w in widths)
        border = cformat.format(*['=' * w for w in widths])

        # header
        lines = [border, cformat.format(*field_names), border]
        lines += [cformat.format(*row) for row in rows]
        # footer
        lines.append(border)
        # add left margin
        margin = ' ' * tab_size
        return '\n'.join(margin + line for line in lines)


def wrapper_section(title, content, tab_size=4):
    """Wrap `content` in a titled section holding a left-aligned table.

    Returns ``''`` when `content` is empty.
    """
    tab = ' ' * tab_size
    if content:
        return (
            f"{title}\n{'~' * len(title)}"
            f"\n.. table::\n{tab}:align: left\n\n"
            f"{content}\n\n"
        )
    return ''


def wrapper_tab(title, table, tab_size=4):
    """Wrap `table` in a ``.. tab::`` directive holding a left-aligned table.

    The original built this string but never returned it, so the function
    always produced ``''``; it also omitted the leading colon of the
    ``:align:`` option. Returns ``''`` when `table` is empty.
    """
    if not table:
        return ''
    tab = ' ' * tab_size
    # Strip whatever margin the table already carries, then nest it two
    # levels deep: inside `.. tab::` and inside `.. table::`.
    body = textwrap.indent(textwrap.dedent(table), tab * 2)
    return (
        f".. tab:: {title}\n\n"
        f"{tab}.. table::\n"
        f"{tab * 2}:align: left\n\n"
        f"{body}\n\n"
    )


def main():
    """Write ``cpu_features.inc`` and ``compilers-diff.inc`` next to this file."""
    pretty_names = {
        "PPC64": "IBM/POWER big-endian",
        "PPC64LE": "IBM/POWER little-endian",
        "ARMHF": "ARMv7/A32",
        "AARCH64": "ARMv8/A64",
        "ICC": "Intel Compiler",
        # "ICCW": "Intel Compiler msvc-like",
        "MSVC": "Microsoft Visual C/C++"
    }
    gen_path = path.join(
        path.dirname(path.realpath(__file__)), "generated_tables"
    )
    # Create the output directory on a fresh checkout.
    os.makedirs(gen_path, exist_ok=True)

    with open(path.join(gen_path, 'cpu_features.inc'), 'wt',
              encoding='utf-8') as fd:
        # NOTE: `__file__` is written as-is, so the header depends on how
        # the script was invoked.
        fd.write(f'.. generated via {__file__}\n\n')
        for arch in ("x86", "PPC64", "PPC64LE", "ARMHF", "AARCH64"):
            title = "On " + pretty_names.get(arch, arch)
            table = Features(arch, 'gcc').table()
            fd.write(wrapper_section(title, table))

    with open(path.join(gen_path, 'compilers-diff.inc'), 'wt',
              encoding='utf-8') as fd:
        fd.write(f'.. generated via {__file__}\n\n')
        for arch, cc_names in (
            ("x86", ("clang", "ICC", "MSVC")),
            ("PPC64", ("clang",)),
            ("PPC64LE", ("clang",)),
            ("ARMHF", ("clang",)),
            ("AARCH64", ("clang",))
        ):
            arch_pname = pretty_names.get(arch, arch)
            # The gcc reference is the same for every compiler of this arch.
            reference = Features(arch, "gcc")
            for cc in cc_names:
                title = f"On {arch_pname}::{pretty_names.get(cc, cc)}"
                table = Features(arch, cc).table_diff(reference)
                fd.write(wrapper_section(title, table))


if __name__ == '__main__':
    main()

# diff --git a/doc/source/reference/simd/generated_tables/compilers-diff.inc b/doc/source/reference/simd/generated_tables/compilers-diff.inc new file mode 100644 index 000000000..4b9009a68 --- /dev/null +++ b/doc/source/reference/simd/generated_tables/compilers-diff.inc @@ -0,0 +1,33 @@ +.. generated via /home/seiko/work/repos/numpy/doc/source/reference/simd/./gen_features.py + +On x86::Intel Compiler +~~~~~~~~~~~~~~~~~~~~~~ +.. 
table:: + :align: left + + ================ ========================================================================================================================================== + Name Implies + ================ ========================================================================================================================================== + FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2` + AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3` + AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD` + :disabled:`XOP` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` + :disabled:`FMA4` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` + ================ ========================================================================================================================================== + +On x86::Microsoft Visual C/C++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
table:: + :align: left + + ====================== ============================================================================================================================================================================================================================================================= ============================================================================= + Name Implies Gathers + ====================== ============================================================================================================================================================================================================================================================= ============================================================================= + FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2` + AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3` + AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD` :enabled:`AVX512_SKX` + AVX512CD SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 AVX512F :enabled:`AVX512_SKX` + :disabled:`AVX512_KNL` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512ER` :disabled:`AVX512PF` + :disabled:`AVX512_KNM` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_KNL` :disabled:`AVX5124FMAPS` :disabled:`AVX5124VNNIW` :disabled:`AVX512VPOPCNTDQ` + ====================== 
============================================================================================================================================================================================================================================================= ============================================================================= + diff --git a/doc/source/reference/simd/generated_tables/cpu_features.inc b/doc/source/reference/simd/generated_tables/cpu_features.inc new file mode 100644 index 000000000..a7eae5652 --- /dev/null +++ b/doc/source/reference/simd/generated_tables/cpu_features.inc @@ -0,0 +1,93 @@ +.. generated via /home/seiko/work/repos/numpy/doc/source/reference/simd/./gen_features.py + +On x86 +~~~~~~ +.. table:: + :align: left + + ============== =========================================================================================================================================================================== ===================================================== + Name Implies Gathers + ============== =========================================================================================================================================================================== ===================================================== + ``SSE`` ``SSE2`` + ``SSE2`` ``SSE`` + ``SSE3`` ``SSE`` ``SSE2`` + ``SSSE3`` ``SSE`` ``SSE2`` ``SSE3`` + ``SSE41`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` + ``POPCNT`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` + ``SSE42`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` + ``AVX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` + ``XOP`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` + ``FMA4`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` + ``F16C`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` + ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` + ``AVX2`` ``SSE`` ``SSE2`` 
``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` + ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` + ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` + ``AVX512_KNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512ER`` ``AVX512PF`` + ``AVX512_KNM`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_KNL`` ``AVX5124FMAPS`` ``AVX5124VNNIW`` ``AVX512VPOPCNTDQ`` + ``AVX512_SKX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512VL`` ``AVX512BW`` ``AVX512DQ`` + ``AVX512_CLX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512VNNI`` + ``AVX512_CNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512IFMA`` ``AVX512VBMI`` + ``AVX512_ICL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL`` ``AVX512VBMI2`` ``AVX512BITALG`` ``AVX512VPOPCNTDQ`` + ============== =========================================================================================================================================================================== ===================================================== + +On IBM/POWER big-endian +~~~~~~~~~~~~~~~~~~~~~~~ +.. 
table:: + :align: left + + ======== ================ + Name Implies + ======== ================ + ``VSX`` + ``VSX2`` ``VSX`` + ``VSX3`` ``VSX`` ``VSX2`` + ======== ================ + +On IBM/POWER little-endian +~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. table:: + :align: left + + ======== ================ + Name Implies + ======== ================ + ``VSX`` ``VSX2`` + ``VSX2`` ``VSX`` + ``VSX3`` ``VSX`` ``VSX2`` + ======== ================ + +On ARMv7/A32 +~~~~~~~~~~~~ +.. table:: + :align: left + + ============== =========================================================== + Name Implies + ============== =========================================================== + ``NEON`` + ``NEON_FP16`` ``NEON`` + ``NEON_VFPV4`` ``NEON`` ``NEON_FP16`` + ``ASIMD`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` + ``ASIMDHP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` + ``ASIMDDP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` + ``ASIMDFHM`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP`` + ============== =========================================================== + +On ARMv8/A64 +~~~~~~~~~~~~ +.. 
table:: + :align: left + + ============== =========================================================== + Name Implies + ============== =========================================================== + ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` + ``NEON_FP16`` ``NEON`` ``NEON_VFPV4`` ``ASIMD`` + ``NEON_VFPV4`` ``NEON`` ``NEON_FP16`` ``ASIMD`` + ``ASIMD`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` + ``ASIMDHP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` + ``ASIMDDP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` + ``ASIMDFHM`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP`` + ============== =========================================================== + diff --git a/doc/source/reference/simd/simd-optimizations-tables-diff.inc b/doc/source/reference/simd/simd-optimizations-tables-diff.inc deleted file mode 100644 index 41fa96703..000000000 --- a/doc/source/reference/simd/simd-optimizations-tables-diff.inc +++ /dev/null @@ -1,37 +0,0 @@ -.. generated via source/reference/simd/simd-optimizations.py - -x86::Intel Compiler - CPU feature names -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. table:: - :align: left - - =========== ================================================================================================================== - Name Implies - =========== ================================================================================================================== - ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` **AVX2** - ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` **FMA3** - ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` **AVX512CD** - =========== ================================================================================================================== - -.. 
note:: - The following features aren't supported by x86::Intel Compiler: - **XOP FMA4** - -x86::Microsoft Visual C/C++ - CPU feature names -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. table:: - :align: left - - ============ ================================================================================================================================= - Name Implies - ============ ================================================================================================================================= - ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` **AVX2** - ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` **FMA3** - ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` **AVX512CD** **AVX512_SKX** - ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` **AVX512_SKX** - ============ ================================================================================================================================= - -.. note:: - The following features aren't supported by x86::Microsoft Visual C/C++: - **AVX512_KNL AVX512_KNM** - diff --git a/doc/source/reference/simd/simd-optimizations-tables.inc b/doc/source/reference/simd/simd-optimizations-tables.inc deleted file mode 100644 index f038a91e1..000000000 --- a/doc/source/reference/simd/simd-optimizations-tables.inc +++ /dev/null @@ -1,103 +0,0 @@ -.. generated via source/reference/simd/simd-optimizations.py - -x86 - CPU feature names -~~~~~~~~~~~~~~~~~~~~~~~ -.. 
table:: - :align: left - - ============ ================================================================================================================= - Name Implies - ============ ================================================================================================================= - ``SSE`` ``SSE2`` - ``SSE2`` ``SSE`` - ``SSE3`` ``SSE`` ``SSE2`` - ``SSSE3`` ``SSE`` ``SSE2`` ``SSE3`` - ``SSE41`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` - ``POPCNT`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` - ``SSE42`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` - ``AVX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` - ``XOP`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` - ``FMA4`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` - ``F16C`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` - ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` - ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` - ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` - ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` - ============ ================================================================================================================= - -x86 - Group names -~~~~~~~~~~~~~~~~~ -.. 
table:: - :align: left - - ============== ===================================================== =========================================================================================================================================================================== - Name Gather Implies - ============== ===================================================== =========================================================================================================================================================================== - ``AVX512_KNL`` ``AVX512ER`` ``AVX512PF`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` - ``AVX512_KNM`` ``AVX5124FMAPS`` ``AVX5124VNNIW`` ``AVX512VPOPCNTDQ`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_KNL`` - ``AVX512_SKX`` ``AVX512VL`` ``AVX512BW`` ``AVX512DQ`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` - ``AVX512_CLX`` ``AVX512VNNI`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` - ``AVX512_CNL`` ``AVX512IFMA`` ``AVX512VBMI`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` - ``AVX512_ICL`` ``AVX512VBMI2`` ``AVX512BITALG`` ``AVX512VPOPCNTDQ`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL`` - ============== ===================================================== =========================================================================================================================================================================== - -IBM/POWER big-endian - CPU feature names 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. table:: - :align: left - - ======== ================ - Name Implies - ======== ================ - ``VSX`` - ``VSX2`` ``VSX`` - ``VSX3`` ``VSX`` ``VSX2`` - ======== ================ - -IBM/POWER little-endian - CPU feature names -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. table:: - :align: left - - ======== ================ - Name Implies - ======== ================ - ``VSX`` ``VSX2`` - ``VSX2`` ``VSX`` - ``VSX3`` ``VSX`` ``VSX2`` - ======== ================ - -ARMv7/A32 - CPU feature names -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. table:: - :align: left - - ============== =========================================================== - Name Implies - ============== =========================================================== - ``NEON`` - ``NEON_FP16`` ``NEON`` - ``NEON_VFPV4`` ``NEON`` ``NEON_FP16`` - ``ASIMD`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` - ``ASIMDHP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` - ``ASIMDDP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` - ``ASIMDFHM`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP`` - ============== =========================================================== - -ARMv8/A64 - CPU feature names -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. 
table:: - :align: left - - ============== =========================================================== - Name Implies - ============== =========================================================== - ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` - ``NEON_FP16`` ``NEON`` ``NEON_VFPV4`` ``ASIMD`` - ``NEON_VFPV4`` ``NEON`` ``NEON_FP16`` ``ASIMD`` - ``ASIMD`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` - ``ASIMDHP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` - ``ASIMDDP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` - ``ASIMDFHM`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP`` - ============== =========================================================== - diff --git a/doc/source/reference/simd/simd-optimizations.py b/doc/source/reference/simd/simd-optimizations.py deleted file mode 100644 index a78302db5..000000000 --- a/doc/source/reference/simd/simd-optimizations.py +++ /dev/null @@ -1,190 +0,0 @@ -""" -Generate CPU features tables from CCompilerOpt -""" -from os import sys, path -gen_path = path.dirname(path.realpath(__file__)) -#sys.path.append(path.abspath(path.join(gen_path, *([".."]*4), "numpy", "distutils"))) -#from ccompiler_opt import CCompilerOpt -from numpy.distutils.ccompiler_opt import CCompilerOpt - -class FakeCCompilerOpt(CCompilerOpt): - fake_info = ("arch", "compiler", "extra_args") - # disable caching no need for it - conf_nocache = True - def __init__(self, *args, **kwargs): - no_cc = None - CCompilerOpt.__init__(self, no_cc, **kwargs) - def dist_compile(self, sources, flags, **kwargs): - return sources - def dist_info(self): - return FakeCCompilerOpt.fake_info - @staticmethod - def dist_log(*args, stderr=False): - # avoid printing - pass - def feature_test(self, name, force_flags=None): - # To speed up - return True - - def gen_features_table(self, features, ignore_groups=True, - field_names=["Name", "Implies"], - fstyle=None, fstyle_implies=None, **kwargs): - rows = [] - if fstyle is None: - fstyle = lambda ft: f'``{ft}``' - if 
fstyle_implies is None: - fstyle_implies = lambda origin, ft: fstyle(ft) - for f in self.feature_sorted(features): - is_group = "group" in self.feature_supported.get(f, {}) - if ignore_groups and is_group: - continue - implies = self.feature_sorted(self.feature_implies(f)) - implies = ' '.join([fstyle_implies(f, i) for i in implies]) - rows.append([fstyle(f), implies]) - if rows: - return self.gen_rst_table(field_names, rows, **kwargs) - - def gen_gfeatures_table(self, features, - field_names=["Name", "Gather", "Implies"], - fstyle=None, fstyle_implies=None, **kwargs): - rows = [] - if fstyle is None: - fstyle = lambda ft: f'``{ft}``' - if fstyle_implies is None: - fstyle_implies = lambda origin, ft: fstyle(ft) - for f in self.feature_sorted(features): - gather = self.feature_supported.get(f, {}).get("group", None) - if not gather: - continue - implies = self.feature_sorted(self.feature_implies(f)) - implies = ' '.join([fstyle_implies(f, i) for i in implies]) - gather = ' '.join([fstyle_implies(f, i) for i in gather]) - rows.append([fstyle(f), gather, implies]) - if rows: - return self.gen_rst_table(field_names, rows, **kwargs) - - def gen_rst_table(self, field_names, rows, tab_size=4): - assert(not rows or len(field_names) == len(rows[0])) - rows.append(field_names) - fld_len = len(field_names) - cls_len = [max(len(c[i]) for c in rows) for i in range(fld_len)] - del rows[-1] - cformat = ' '.join('{:<%d}' % i for i in cls_len) - border = cformat.format(*['='*i for i in cls_len]) - - rows = [cformat.format(*row) for row in rows] - # header - rows = [border, cformat.format(*field_names), border] + rows - # footer - rows += [border] - # add left margin - rows = [(' ' * tab_size) + r for r in rows] - return '\n'.join(rows) - -def features_table_sections(name, ftable=None, gtable=None, tab_size=4): - tab = ' '*tab_size - content = '' - if ftable: - title = f"{name} - CPU feature names" - content = ( - f"{title}\n{'~'*len(title)}" - f"\n.. 
table::\n{tab}:align: left\n\n" - f"{ftable}\n\n" - ) - if gtable: - title = f"{name} - Group names" - content += ( - f"{title}\n{'~'*len(title)}" - f"\n.. table::\n{tab}:align: left\n\n" - f"{gtable}\n\n" - ) - return content - -def features_table(arch, cc="gcc", pretty_name=None, **kwargs): - FakeCCompilerOpt.fake_info = (arch, cc, '') - ccopt = FakeCCompilerOpt(cpu_baseline="max") - features = ccopt.cpu_baseline_names() - ftable = ccopt.gen_features_table(features, **kwargs) - gtable = ccopt.gen_gfeatures_table(features, **kwargs) - - if not pretty_name: - pretty_name = arch + '/' + cc - return features_table_sections(pretty_name, ftable, gtable, **kwargs) - -def features_table_diff(arch, cc, cc_vs="gcc", pretty_name=None, **kwargs): - FakeCCompilerOpt.fake_info = (arch, cc, '') - ccopt = FakeCCompilerOpt(cpu_baseline="max") - fnames = ccopt.cpu_baseline_names() - features = {f:ccopt.feature_implies(f) for f in fnames} - - FakeCCompilerOpt.fake_info = (arch, cc_vs, '') - ccopt_vs = FakeCCompilerOpt(cpu_baseline="max") - fnames_vs = ccopt_vs.cpu_baseline_names() - features_vs = {f:ccopt_vs.feature_implies(f) for f in fnames_vs} - - common = set(fnames).intersection(fnames_vs) - extra_avl = set(fnames).difference(fnames_vs) - not_avl = set(fnames_vs).difference(fnames) - diff_impl_f = {f:features[f].difference(features_vs[f]) for f in common} - diff_impl = {k for k, v in diff_impl_f.items() if v} - - fbold = lambda ft: f'**{ft}**' if ft in extra_avl else f'``{ft}``' - fbold_implies = lambda origin, ft: ( - f'**{ft}**' if ft in diff_impl_f.get(origin, {}) else f'``{ft}``' - ) - diff_all = diff_impl.union(extra_avl) - ftable = ccopt.gen_features_table( - diff_all, fstyle=fbold, fstyle_implies=fbold_implies, **kwargs - ) - gtable = ccopt.gen_gfeatures_table( - diff_all, fstyle=fbold, fstyle_implies=fbold_implies, **kwargs - ) - if not pretty_name: - pretty_name = arch + '/' + cc - content = features_table_sections(pretty_name, ftable, gtable, **kwargs) - - if 
not_avl: - not_avl = ccopt_vs.feature_sorted(not_avl) - not_avl = ' '.join(not_avl) - content += ( - ".. note::\n" - f" The following features aren't supported by {pretty_name}:\n" - f" **{not_avl}**\n\n" - ) - return content - -if __name__ == '__main__': - pretty_names = { - "PPC64": "IBM/POWER big-endian", - "PPC64LE": "IBM/POWER little-endian", - "ARMHF": "ARMv7/A32", - "AARCH64": "ARMv8/A64", - "ICC": "Intel Compiler", - # "ICCW": "Intel Compiler msvc-like", - "MSVC": "Microsoft Visual C/C++" - } - with open(path.join(gen_path, 'simd-optimizations-tables.inc'), 'wt') as fd: - fd.write(f'.. generated via {__file__}\n\n') - for arch in ( - ("x86", "PPC64", "PPC64LE", "ARMHF", "AARCH64") - ): - pretty_name = pretty_names.get(arch, arch) - table = features_table(arch=arch, pretty_name=pretty_name) - assert(table) - fd.write(table) - - with open(path.join(gen_path, 'simd-optimizations-tables-diff.inc'), 'wt') as fd: - fd.write(f'.. generated via {__file__}\n\n') - for arch, cc_names in ( - ("x86", ("clang", "ICC", "MSVC")), - ("PPC64", ("clang",)), - ("PPC64LE", ("clang",)), - ("ARMHF", ("clang",)), - ("AARCH64", ("clang",)) - ): - arch_pname = pretty_names.get(arch, arch) - for cc in cc_names: - pretty_name = f"{arch_pname}::{pretty_names.get(cc, cc)}" - table = features_table_diff(arch=arch, cc=cc, pretty_name=pretty_name) - if table: - fd.write(table) |