summaryrefslogtreecommitdiff
path: root/doc/source/reference/simd
diff options
context:
space:
mode:
authorSayed Adel <seiko@imavr.com>2021-11-23 03:51:21 +0200
committerSayed Adel <seiko@imavr.com>2021-12-08 22:18:07 +0200
commit563051aaebbb80da3d453cacf3e1f9782d3077fb (patch)
treed7066b82eb8158e74d2efbd8f730927fcfebb588 /doc/source/reference/simd
parent6ae1a58e508a1f843ec3736488c67ac0bb793c16 (diff)
downloadnumpy-563051aaebbb80da3d453cacf3e1f9782d3077fb.tar.gz
DOC, SIMD: Improve the auto-generated tables of CPU features
Diffstat (limited to 'doc/source/reference/simd')
-rw-r--r--doc/source/reference/simd/gen_features.py194
-rw-r--r--doc/source/reference/simd/generated_tables/compilers-diff.inc33
-rw-r--r--doc/source/reference/simd/generated_tables/cpu_features.inc93
-rw-r--r--doc/source/reference/simd/simd-optimizations-tables-diff.inc37
-rw-r--r--doc/source/reference/simd/simd-optimizations-tables.inc103
-rw-r--r--doc/source/reference/simd/simd-optimizations.py190
6 files changed, 320 insertions, 330 deletions
diff --git a/doc/source/reference/simd/gen_features.py b/doc/source/reference/simd/gen_features.py
new file mode 100644
index 000000000..d74d54016
--- /dev/null
+++ b/doc/source/reference/simd/gen_features.py
@@ -0,0 +1,194 @@
+"""
+Generate CPU features tables from CCompilerOpt
+"""
+from os import sys, path
+from numpy.distutils.ccompiler_opt import CCompilerOpt
+
class FakeCCompilerOpt(CCompilerOpt):
    """
    `CCompilerOpt` variant that never runs a real compiler.

    The architecture and compiler names are supplied by the caller and
    reported through `dist_info`; compilation, logging and feature testing
    are replaced by no-op stubs so the feature tables can be queried for
    any arch/compiler pair.
    """
    # disable caching no need for it
    conf_nocache = True

    def __init__(self, arch, cc, *args, **kwargs):
        """
        Parameters
        ----------
        arch, cc : str
            Architecture and compiler names returned by `dist_info`.
        *args
            Accepted for signature compatibility and ignored.
        **kwargs
            Forwarded to ``CCompilerOpt.__init__``.
        """
        # Assigned before the base initializer runs; presumably that is
        # where ``dist_info()`` is first queried -- TODO confirm.
        self.fake_info = (arch, cc, '')
        # ``None`` stands in for the compiler instance.
        super().__init__(None, **kwargs)

    def dist_compile(self, sources, flags, **kwargs):
        """Pretend to compile: hand the sources back untouched."""
        return sources

    def dist_info(self):
        """Return the faked ``(arch, compiler, extra_args)`` tuple."""
        return self.fake_info

    @staticmethod
    def dist_log(*args, stderr=False):
        """Discard log messages to avoid printing."""

    def feature_test(self, name, force_flags=None, macros=None):
        """
        Report every feature as supported without testing it (speeds up
        generation).

        ``macros`` defaults to ``None`` rather than a shared mutable list;
        none of the arguments are used.
        """
        return True
+
class Features:
    """
    CPU features of one architecture/compiler pair, rendered as reST tables.

    The feature data comes from a `FakeCCompilerOpt` created with
    ``cpu_baseline="max"`` and stored in ``self.copt``.
    """

    def __init__(self, arch, cc):
        self.copt = FakeCCompilerOpt(arch, cc, cpu_baseline="max")

    def names(self):
        """Return the baseline feature names reported by ``self.copt``."""
        return self.copt.cpu_baseline_names()

    def serialize(self, features_names):
        """
        Return one ``(name, implies, gather)`` tuple per feature.

        Features are ordered by ``feature_sorted``; ``implies`` is the
        sorted list of implied features and ``gather`` is the feature's
        ``"group"`` entry (an empty list when it has none).
        """
        copt = self.copt
        result = []
        for f in copt.feature_sorted(features_names):
            gather = copt.feature_supported.get(f, {}).get("group", [])
            implies = copt.feature_sorted(copt.feature_implies(f))
            result.append((f, implies, gather))
        return result

    def table(self, **kwargs):
        """Return the reST table of all features; kwargs go to `gen_table`."""
        return self.gen_table(self.serialize(self.names()), **kwargs)

    def table_diff(self, vs, **kwargs):
        """
        Return a reST table of the differences between ``self`` and ``vs``.

        Parameters
        ----------
        vs : Features
            The reference to compare against.
        **kwargs
            Forwarded to `gen_table`.

        Returns
        -------
        str
            Table listing features that exist only in ``self`` (styled
            ``:enabled:``), features that exist only in ``vs`` (styled
            ``:disabled:``, together with everything they imply) and common
            features whose implied sets differ. Empty string when there is
            no difference.
        """
        fnames = set(self.names())
        fnames_vs = set(vs.names())
        common = fnames & fnames_vs
        extra = fnames - fnames_vs
        notavl = fnames_vs - fnames

        iextra = {}    # feature -> features implied only by ``self``
        inotavl = {}   # feature -> features implied only by ``vs``
        idiff = set()  # common features whose implied sets differ
        for f in common:
            implies = self.copt.feature_implies(f)
            implies_vs = vs.copt.feature_implies(f)
            e = implies.difference(implies_vs)
            i = implies_vs.difference(implies)
            if e:
                iextra[f] = e
            if i:
                # Record the features missing on this side (``i``); the
                # previous code stored ``e`` here by mistake.
                inotavl[f] = i
            if e or i:
                idiff.add(f)

        def fbold(f):
            if f in extra:
                return f':enabled:`{f}`'
            if f in notavl:
                return f':disabled:`{f}`'
            return f

        def fbold_implies(f, i):
            if i in iextra.get(f, ()):
                return f':enabled:`{i}`'
            if f in notavl or i in inotavl.get(f, ()):
                return f':disabled:`{i}`'
            return i

        # Rows of differing/extra features come from ``self``; rows of
        # unavailable features can only be described by ``vs``.
        diff_all = self.serialize(idiff.union(extra))
        diff_all += vs.serialize(notavl)
        return self.gen_table(
            diff_all, fstyle=fbold, fstyle_implies=fbold_implies, **kwargs
        )

    def gen_table(self, serialized_features, fstyle=None, fstyle_implies=None,
                  **kwargs):
        """
        Render serialized features as a reST simple table.

        Parameters
        ----------
        serialized_features : iterable of (name, implies, gather)
            As produced by `serialize`.
        fstyle : callable, optional
            ``fstyle(name) -> str``; defaults to wrapping in double backticks.
        fstyle_implies : callable, optional
            ``fstyle_implies(origin, name) -> str`` used for the implied and
            gathered names; defaults to ``fstyle(name)``.
        **kwargs
            Forwarded to `gen_rst_table`.

        Returns
        -------
        str
            The table, or an empty string when there are no rows. The
            "Gathers" column is dropped when no feature gathers anything.
        """
        if fstyle is None:
            def fstyle(ft):
                return f'``{ft}``'
        if fstyle_implies is None:
            def fstyle_implies(origin, ft):
                return fstyle(ft)

        rows = []
        have_gather = False
        for f, implies, gather in serialized_features:
            if gather:
                have_gather = True
            rows.append((
                fstyle(f),
                ' '.join(fstyle_implies(f, i) for i in implies),
                ' '.join(fstyle_implies(f, i) for i in gather),
            ))
        if not rows:
            return ''
        fields = ["Name", "Implies", "Gathers"]
        if not have_gather:
            fields = fields[:2]
            rows = [row[:2] for row in rows]
        return self.gen_rst_table(fields, rows, **kwargs)

    def gen_rst_table(self, field_names, rows, tab_size=4):
        """
        Format ``rows`` under ``field_names`` as a reST simple table.

        Every column is left-aligned and padded to its widest cell (header
        included), and every line is prefixed with ``tab_size`` spaces.
        ``rows`` is left untouched.
        """
        assert not rows or len(field_names) == len(rows[0])
        # Column widths take the header into account without mutating
        # the caller's list.
        cells = [field_names, *rows]
        cls_len = [
            max(len(c[i]) for c in cells) for i in range(len(field_names))
        ]
        cformat = ' '.join('{:<%d}' % w for w in cls_len)
        border = cformat.format(*('=' * w for w in cls_len))

        lines = [border, cformat.format(*field_names), border]
        lines += [cformat.format(*row) for row in rows]
        lines.append(border)
        # add left margin
        margin = ' ' * tab_size
        return '\n'.join(margin + line for line in lines)
+
def wrapper_section(title, content, tab_size=4):
    """
    Wrap a rendered table in a titled reST section.

    The title is underlined with ``~`` and followed by a left-aligned
    ``.. table::`` directive containing ``content``. Returns an empty
    string when ``content`` is empty.
    """
    if not content:
        return ''
    indent = ' ' * tab_size
    underline = '~' * len(title)
    pieces = [
        title,
        underline,
        '.. table::',
        indent + ':align: left',
        '',
        content,
        '',
        '',
    ]
    return '\n'.join(pieces)
+
def wrapper_tab(title, table, tab_size=4):
    """
    Wrap a rendered table in a ``.. tab::`` directive.

    Parameters
    ----------
    title : str
        Tab title.
    table : str
        Table text; assumed to already carry a ``tab_size`` left margin on
        every line, as `Features.gen_rst_table` produces with the same
        ``tab_size``.
    tab_size : int, optional
        Number of spaces per indentation level.

    Returns
    -------
    str
        The tab block, or an empty string when ``table`` is empty.

    Notes
    -----
    The previous implementation built the text but never returned it, so
    the result was always ``''``; it also wrote the option as
    ``align: left`` (missing the leading colon) and indented only the first
    line of the table.
    """
    if not table:
        return ''
    tab = ' ' * tab_size
    # The table sits inside ``.. table::`` which itself sits inside
    # ``.. tab::``, so each non-empty line needs one more level than the
    # margin it already has.
    nested = '\n'.join(
        tab + line if line else line for line in table.split('\n')
    )
    return (
        f".. tab:: {title}\n\n"
        f"{tab}.. table::\n"
        f"{tab}{tab}:align: left\n\n"
        f"{nested}\n\n"
    )
+
+
if __name__ == '__main__':
    # Regenerate the two include files under ``generated_tables`` next to
    # this script: the per-architecture feature tables (built with gcc) and
    # the per-compiler differences against gcc.

    pretty_names = {
        "PPC64": "IBM/POWER big-endian",
        "PPC64LE": "IBM/POWER little-endian",
        "ARMHF": "ARMv7/A32",
        "AARCH64": "ARMv8/A64",
        "ICC": "Intel Compiler",
        # "ICCW": "Intel Compiler msvc-like",
        "MSVC": "Microsoft Visual C/C++"
    }
    gen_path = path.join(
        path.dirname(path.realpath(__file__)), "generated_tables"
    )
    # Only the script's file name goes into the header: ``__file__`` can be
    # an absolute path, which would leak the local directory layout into
    # the generated docs and make the output differ between machines.
    generated_by = f'.. generated via {path.basename(__file__)}\n\n'

    with open(path.join(gen_path, 'cpu_features.inc'), 'wt') as fd:
        fd.write(generated_by)
        for arch in ("x86", "PPC64", "PPC64LE", "ARMHF", "AARCH64"):
            title = "On " + pretty_names.get(arch, arch)
            table = Features(arch, 'gcc').table()
            fd.write(wrapper_section(title, table))

    with open(path.join(gen_path, 'compilers-diff.inc'), 'wt') as fd:
        fd.write(generated_by)
        for arch, cc_names in (
            ("x86", ("clang", "ICC", "MSVC")),
            ("PPC64", ("clang",)),
            ("PPC64LE", ("clang",)),
            ("ARMHF", ("clang",)),
            ("AARCH64", ("clang",))
        ):
            arch_pname = pretty_names.get(arch, arch)
            for cc in cc_names:
                title = f"On {arch_pname}::{pretty_names.get(cc, cc)}"
                # wrapper_section() returns '' for an empty diff, so
                # compilers identical to gcc produce no section.
                table = Features(arch, cc).table_diff(Features(arch, "gcc"))
                fd.write(wrapper_section(title, table))
+
+
diff --git a/doc/source/reference/simd/generated_tables/compilers-diff.inc b/doc/source/reference/simd/generated_tables/compilers-diff.inc
new file mode 100644
index 000000000..4b9009a68
--- /dev/null
+++ b/doc/source/reference/simd/generated_tables/compilers-diff.inc
@@ -0,0 +1,33 @@
+.. generated via gen_features.py
+
+On x86::Intel Compiler
+~~~~~~~~~~~~~~~~~~~~~~
+.. table::
+ :align: left
+
+ ================ ==========================================================================================================================================
+ Name Implies
+ ================ ==========================================================================================================================================
+ FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2`
+ AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3`
+ AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD`
+ :disabled:`XOP` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX`
+ :disabled:`FMA4` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX`
+ ================ ==========================================================================================================================================
+
+On x86::Microsoft Visual C/C++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. table::
+ :align: left
+
+ ====================== ============================================================================================================================================================================================================================================================= =============================================================================
+ Name Implies Gathers
+ ====================== ============================================================================================================================================================================================================================================================= =============================================================================
+ FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2`
+ AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3`
+ AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD` :enabled:`AVX512_SKX`
+ AVX512CD SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 AVX512F :enabled:`AVX512_SKX`
+ :disabled:`AVX512_KNL` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512ER` :disabled:`AVX512PF`
+ :disabled:`AVX512_KNM` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_KNL` :disabled:`AVX5124FMAPS` :disabled:`AVX5124VNNIW` :disabled:`AVX512VPOPCNTDQ`
+ ====================== ============================================================================================================================================================================================================================================================= =============================================================================
+
diff --git a/doc/source/reference/simd/generated_tables/cpu_features.inc b/doc/source/reference/simd/generated_tables/cpu_features.inc
new file mode 100644
index 000000000..a7eae5652
--- /dev/null
+++ b/doc/source/reference/simd/generated_tables/cpu_features.inc
@@ -0,0 +1,93 @@
+.. generated via gen_features.py
+
+On x86
+~~~~~~
+.. table::
+ :align: left
+
+ ============== =========================================================================================================================================================================== =====================================================
+ Name Implies Gathers
+ ============== =========================================================================================================================================================================== =====================================================
+ ``SSE`` ``SSE2``
+ ``SSE2`` ``SSE``
+ ``SSE3`` ``SSE`` ``SSE2``
+ ``SSSE3`` ``SSE`` ``SSE2`` ``SSE3``
+ ``SSE41`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3``
+ ``POPCNT`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41``
+ ``SSE42`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT``
+ ``AVX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42``
+ ``XOP`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
+ ``FMA4`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
+ ``F16C`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
+ ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C``
+ ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C``
+ ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2``
+ ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F``
+ ``AVX512_KNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512ER`` ``AVX512PF``
+ ``AVX512_KNM`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_KNL`` ``AVX5124FMAPS`` ``AVX5124VNNIW`` ``AVX512VPOPCNTDQ``
+ ``AVX512_SKX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512VL`` ``AVX512BW`` ``AVX512DQ``
+ ``AVX512_CLX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512VNNI``
+ ``AVX512_CNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512IFMA`` ``AVX512VBMI``
+ ``AVX512_ICL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL`` ``AVX512VBMI2`` ``AVX512BITALG`` ``AVX512VPOPCNTDQ``
+ ============== =========================================================================================================================================================================== =====================================================
+
+On IBM/POWER big-endian
+~~~~~~~~~~~~~~~~~~~~~~~
+.. table::
+ :align: left
+
+ ======== ================
+ Name Implies
+ ======== ================
+ ``VSX``
+ ``VSX2`` ``VSX``
+ ``VSX3`` ``VSX`` ``VSX2``
+ ======== ================
+
+On IBM/POWER little-endian
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. table::
+ :align: left
+
+ ======== ================
+ Name Implies
+ ======== ================
+ ``VSX`` ``VSX2``
+ ``VSX2`` ``VSX``
+ ``VSX3`` ``VSX`` ``VSX2``
+ ======== ================
+
+On ARMv7/A32
+~~~~~~~~~~~~
+.. table::
+ :align: left
+
+ ============== ===========================================================
+ Name Implies
+ ============== ===========================================================
+ ``NEON``
+ ``NEON_FP16`` ``NEON``
+ ``NEON_VFPV4`` ``NEON`` ``NEON_FP16``
+ ``ASIMD`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4``
+ ``ASIMDHP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
+ ``ASIMDDP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
+ ``ASIMDFHM`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP``
+ ============== ===========================================================
+
+On ARMv8/A64
+~~~~~~~~~~~~
+.. table::
+ :align: left
+
+ ============== ===========================================================
+ Name Implies
+ ============== ===========================================================
+ ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
+ ``NEON_FP16`` ``NEON`` ``NEON_VFPV4`` ``ASIMD``
+ ``NEON_VFPV4`` ``NEON`` ``NEON_FP16`` ``ASIMD``
+ ``ASIMD`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4``
+ ``ASIMDHP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
+ ``ASIMDDP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
+ ``ASIMDFHM`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP``
+ ============== ===========================================================
+
diff --git a/doc/source/reference/simd/simd-optimizations-tables-diff.inc b/doc/source/reference/simd/simd-optimizations-tables-diff.inc
deleted file mode 100644
index 41fa96703..000000000
--- a/doc/source/reference/simd/simd-optimizations-tables-diff.inc
+++ /dev/null
@@ -1,37 +0,0 @@
-.. generated via source/reference/simd/simd-optimizations.py
-
-x86::Intel Compiler - CPU feature names
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. table::
- :align: left
-
- =========== ==================================================================================================================
- Name Implies
- =========== ==================================================================================================================
- ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` **AVX2**
- ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` **FMA3**
- ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` **AVX512CD**
- =========== ==================================================================================================================
-
-.. note::
- The following features aren't supported by x86::Intel Compiler:
- **XOP FMA4**
-
-x86::Microsoft Visual C/C++ - CPU feature names
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. table::
- :align: left
-
- ============ =================================================================================================================================
- Name Implies
- ============ =================================================================================================================================
- ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` **AVX2**
- ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` **FMA3**
- ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` **AVX512CD** **AVX512_SKX**
- ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` **AVX512_SKX**
- ============ =================================================================================================================================
-
-.. note::
- The following features aren't supported by x86::Microsoft Visual C/C++:
- **AVX512_KNL AVX512_KNM**
-
diff --git a/doc/source/reference/simd/simd-optimizations-tables.inc b/doc/source/reference/simd/simd-optimizations-tables.inc
deleted file mode 100644
index f038a91e1..000000000
--- a/doc/source/reference/simd/simd-optimizations-tables.inc
+++ /dev/null
@@ -1,103 +0,0 @@
-.. generated via source/reference/simd/simd-optimizations.py
-
-x86 - CPU feature names
-~~~~~~~~~~~~~~~~~~~~~~~
-.. table::
- :align: left
-
- ============ =================================================================================================================
- Name Implies
- ============ =================================================================================================================
- ``SSE`` ``SSE2``
- ``SSE2`` ``SSE``
- ``SSE3`` ``SSE`` ``SSE2``
- ``SSSE3`` ``SSE`` ``SSE2`` ``SSE3``
- ``SSE41`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3``
- ``POPCNT`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41``
- ``SSE42`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT``
- ``AVX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42``
- ``XOP`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
- ``FMA4`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
- ``F16C`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
- ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C``
- ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C``
- ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2``
- ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F``
- ============ =================================================================================================================
-
-x86 - Group names
-~~~~~~~~~~~~~~~~~
-.. table::
- :align: left
-
- ============== ===================================================== ===========================================================================================================================================================================
- Name Gather Implies
- ============== ===================================================== ===========================================================================================================================================================================
- ``AVX512_KNL`` ``AVX512ER`` ``AVX512PF`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD``
- ``AVX512_KNM`` ``AVX5124FMAPS`` ``AVX5124VNNIW`` ``AVX512VPOPCNTDQ`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_KNL``
- ``AVX512_SKX`` ``AVX512VL`` ``AVX512BW`` ``AVX512DQ`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD``
- ``AVX512_CLX`` ``AVX512VNNI`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX``
- ``AVX512_CNL`` ``AVX512IFMA`` ``AVX512VBMI`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX``
- ``AVX512_ICL`` ``AVX512VBMI2`` ``AVX512BITALG`` ``AVX512VPOPCNTDQ`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL``
- ============== ===================================================== ===========================================================================================================================================================================
-
-IBM/POWER big-endian - CPU feature names
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. table::
- :align: left
-
- ======== ================
- Name Implies
- ======== ================
- ``VSX``
- ``VSX2`` ``VSX``
- ``VSX3`` ``VSX`` ``VSX2``
- ======== ================
-
-IBM/POWER little-endian - CPU feature names
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. table::
- :align: left
-
- ======== ================
- Name Implies
- ======== ================
- ``VSX`` ``VSX2``
- ``VSX2`` ``VSX``
- ``VSX3`` ``VSX`` ``VSX2``
- ======== ================
-
-ARMv7/A32 - CPU feature names
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. table::
- :align: left
-
- ============== ===========================================================
- Name Implies
- ============== ===========================================================
- ``NEON``
- ``NEON_FP16`` ``NEON``
- ``NEON_VFPV4`` ``NEON`` ``NEON_FP16``
- ``ASIMD`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4``
- ``ASIMDHP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
- ``ASIMDDP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
- ``ASIMDFHM`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP``
- ============== ===========================================================
-
-ARMv8/A64 - CPU feature names
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. table::
- :align: left
-
- ============== ===========================================================
- Name Implies
- ============== ===========================================================
- ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
- ``NEON_FP16`` ``NEON`` ``NEON_VFPV4`` ``ASIMD``
- ``NEON_VFPV4`` ``NEON`` ``NEON_FP16`` ``ASIMD``
- ``ASIMD`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4``
- ``ASIMDHP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
- ``ASIMDDP`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD``
- ``ASIMDFHM`` ``NEON`` ``NEON_FP16`` ``NEON_VFPV4`` ``ASIMD`` ``ASIMDHP``
- ============== ===========================================================
-
diff --git a/doc/source/reference/simd/simd-optimizations.py b/doc/source/reference/simd/simd-optimizations.py
deleted file mode 100644
index a78302db5..000000000
--- a/doc/source/reference/simd/simd-optimizations.py
+++ /dev/null
@@ -1,190 +0,0 @@
-"""
-Generate CPU features tables from CCompilerOpt
-"""
-from os import sys, path
-gen_path = path.dirname(path.realpath(__file__))
-#sys.path.append(path.abspath(path.join(gen_path, *([".."]*4), "numpy", "distutils")))
-#from ccompiler_opt import CCompilerOpt
-from numpy.distutils.ccompiler_opt import CCompilerOpt
-
-class FakeCCompilerOpt(CCompilerOpt):
- fake_info = ("arch", "compiler", "extra_args")
- # disable caching no need for it
- conf_nocache = True
- def __init__(self, *args, **kwargs):
- no_cc = None
- CCompilerOpt.__init__(self, no_cc, **kwargs)
- def dist_compile(self, sources, flags, **kwargs):
- return sources
- def dist_info(self):
- return FakeCCompilerOpt.fake_info
- @staticmethod
- def dist_log(*args, stderr=False):
- # avoid printing
- pass
- def feature_test(self, name, force_flags=None):
- # To speed up
- return True
-
- def gen_features_table(self, features, ignore_groups=True,
- field_names=["Name", "Implies"],
- fstyle=None, fstyle_implies=None, **kwargs):
- rows = []
- if fstyle is None:
- fstyle = lambda ft: f'``{ft}``'
- if fstyle_implies is None:
- fstyle_implies = lambda origin, ft: fstyle(ft)
- for f in self.feature_sorted(features):
- is_group = "group" in self.feature_supported.get(f, {})
- if ignore_groups and is_group:
- continue
- implies = self.feature_sorted(self.feature_implies(f))
- implies = ' '.join([fstyle_implies(f, i) for i in implies])
- rows.append([fstyle(f), implies])
- if rows:
- return self.gen_rst_table(field_names, rows, **kwargs)
-
- def gen_gfeatures_table(self, features,
- field_names=["Name", "Gather", "Implies"],
- fstyle=None, fstyle_implies=None, **kwargs):
- rows = []
- if fstyle is None:
- fstyle = lambda ft: f'``{ft}``'
- if fstyle_implies is None:
- fstyle_implies = lambda origin, ft: fstyle(ft)
- for f in self.feature_sorted(features):
- gather = self.feature_supported.get(f, {}).get("group", None)
- if not gather:
- continue
- implies = self.feature_sorted(self.feature_implies(f))
- implies = ' '.join([fstyle_implies(f, i) for i in implies])
- gather = ' '.join([fstyle_implies(f, i) for i in gather])
- rows.append([fstyle(f), gather, implies])
- if rows:
- return self.gen_rst_table(field_names, rows, **kwargs)
-
- def gen_rst_table(self, field_names, rows, tab_size=4):
- assert(not rows or len(field_names) == len(rows[0]))
- rows.append(field_names)
- fld_len = len(field_names)
- cls_len = [max(len(c[i]) for c in rows) for i in range(fld_len)]
- del rows[-1]
- cformat = ' '.join('{:<%d}' % i for i in cls_len)
- border = cformat.format(*['='*i for i in cls_len])
-
- rows = [cformat.format(*row) for row in rows]
- # header
- rows = [border, cformat.format(*field_names), border] + rows
- # footer
- rows += [border]
- # add left margin
- rows = [(' ' * tab_size) + r for r in rows]
- return '\n'.join(rows)
-
-def features_table_sections(name, ftable=None, gtable=None, tab_size=4):
- tab = ' '*tab_size
- content = ''
- if ftable:
- title = f"{name} - CPU feature names"
- content = (
- f"{title}\n{'~'*len(title)}"
- f"\n.. table::\n{tab}:align: left\n\n"
- f"{ftable}\n\n"
- )
- if gtable:
- title = f"{name} - Group names"
- content += (
- f"{title}\n{'~'*len(title)}"
- f"\n.. table::\n{tab}:align: left\n\n"
- f"{gtable}\n\n"
- )
- return content
-
-def features_table(arch, cc="gcc", pretty_name=None, **kwargs):
- FakeCCompilerOpt.fake_info = (arch, cc, '')
- ccopt = FakeCCompilerOpt(cpu_baseline="max")
- features = ccopt.cpu_baseline_names()
- ftable = ccopt.gen_features_table(features, **kwargs)
- gtable = ccopt.gen_gfeatures_table(features, **kwargs)
-
- if not pretty_name:
- pretty_name = arch + '/' + cc
- return features_table_sections(pretty_name, ftable, gtable, **kwargs)
-
-def features_table_diff(arch, cc, cc_vs="gcc", pretty_name=None, **kwargs):
- FakeCCompilerOpt.fake_info = (arch, cc, '')
- ccopt = FakeCCompilerOpt(cpu_baseline="max")
- fnames = ccopt.cpu_baseline_names()
- features = {f:ccopt.feature_implies(f) for f in fnames}
-
- FakeCCompilerOpt.fake_info = (arch, cc_vs, '')
- ccopt_vs = FakeCCompilerOpt(cpu_baseline="max")
- fnames_vs = ccopt_vs.cpu_baseline_names()
- features_vs = {f:ccopt_vs.feature_implies(f) for f in fnames_vs}
-
- common = set(fnames).intersection(fnames_vs)
- extra_avl = set(fnames).difference(fnames_vs)
- not_avl = set(fnames_vs).difference(fnames)
- diff_impl_f = {f:features[f].difference(features_vs[f]) for f in common}
- diff_impl = {k for k, v in diff_impl_f.items() if v}
-
- fbold = lambda ft: f'**{ft}**' if ft in extra_avl else f'``{ft}``'
- fbold_implies = lambda origin, ft: (
- f'**{ft}**' if ft in diff_impl_f.get(origin, {}) else f'``{ft}``'
- )
- diff_all = diff_impl.union(extra_avl)
- ftable = ccopt.gen_features_table(
- diff_all, fstyle=fbold, fstyle_implies=fbold_implies, **kwargs
- )
- gtable = ccopt.gen_gfeatures_table(
- diff_all, fstyle=fbold, fstyle_implies=fbold_implies, **kwargs
- )
- if not pretty_name:
- pretty_name = arch + '/' + cc
- content = features_table_sections(pretty_name, ftable, gtable, **kwargs)
-
- if not_avl:
- not_avl = ccopt_vs.feature_sorted(not_avl)
- not_avl = ' '.join(not_avl)
- content += (
- ".. note::\n"
- f" The following features aren't supported by {pretty_name}:\n"
- f" **{not_avl}**\n\n"
- )
- return content
-
-if __name__ == '__main__':
- pretty_names = {
- "PPC64": "IBM/POWER big-endian",
- "PPC64LE": "IBM/POWER little-endian",
- "ARMHF": "ARMv7/A32",
- "AARCH64": "ARMv8/A64",
- "ICC": "Intel Compiler",
- # "ICCW": "Intel Compiler msvc-like",
- "MSVC": "Microsoft Visual C/C++"
- }
- with open(path.join(gen_path, 'simd-optimizations-tables.inc'), 'wt') as fd:
- fd.write(f'.. generated via {__file__}\n\n')
- for arch in (
- ("x86", "PPC64", "PPC64LE", "ARMHF", "AARCH64")
- ):
- pretty_name = pretty_names.get(arch, arch)
- table = features_table(arch=arch, pretty_name=pretty_name)
- assert(table)
- fd.write(table)
-
- with open(path.join(gen_path, 'simd-optimizations-tables-diff.inc'), 'wt') as fd:
- fd.write(f'.. generated via {__file__}\n\n')
- for arch, cc_names in (
- ("x86", ("clang", "ICC", "MSVC")),
- ("PPC64", ("clang",)),
- ("PPC64LE", ("clang",)),
- ("ARMHF", ("clang",)),
- ("AARCH64", ("clang",))
- ):
- arch_pname = pretty_names.get(arch, arch)
- for cc in cc_names:
- pretty_name = f"{arch_pname}::{pretty_names.get(cc, cc)}"
- table = features_table_diff(arch=arch, cc=cc, pretty_name=pretty_name)
- if table:
- fd.write(table)