summaryrefslogtreecommitdiff
path: root/doc/source/reference/simd/simd-optimizations.py
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2020-07-12 16:30:42 +0300
committerGitHub <noreply@github.com>2020-07-12 08:30:42 -0500
commitb234742e2e26ff886f52864c73460fe4916a66d0 (patch)
tree5f6f90804adf53329d03edefadffd6d1e07a6a1b /doc/source/reference/simd/simd-optimizations.py
parent62fa23c44fb49c1d238e1de4f791ffc3ca4b1d11 (diff)
downloadnumpy-b234742e2e26ff886f52864c73460fe4916a66d0.tar.gz
DOC: Add SIMD optimization documentation (gh-15551)
Add documentation for the new build infrastructure and API developed to enable universal intrinsics. Written by @seiko2plus with some fixes by @mattip. * DOC: add SIMD optimization doc (seiko2plus) * DOC: reformat as valid RST * trim whitespace * first part of Understanding CPU Dispatching * update build options and remove implied features, gonna update it later * add more explanations for the dispatcher and fix doc style * fix up style * add figure * Improve and more explanations for Understanding CPU Dispatching * fix up syntax * DOC: tweak formatting * DOC: more tweaks * fix rst formatting * DOC: Generate CPU features tables from CCompilerOpt * DOC: move files around * DOC: add comment to top of file * DOC: rebuild tables, fix links * DOC: minor copyedits Co-authored-by: Sayed Adel <seiko@imavr.com> Co-authored-by: Ross Barnowski <rossbar@berkeley.edu>
Diffstat (limited to 'doc/source/reference/simd/simd-optimizations.py')
-rw-r--r--doc/source/reference/simd/simd-optimizations.py166
1 files changed, 166 insertions, 0 deletions
diff --git a/doc/source/reference/simd/simd-optimizations.py b/doc/source/reference/simd/simd-optimizations.py
new file mode 100644
index 000000000..628356163
--- /dev/null
+++ b/doc/source/reference/simd/simd-optimizations.py
@@ -0,0 +1,166 @@
+"""
+Generate CPU features tables from CCompilerOpt
+"""
+from os import sys, path
+gen_path = path.dirname(path.realpath(__file__))
+from numpy.distutils.ccompiler_opt import CCompilerOpt
+
+class FakeCCompilerOpt(CCompilerOpt):
+ fake_info = ""
+ def __init__(self, *args, **kwargs):
+ no_cc = None
+ CCompilerOpt.__init__(self, no_cc, **kwargs)
+ def dist_compile(self, sources, flags, **kwargs):
+ return sources
+ def dist_info(self):
+ return FakeCCompilerOpt.fake_info
+ @staticmethod
+ def dist_log(*args, stderr=False):
+ # avoid printing
+ pass
+ def feature_test(self, name, force_flags=None):
+ # To speed up
+ return True
+
+ def gen_features_table(self, features, ignore_groups=True,
+ field_names=["Name", "Implies"], **kwargs):
+ rows = []
+ for f in features:
+ is_group = "group" in self.feature_supported.get(f, {})
+ if ignore_groups and is_group:
+ continue
+ implies = self.feature_sorted(self.feature_implies(f))
+ implies = ' '.join(['``%s``' % i for i in implies])
+ rows.append([f, implies])
+ return self.gen_rst_table(field_names, rows, **kwargs)
+
+ def gen_gfeatures_table(self, features,
+ field_names=["Name", "Gather", "Implies"],
+ **kwargs):
+ rows = []
+ for f in features:
+ gather = self.feature_supported.get(f, {}).get("group", None)
+ if not gather:
+ continue
+ implies = self.feature_sorted(self.feature_implies(f))
+ implies = ' '.join(['``%s``' % i for i in implies])
+ gather = ' '.join(['``%s``' % i for i in gather])
+ rows.append([f, gather, implies])
+ return self.gen_rst_table(field_names, rows, **kwargs)
+
+
+ def gen_rst_table(self, field_names, rows, margin_left=2):
+ assert(not rows or len(field_names) == len(rows[0]))
+ rows.append(field_names)
+ fld_len = len(field_names)
+ cls_len = [max(len(c[i]) for c in rows) for i in range(fld_len)]
+ del rows[-1]
+ padding = 0
+ cformat = ' '.join('{:<%d}' % (i+padding) for i in cls_len)
+ border = cformat.format(*['='*i for i in cls_len])
+
+ rows = [cformat.format(*row) for row in rows]
+ # header
+ rows = [border, cformat.format(*field_names), border] + rows
+ # footer
+ rows += [border]
+ # add left margin
+ rows = [(' ' * margin_left) + r for r in rows]
+ return '\n'.join(rows)
+
+if __name__ == '__main__':
+ margin_left = 4*1
+ ############### x86 ###############
+ FakeCCompilerOpt.fake_info = "x86_64 gcc"
+ x64_gcc = FakeCCompilerOpt(cpu_baseline="max")
+ x86_tables = """\
+``X86`` - CPU feature names
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. table::
+ :align: left
+
+{x86_features}
+
+``X86`` - Group names
+~~~~~~~~~~~~~~~~~~~~~
+
+.. table::
+ :align: left
+
+{x86_gfeatures}
+
+""".format(
+ x86_features = x64_gcc.gen_features_table(
+ x64_gcc.cpu_baseline_names(), margin_left=margin_left
+ ),
+ x86_gfeatures = x64_gcc.gen_gfeatures_table(
+ x64_gcc.cpu_baseline_names(), margin_left=margin_left
+ )
+ )
+ ############### Power ###############
+ FakeCCompilerOpt.fake_info = "ppc64 gcc"
+ ppc64_gcc = FakeCCompilerOpt(cpu_baseline="max")
+ FakeCCompilerOpt.fake_info = "ppc64le gcc"
+ ppc64le_gcc = FakeCCompilerOpt(cpu_baseline="max")
+ ppc64_tables = """\
+``IBM/POWER`` ``big-endian`` - CPU feature names
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. table::
+ :align: left
+
+{ppc64_features}
+
+``IBM/POWER`` ``little-endian mode`` - CPU feature names
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. table::
+ :align: left
+
+{ppc64le_features}
+
+""".format(
+ ppc64_features = ppc64_gcc.gen_features_table(
+ ppc64_gcc.cpu_baseline_names(), margin_left=margin_left
+ ),
+ ppc64le_features = ppc64le_gcc.gen_features_table(
+ ppc64le_gcc.cpu_baseline_names(), margin_left=margin_left
+ )
+ )
+ ############### Arm ###############
+ FakeCCompilerOpt.fake_info = "armhf gcc"
+ armhf_gcc = FakeCCompilerOpt(cpu_baseline="max")
+ FakeCCompilerOpt.fake_info = "aarch64 gcc"
+ aarch64_gcc = FakeCCompilerOpt(cpu_baseline="max")
+ arm_tables = """\
+``ARMHF`` - CPU feature names
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. table::
+ :align: left
+
+{armhf_features}
+
+``ARM64`` ``AARCH64`` - CPU feature names
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. table::
+ :align: left
+
+{aarch64_features}
+
+ """.format(
+ armhf_features = armhf_gcc.gen_features_table(
+ armhf_gcc.cpu_baseline_names(), margin_left=margin_left
+ ),
+ aarch64_features = aarch64_gcc.gen_features_table(
+ aarch64_gcc.cpu_baseline_names(), margin_left=margin_left
+ )
+ )
+ # TODO: diff the difference among all supported compilers
+ with open(path.join(gen_path, 'simd-optimizations-tables.inc'), 'wt') as fd:
+ fd.write(f'.. generated via {__file__}\n\n')
+ fd.write(x86_tables)
+ fd.write(ppc64_tables)
+ fd.write(arm_tables)