summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSayed Adel <seiko@imavr.com>2020-06-13 18:15:41 +0200
committerSayed Adel <seiko@imavr.com>2020-06-15 22:49:27 +0200
commit4c97add06371bf80f3bc02e0cfb0ee83737b909c (patch)
treeff6de3901d33d82d11b474b081e5d817850884b2
parent8ccd582937a182391f49f412908fecf1770787d7 (diff)
downloadnumpy-4c97add06371bf80f3bc02e0cfb0ee83737b909c.tar.gz
ENH: [3/7] enable multi-platform SIMD compiler optimizations
- Put `CCompilerOpt` into action by adding two command-line arguments that are passed directly to `CCompilerOpt`'s parameters, explained as follows:
  * `--cpu-baseline`: the minimal set of required optimizations. The default is 'min', which provides the minimum CPU features that can safely run on a wide range of user platforms.
  * `--cpu-dispatch`: the dispatched set of additional optimizations. The default is 'max -xop -fma4', which enables all CPU features except for AMD legacy features.
  The new arguments can be reached from `build`, `build_clib`, and `build_ext`. If they are not specified by the user for `build_clib` or `build_ext`, the arguments of `build` will be used, which also hold the default values.
- Activate the new compiler dispatcher that comes with `CCompilerOpt` by adding a hook inside `build_clib` and `build_ext` that works as a filter: it takes any C source files ending with `.dispatch.c`, passes them directly to `CCompilerOpt`, and then takes the returned objects and links them into the final C lib.
- Add a third command-line argument, `--disable-optimization`, which explicitly disables the whole new infrastructure. It also adds a new compiler definition called `NPY_DISABLE_OPTIMIZATION`. When `--disable-optimization` is enabled, the dispatch-able sources that end with `.dispatch.c` will be treated as normal C sources. Because of this, any inclusion of C headers generated by `CCompilerOpt` must be guarded with `NPY_DISABLE_OPTIMIZATION`; otherwise, it will definitely break the build.
- New auto-generated C header located at `core/include/numpy/_cpu_dispatch.h`. The new header contains all definitions and headers of the CPU features that are enabled according to the configuration specified in `--cpu-baseline` and `--cpu-dispatch`.
-rw-r--r--.gitignore1
-rw-r--r--numpy/distutils/command/build.py9
-rw-r--r--numpy/distutils/command/build_clib.py66
-rw-r--r--numpy/distutils/command/build_ext.py61
-rwxr-xr-xruntests.py12
5 files changed, 144 insertions, 5 deletions
diff --git a/.gitignore b/.gitignore
index 302968a14..2a77f4f83 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,6 +130,7 @@ numpy/core/src/common/npy_cpu_features.c
numpy/core/src/common/npy_partition.h
numpy/core/src/common/npy_sort.h
numpy/core/src/common/templ_common.h
+numpy/core/src/common/_cpu_dispatch.h
numpy/core/src/multiarray/_multiarray_tests.c
numpy/core/src/multiarray/arraytypes.c
numpy/core/src/multiarray/einsum.c
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index a156a7c6e..60ba4c917 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -16,6 +16,12 @@ class build(old_build):
"specify the Fortran compiler type"),
('warn-error', None,
"turn all warnings into errors (-Werror)"),
+ ('cpu-baseline=', None,
+ "specify a list of enabled baseline CPU optimizations"),
+ ('cpu-dispatch=', None,
+ "specify a list of dispatched CPU optimizations"),
+ ('disable-optimization', None,
+ "disable CPU optimized code(dispatch,simd,fast...)"),
]
help_options = old_build.help_options + [
@@ -27,6 +33,9 @@ class build(old_build):
old_build.initialize_options(self)
self.fcompiler = None
self.warn_error = False
+ self.cpu_baseline = "min"
+ self.cpu_dispatch = "max -xop -fma4" # drop AMD legacy features by default
+ self.disable_optimization = False
def finalize_options(self):
build_scripts = self.build_scripts
diff --git a/numpy/distutils/command/build_clib.py b/numpy/distutils/command/build_clib.py
index f6a84e351..87345adbc 100644
--- a/numpy/distutils/command/build_clib.py
+++ b/numpy/distutils/command/build_clib.py
@@ -13,6 +13,7 @@ from numpy.distutils.misc_util import (
filter_sources, get_lib_source_files, get_numpy_include_dirs,
has_cxx_sources, has_f_sources, is_sequence
)
+from numpy.distutils.ccompiler_opt import new_ccompiler_opt
# Fix Python distutils bug sf #1718574:
_l = old_build_clib.user_options
@@ -34,9 +35,16 @@ class build_clib(old_build_clib):
"number of parallel jobs"),
('warn-error', None,
"turn all warnings into errors (-Werror)"),
+ ('cpu-baseline=', None,
+ "specify a list of enabled baseline CPU optimizations"),
+ ('cpu-dispatch=', None,
+ "specify a list of dispatched CPU optimizations"),
+ ('disable-optimization', None,
+ "disable CPU optimized code(dispatch,simd,fast...)"),
]
- boolean_options = old_build_clib.boolean_options + ['inplace', 'warn-error']
+ boolean_options = old_build_clib.boolean_options + \
+ ['inplace', 'warn-error', 'disable-optimization']
def initialize_options(self):
old_build_clib.initialize_options(self)
@@ -44,6 +52,10 @@ class build_clib(old_build_clib):
self.inplace = 0
self.parallel = None
self.warn_error = None
+ self.cpu_baseline = None
+ self.cpu_dispatch = None
+ self.disable_optimization = None
+
def finalize_options(self):
if self.parallel:
@@ -55,6 +67,9 @@ class build_clib(old_build_clib):
self.set_undefined_options('build',
('parallel', 'parallel'),
('warn_error', 'warn_error'),
+ ('cpu_baseline', 'cpu_baseline'),
+ ('cpu_dispatch', 'cpu_dispatch'),
+ ('disable_optimization', 'disable_optimization')
)
def have_f_sources(self):
@@ -102,6 +117,25 @@ class build_clib(old_build_clib):
self.compiler.show_customization()
+ if not self.disable_optimization:
+ opt_cache_path = os.path.abspath(
+ os.path.join(self.build_temp, 'ccompiler_opt_cache_clib.py'
+ ))
+ self.compiler_opt = new_ccompiler_opt(
+ compiler=self.compiler, cpu_baseline=self.cpu_baseline,
+ cpu_dispatch=self.cpu_dispatch, cache_path=opt_cache_path
+ )
+ if not self.compiler_opt.is_cached():
+ log.info("Detected changes on compiler optimizations, force rebuilding")
+ self.force = True
+
+ import atexit
+ def report():
+ log.info("\n########### CLIB COMPILER OPTIMIZATION ###########")
+ log.info(self.compiler_opt.report(full=True))
+
+ atexit.register(report)
+
if self.have_f_sources():
from numpy.distutils.fcompiler import new_fcompiler
self._f_compiler = new_fcompiler(compiler=self.fcompiler,
@@ -211,6 +245,8 @@ class build_clib(old_build_clib):
'extra_f90_compile_args') or []
macros = build_info.get('macros')
+ if macros is None:
+ macros = []
include_dirs = build_info.get('include_dirs')
if include_dirs is None:
include_dirs = []
@@ -223,6 +259,31 @@ class build_clib(old_build_clib):
if requiref90:
self.mkpath(module_build_dir)
+ dispatch_objects = []
+ if not self.disable_optimization:
+ dispatch_sources = [
+ c_sources.pop(c_sources.index(src))
+ for src in c_sources[:] if src.endswith(".dispatch.c")
+ ]
+ if dispatch_sources:
+ if not self.inplace:
+ build_src = self.get_finalized_command("build_src").build_src
+ else:
+ build_src = None
+ dispatch_objects = self.compiler_opt.try_dispatch(
+ dispatch_sources,
+ output_dir=self.build_temp,
+ src_dir=build_src,
+ macros=macros,
+ include_dirs=include_dirs,
+ debug=self.debug,
+ extra_postargs=extra_postargs
+ )
+ extra_args_baseopt = extra_postargs + self.compiler_opt.cpu_baseline_flags()
+ else:
+ extra_args_baseopt = extra_postargs
+ macros.append(("NPY_DISABLE_OPTIMIZATION", 1))
+
if compiler.compiler_type == 'msvc':
# this hack works around the msvc compiler attributes
# problem, msvc uses its own convention :(
@@ -237,7 +298,8 @@ class build_clib(old_build_clib):
macros=macros,
include_dirs=include_dirs,
debug=self.debug,
- extra_postargs=extra_postargs)
+ extra_postargs=extra_args_baseopt)
+ objects.extend(dispatch_objects)
if cxx_sources:
log.info("compiling C++ sources")
diff --git a/numpy/distutils/command/build_ext.py b/numpy/distutils/command/build_ext.py
index d53285c92..b6557fcf6 100644
--- a/numpy/distutils/command/build_ext.py
+++ b/numpy/distutils/command/build_ext.py
@@ -19,7 +19,7 @@ from numpy.distutils.misc_util import (
has_cxx_sources, has_f_sources, is_sequence
)
from numpy.distutils.command.config_compiler import show_fortran_compilers
-
+from numpy.distutils.ccompiler_opt import new_ccompiler_opt
class build_ext (old_build_ext):
@@ -33,6 +33,12 @@ class build_ext (old_build_ext):
"number of parallel jobs"),
('warn-error', None,
"turn all warnings into errors (-Werror)"),
+ ('cpu-baseline=', None,
+ "specify a list of enabled baseline CPU optimizations"),
+ ('cpu-dispatch=', None,
+ "specify a list of dispatched CPU optimizations"),
+ ('disable-optimization', None,
+ "disable CPU optimized code(dispatch,simd,fast...)"),
]
help_options = old_build_ext.help_options + [
@@ -40,13 +46,16 @@ class build_ext (old_build_ext):
show_fortran_compilers),
]
- boolean_options = old_build_ext.boolean_options + ['warn-error']
+ boolean_options = old_build_ext.boolean_options + ['warn-error', 'disable-optimization']
def initialize_options(self):
old_build_ext.initialize_options(self)
self.fcompiler = None
self.parallel = None
self.warn_error = None
+ self.cpu_baseline = None
+ self.cpu_dispatch = None
+ self.disable_optimization = None
def finalize_options(self):
if self.parallel:
@@ -75,6 +84,9 @@ class build_ext (old_build_ext):
self.set_undefined_options('build',
('parallel', 'parallel'),
('warn_error', 'warn_error'),
+ ('cpu_baseline', 'cpu_baseline'),
+ ('cpu_dispatch', 'cpu_dispatch'),
+ ('disable_optimization', 'disable_optimization'),
)
def run(self):
@@ -129,6 +141,22 @@ class build_ext (old_build_ext):
self.compiler.show_customization()
+ if not self.disable_optimization:
+ opt_cache_path = os.path.abspath(os.path.join(self.build_temp, 'ccompiler_opt_cache_ext.py'))
+ self.compiler_opt = new_ccompiler_opt(compiler=self.compiler,
+ cpu_baseline=self.cpu_baseline,
+ cpu_dispatch=self.cpu_dispatch,
+ cache_path=opt_cache_path)
+ if not self.compiler_opt.is_cached():
+ log.info("Detected changes on compiler optimizations, force rebuilding")
+ self.force = True
+
+ import atexit
+ def report():
+ log.info("\n########### EXT COMPILER OPTIMIZATION ###########")
+ log.info(self.compiler_opt.report(full=True))
+ atexit.register(report)
+
# Setup directory for storing generated extra DLL files on Windows
self.extra_dll_dir = os.path.join(self.build_temp, '.libs')
if not os.path.isdir(self.extra_dll_dir):
@@ -378,6 +406,32 @@ class build_ext (old_build_ext):
include_dirs = ext.include_dirs + get_numpy_include_dirs()
+ dispatch_objects = []
+ if not self.disable_optimization:
+ dispatch_sources = [
+ c_sources.pop(c_sources.index(src))
+ for src in c_sources[:] if src.endswith(".dispatch.c")
+ ]
+ if dispatch_sources:
+ if not self.inplace:
+ build_src = self.get_finalized_command("build_src").build_src
+ else:
+ build_src = None
+ dispatch_objects = self.compiler_opt.try_dispatch(
+ dispatch_sources,
+ output_dir=output_dir,
+ src_dir=build_src,
+ macros=macros,
+ include_dirs=include_dirs,
+ debug=self.debug,
+ extra_postargs=extra_args,
+ **kws
+ )
+ extra_args_baseopt = extra_args + self.compiler_opt.cpu_baseline_flags()
+ else:
+ extra_args_baseopt = extra_args
+ macros.append(("NPY_DISABLE_OPTIMIZATION", 1))
+
c_objects = []
if c_sources:
log.info("compiling C sources")
@@ -386,8 +440,9 @@ class build_ext (old_build_ext):
macros=macros,
include_dirs=include_dirs,
debug=self.debug,
- extra_postargs=extra_args,
+ extra_postargs=extra_args_baseopt,
**kws)
+ c_objects.extend(dispatch_objects)
if cxx_sources:
log.info("compiling C++ sources")
diff --git a/runtests.py b/runtests.py
index 7f1d55b85..beaf668d6 100755
--- a/runtests.py
+++ b/runtests.py
@@ -114,6 +114,12 @@ def main(argv):
help="Number of parallel jobs during build")
parser.add_argument("--warn-error", action="store_true",
help="Set -Werror to convert all compiler warnings to errors")
+ parser.add_argument("--cpu-baseline", default=None,
+ help="Specify a list of enabled baseline CPU optimizations"),
+ parser.add_argument("--cpu-dispatch", default=None,
+ help="Specify a list of dispatched CPU optimizations"),
+ parser.add_argument("--disable-optimization", action="store_true",
+ help="Disable CPU optimized code(dispatch,simd,fast...)"),
parser.add_argument("--show-build-log", action="store_true",
help="Show build output rather than using a log file")
parser.add_argument("--bench", action="store_true",
@@ -388,6 +394,12 @@ def build_project(args):
cmd += ["build_src", "--verbose-cfg"]
if args.warn_error:
cmd += ["--warn-error"]
+ if args.cpu_baseline:
+ cmd += ["--cpu-baseline", args.cpu_baseline]
+ if args.cpu_dispatch:
+ cmd += ["--cpu-dispatch", args.cpu_dispatch]
+ if args.disable_optimization:
+ cmd += ["--disable-optimization"]
# Install; avoid producing eggs so numpy can be imported from dst_dir.
cmd += ['install', '--prefix=' + dst_dir,
'--single-version-externally-managed',