diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | numpy/distutils/command/build.py | 9 | ||||
-rw-r--r-- | numpy/distutils/command/build_clib.py | 66 | ||||
-rw-r--r-- | numpy/distutils/command/build_ext.py | 61 | ||||
-rwxr-xr-x | runtests.py | 12 |
5 files changed, 144 insertions, 5 deletions
diff --git a/.gitignore b/.gitignore index 302968a14..2a77f4f83 100644 --- a/.gitignore +++ b/.gitignore @@ -130,6 +130,7 @@ numpy/core/src/common/npy_cpu_features.c numpy/core/src/common/npy_partition.h numpy/core/src/common/npy_sort.h numpy/core/src/common/templ_common.h +numpy/core/src/common/_cpu_dispatch.h numpy/core/src/multiarray/_multiarray_tests.c numpy/core/src/multiarray/arraytypes.c numpy/core/src/multiarray/einsum.c diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py index a156a7c6e..60ba4c917 100644 --- a/numpy/distutils/command/build.py +++ b/numpy/distutils/command/build.py @@ -16,6 +16,12 @@ class build(old_build): "specify the Fortran compiler type"), ('warn-error', None, "turn all warnings into errors (-Werror)"), + ('cpu-baseline=', None, + "specify a list of enabled baseline CPU optimizations"), + ('cpu-dispatch=', None, + "specify a list of dispatched CPU optimizations"), + ('disable-optimization', None, + "disable CPU optimized code(dispatch,simd,fast...)"), ] help_options = old_build.help_options + [ @@ -27,6 +33,9 @@ class build(old_build): old_build.initialize_options(self) self.fcompiler = None self.warn_error = False + self.cpu_baseline = "min" + self.cpu_dispatch = "max -xop -fma4" # drop AMD legacy features by default + self.disable_optimization = False def finalize_options(self): build_scripts = self.build_scripts diff --git a/numpy/distutils/command/build_clib.py b/numpy/distutils/command/build_clib.py index f6a84e351..87345adbc 100644 --- a/numpy/distutils/command/build_clib.py +++ b/numpy/distutils/command/build_clib.py @@ -13,6 +13,7 @@ from numpy.distutils.misc_util import ( filter_sources, get_lib_source_files, get_numpy_include_dirs, has_cxx_sources, has_f_sources, is_sequence ) +from numpy.distutils.ccompiler_opt import new_ccompiler_opt # Fix Python distutils bug sf #1718574: _l = old_build_clib.user_options @@ -34,9 +35,16 @@ class build_clib(old_build_clib): "number of parallel jobs"), ('warn-error', None, "turn all warnings into errors (-Werror)"), + ('cpu-baseline=', None, + "specify a list of enabled baseline CPU optimizations"), + ('cpu-dispatch=', None, + "specify a list of dispatched CPU optimizations"), + ('disable-optimization', None, + "disable CPU optimized code(dispatch,simd,fast...)"), ] - boolean_options = old_build_clib.boolean_options + ['inplace', 'warn-error'] + boolean_options = old_build_clib.boolean_options + \ + ['inplace', 'warn-error', 'disable-optimization'] def initialize_options(self): old_build_clib.initialize_options(self) @@ -44,6 +52,10 @@ class build_clib(old_build_clib): self.inplace = 0 self.parallel = None self.warn_error = None + self.cpu_baseline = None + self.cpu_dispatch = None + self.disable_optimization = None + def finalize_options(self): if self.parallel: @@ -55,6 +67,9 @@ class build_clib(old_build_clib): self.set_undefined_options('build', ('parallel', 'parallel'), ('warn_error', 'warn_error'), + ('cpu_baseline', 'cpu_baseline'), + ('cpu_dispatch', 'cpu_dispatch'), + ('disable_optimization', 'disable_optimization') ) def have_f_sources(self): @@ -102,6 +117,25 @@ class build_clib(old_build_clib): self.compiler.show_customization() + if not self.disable_optimization: + opt_cache_path = os.path.abspath( + os.path.join(self.build_temp, 'ccompiler_opt_cache_clib.py' + )) + self.compiler_opt = new_ccompiler_opt( + compiler=self.compiler, cpu_baseline=self.cpu_baseline, + cpu_dispatch=self.cpu_dispatch, cache_path=opt_cache_path + ) + if not self.compiler_opt.is_cached(): + log.info("Detected changes on compiler optimizations, force rebuilding") + self.force = True + + import atexit + def report(): + log.info("\n########### CLIB COMPILER OPTIMIZATION ###########") + log.info(self.compiler_opt.report(full=True)) + + atexit.register(report) + if self.have_f_sources(): from numpy.distutils.fcompiler import new_fcompiler self._f_compiler = new_fcompiler(compiler=self.fcompiler, @@ -211,6 +245,8 @@ class build_clib(old_build_clib): 'extra_f90_compile_args') or [] macros = build_info.get('macros') + if macros is None: + macros = [] include_dirs = build_info.get('include_dirs') if include_dirs is None: include_dirs = [] @@ -223,6 +259,31 @@ class build_clib(old_build_clib): if requiref90: self.mkpath(module_build_dir) + dispatch_objects = [] + if not self.disable_optimization: + dispatch_sources = [ + c_sources.pop(c_sources.index(src)) + for src in c_sources[:] if src.endswith(".dispatch.c") + ] + if dispatch_sources: + if not self.inplace: + build_src = self.get_finalized_command("build_src").build_src + else: + build_src = None + dispatch_objects = self.compiler_opt.try_dispatch( + dispatch_sources, + output_dir=self.build_temp, + src_dir=build_src, + macros=macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_postargs + ) + extra_args_baseopt = extra_postargs + self.compiler_opt.cpu_baseline_flags() + else: + extra_args_baseopt = extra_postargs + macros.append(("NPY_DISABLE_OPTIMIZATION", 1)) + if compiler.compiler_type == 'msvc': # this hack works around the msvc compiler attributes # problem, msvc uses its own convention :( @@ -237,7 +298,8 @@ class build_clib(old_build_clib): macros=macros, include_dirs=include_dirs, debug=self.debug, - extra_postargs=extra_postargs) + extra_postargs=extra_args_baseopt) + objects.extend(dispatch_objects) if cxx_sources: log.info("compiling C++ sources") diff --git a/numpy/distutils/command/build_ext.py b/numpy/distutils/command/build_ext.py index d53285c92..b6557fcf6 100644 --- a/numpy/distutils/command/build_ext.py +++ b/numpy/distutils/command/build_ext.py @@ -19,7 +19,7 @@ from numpy.distutils.misc_util import ( has_cxx_sources, has_f_sources, is_sequence ) from numpy.distutils.command.config_compiler import show_fortran_compilers - +from numpy.distutils.ccompiler_opt import new_ccompiler_opt class build_ext (old_build_ext): @@ -33,6 +33,12 @@ class build_ext (old_build_ext): "number of parallel jobs"), ('warn-error', None, "turn all warnings into errors (-Werror)"), + ('cpu-baseline=', None, + "specify a list of enabled baseline CPU optimizations"), + ('cpu-dispatch=', None, + "specify a list of dispatched CPU optimizations"), + ('disable-optimization', None, + "disable CPU optimized code(dispatch,simd,fast...)"), ] help_options = old_build_ext.help_options + [ @@ -40,13 +46,16 @@ class build_ext (old_build_ext): show_fortran_compilers), ] - boolean_options = old_build_ext.boolean_options + ['warn-error'] + boolean_options = old_build_ext.boolean_options + ['warn-error', 'disable-optimization'] def initialize_options(self): old_build_ext.initialize_options(self) self.fcompiler = None self.parallel = None self.warn_error = None + self.cpu_baseline = None + self.cpu_dispatch = None + self.disable_optimization = None def finalize_options(self): if self.parallel: @@ -75,6 +84,9 @@ class build_ext (old_build_ext): self.set_undefined_options('build', ('parallel', 'parallel'), ('warn_error', 'warn_error'), + ('cpu_baseline', 'cpu_baseline'), + ('cpu_dispatch', 'cpu_dispatch'), + ('disable_optimization', 'disable_optimization'), ) def run(self): @@ -129,6 +141,22 @@ class build_ext (old_build_ext): self.compiler.show_customization() + if not self.disable_optimization: + opt_cache_path = os.path.abspath(os.path.join(self.build_temp, 'ccompiler_opt_cache_ext.py')) + self.compiler_opt = new_ccompiler_opt(compiler=self.compiler, + cpu_baseline=self.cpu_baseline, + cpu_dispatch=self.cpu_dispatch, + cache_path=opt_cache_path) + if not self.compiler_opt.is_cached(): + log.info("Detected changes on compiler optimizations, force rebuilding") + self.force = True + + import atexit + def report(): + log.info("\n########### EXT COMPILER OPTIMIZATION ###########") + log.info(self.compiler_opt.report(full=True)) + atexit.register(report) + # Setup directory for storing generated extra DLL files on Windows self.extra_dll_dir = os.path.join(self.build_temp, '.libs') if not os.path.isdir(self.extra_dll_dir): @@ -378,6 +406,32 @@ class build_ext (old_build_ext): include_dirs = ext.include_dirs + get_numpy_include_dirs() + dispatch_objects = [] + if not self.disable_optimization: + dispatch_sources = [ + c_sources.pop(c_sources.index(src)) + for src in c_sources[:] if src.endswith(".dispatch.c") + ] + if dispatch_sources: + if not self.inplace: + build_src = self.get_finalized_command("build_src").build_src + else: + build_src = None + dispatch_objects = self.compiler_opt.try_dispatch( + dispatch_sources, + output_dir=output_dir, + src_dir=build_src, + macros=macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_args, + **kws + ) + extra_args_baseopt = extra_args + self.compiler_opt.cpu_baseline_flags() + else: + extra_args_baseopt = extra_args + macros.append(("NPY_DISABLE_OPTIMIZATION", 1)) + c_objects = [] if c_sources: log.info("compiling C sources") @@ -386,8 +440,9 @@ class build_ext (old_build_ext): macros=macros, include_dirs=include_dirs, debug=self.debug, - extra_postargs=extra_args, + extra_postargs=extra_args_baseopt, **kws) + c_objects.extend(dispatch_objects) if cxx_sources: log.info("compiling C++ sources") diff --git a/runtests.py b/runtests.py index 7f1d55b85..beaf668d6 100755 --- a/runtests.py +++ b/runtests.py @@ -114,6 +114,12 @@ def main(argv): help="Number of parallel jobs during build") parser.add_argument("--warn-error", action="store_true", help="Set -Werror to convert all compiler warnings to errors") + parser.add_argument("--cpu-baseline", default=None, + help="Specify a list of enabled baseline CPU optimizations"), + parser.add_argument("--cpu-dispatch", default=None, + help="Specify a list of dispatched CPU optimizations"), + parser.add_argument("--disable-optimization", action="store_true", + help="Disable CPU optimized code(dispatch,simd,fast...)"), parser.add_argument("--show-build-log", action="store_true", help="Show build output rather than using a log file") parser.add_argument("--bench", action="store_true", @@ -388,6 +394,12 @@ def build_project(args): cmd += ["build_src", "--verbose-cfg"] if args.warn_error: cmd += ["--warn-error"] + if args.cpu_baseline: + cmd += ["--cpu-baseline", args.cpu_baseline] + if args.cpu_dispatch: + cmd += ["--cpu-dispatch", args.cpu_dispatch] + if args.disable_optimization: + cmd += ["--disable-optimization"] # Install; avoid producing eggs so numpy can be imported from dst_dir. cmd += ['install', '--prefix=' + dst_dir, '--single-version-externally-managed', |