diff options
Diffstat (limited to 'benchmarks')
-rw-r--r-- | benchmarks/README.rst | 62 | ||||
-rw-r--r-- | benchmarks/asv.conf.json | 85 | ||||
-rw-r--r-- | benchmarks/benchmarks/__init__.py | 3 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_app.py | 89 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_core.py | 132 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_function_base.py | 126 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_indexing.py | 81 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_io.py | 64 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_linalg.py | 109 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_random.py | 67 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_reduce.py | 67 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_ufunc.py | 152 | ||||
-rw-r--r-- | benchmarks/benchmarks/common.py | 116 |
13 files changed, 1153 insertions, 0 deletions
diff --git a/benchmarks/README.rst b/benchmarks/README.rst new file mode 100644 index 000000000..2ed5d150f --- /dev/null +++ b/benchmarks/README.rst @@ -0,0 +1,62 @@ +.. -*- rst -*- + +================ +NumPy benchmarks +================ + +Benchmarking NumPy with Airspeed Velocity. + + +Usage +----- + +Airspeed Velocity manages building and Python virtualenvs by itself, +unless told otherwise. Some of the benchmarking features in +``runtests.py`` also tell ASV to use the NumPy compiled by +``runtests.py``. To run the benchmarks, you do not need to install a +development version of NumPy to your current Python environment. + +Run a benchmark against currently checked out NumPy version (don't +record the result):: + + python runtests.py --bench bench_core + +Compare change in benchmark results to another version:: + + python runtests.py --bench-compare v1.6.2 bench_core + +Run ASV commands (record results and generate HTML):: + + cd benchmarks + asv run --skip-existing-commits --steps 10 ALL + asv publish + asv preview + +More on how to use ``asv`` can be found in `ASV documentation`_. +Command-line help is available as usual via ``asv --help`` and +``asv run --help``. + +.. _ASV documentation: https://spacetelescope.github.io/asv/ + + +Writing benchmarks +------------------ + +See `ASV documentation`_ for basics on how to write benchmarks. + +Some things to consider: + +- The benchmark suite should be importable with any NumPy version. + +- The benchmark parameters etc. should not depend on which NumPy version + is installed. + +- Try to keep the runtime of the benchmark reasonable. + +- Prefer ASV's ``time_`` methods for benchmarking times rather than cooking up + time measurements via ``time.clock``, even if it requires some juggling when + writing the benchmark. + +- Preparing arrays etc. should generally be put in the ``setup`` method rather + than the ``time_`` methods, to avoid counting preparation time together with + the time of the benchmarked operation. 
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json new file mode 100644 index 000000000..d837b0d67 --- /dev/null +++ b/benchmarks/asv.conf.json @@ -0,0 +1,85 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "numpy", + + // The project's homepage + "project_url": "http://numpy.org/", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": "..", + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "tip" (for mercurial). + "branches": ["master"], + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "virtualenv", + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/numpy/numpy/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + "pythons": ["2.7"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list indicates to just test against the default (latest) + // version. + "matrix": { + "six": [], + }, + + // The directory (relative to the current directory) that benchmarks are + // stored in. 
If not provided, defaults to "benchmarks" + "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + "env_dir": "env", + + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": "results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": "html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time. This is + // the number of builds to keep, per environment. + "wheel_cache_size": 2, + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // } +} diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py new file mode 100644 index 000000000..e8a859ff4 --- /dev/null +++ b/benchmarks/benchmarks/__init__.py @@ -0,0 +1,3 @@ +from __future__ import absolute_import, division, print_function + +from . 
import common diff --git a/benchmarks/benchmarks/bench_app.py b/benchmarks/benchmarks/bench_app.py new file mode 100644 index 000000000..ccf6e4c4a --- /dev/null +++ b/benchmarks/benchmarks/bench_app.py @@ -0,0 +1,89 @@ +from __future__ import absolute_import, division, print_function + +from .common import Benchmark + +import numpy as np + +from six.moves import xrange + + +class LaplaceInplace(Benchmark): + params = ['inplace', 'normal'] + param_names = ['update'] + + def setup(self, update): + N = 150 + Niter = 1000 + dx = 0.1 + dy = 0.1 + dx2 = (dx * dx) + dy2 = (dy * dy) + + def num_update(u, dx2, dy2): + u[1:(-1), 1:(-1)] = ((((u[2:, 1:(-1)] + u[:(-2), 1:(-1)]) * dy2) + + ((u[1:(-1), 2:] + u[1:(-1), :(-2)]) * dx2)) + / (2 * (dx2 + dy2))) + + def num_inplace(u, dx2, dy2): + tmp = u[:(-2), 1:(-1)].copy() + np.add(tmp, u[2:, 1:(-1)], out=tmp) + np.multiply(tmp, dy2, out=tmp) + tmp2 = u[1:(-1), 2:].copy() + np.add(tmp2, u[1:(-1), :(-2)], out=tmp2) + np.multiply(tmp2, dx2, out=tmp2) + np.add(tmp, tmp2, out=tmp) + np.multiply(tmp, (1.0 / (2.0 * (dx2 + dy2))), + out=u[1:(-1), 1:(-1)]) + + def laplace(N, Niter=100, func=num_update, args=()): + u = np.zeros([N, N], order='C') + u[0] = 1 + for i in range(Niter): + func(u, *args) + return u + + func = {'inplace': num_inplace, 'normal': num_update}[update] + + def run(): + laplace(N, Niter, func, args=(dx2, dy2)) + + self.run = run + + def time_it(self, update): + self.run() + + +class MaxesOfDots(Benchmark): + def setup(self): + np.random.seed(1) + nsubj = 5 + nfeat = 100 + ntime = 200 + + self.arrays = [np.random.normal(size=(ntime, nfeat)) + for i in xrange(nsubj)] + + def maxes_of_dots(self, arrays): + """ + A magical feature score for each feature in each dataset + :ref:`Haxby et al., Neuron (2011) <HGC+11>`. + If arrays are column-wise zscore-d before computation it + results in characterizing each column in each array with + sum of maximal correlations of that column with columns + in other arrays. 
+ + Arrays must agree only on the first dimension. + + For numpy it is a joint benchmark of dot products and max() + on a set of arrays. + """ + feature_scores = ([0] * len(arrays)) + for (i, sd) in enumerate(arrays): + for (j, sd2) in enumerate(arrays[(i + 1):]): + corr_temp = np.dot(sd.T, sd2) + feature_scores[i] += np.max(corr_temp, axis=1) + feature_scores[((j + i) + 1)] += np.max(corr_temp, axis=0) + return feature_scores + + def time_it(self): + self.maxes_of_dots(self.arrays) diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py new file mode 100644 index 000000000..6701917cc --- /dev/null +++ b/benchmarks/benchmarks/bench_core.py @@ -0,0 +1,132 @@ +from __future__ import absolute_import, division, print_function + +from .common import Benchmark + +import numpy as np + + +class Core(Benchmark): + def setup(self): + self.l100 = range(100) + self.l50 = range(50) + self.l = [np.arange(1000), np.arange(1000)] + self.l10x10 = np.ones((10, 10)) + + def time_array_1(self): + np.array(1) + + def time_array_empty(self): + np.array([]) + + def time_array_l1(self): + np.array([1]) + + def time_array_l100(self): + np.array(self.l100) + + def time_array_l(self): + np.array(self.l) + + def time_vstack_l(self): + np.vstack(self.l) + + def time_hstack_l(self): + np.hstack(self.l) + + def time_dstack_l(self): + np.dstack(self.l) + + def time_arange_100(self): + np.arange(100) + + def time_zeros_100(self): + np.zeros(100) + + def time_ones_100(self): + np.ones(100) + + def time_empty_100(self): + np.empty(100) + + def time_eye_100(self): + np.eye(100) + + def time_identity_100(self): + np.identity(100) + + def time_eye_3000(self): + np.eye(3000) + + def time_identity_3000(self): + np.identity(3000) + + def time_diag_l100(self): + np.diag(self.l100) + + def time_diagflat_l100(self): + np.diagflat(self.l100) + + def time_diagflat_l50_l50(self): + np.diagflat([self.l50, self.l50]) + + def time_triu_l10x10(self): + np.triu(self.l10x10) + + def 
time_tril_l10x10(self): + np.tril(self.l10x10) + + +class MA(Benchmark): + def setup(self): + self.l100 = range(100) + self.t100 = ([True] * 100) + + def time_masked_array(self): + np.ma.masked_array() + + def time_masked_array_l100(self): + np.ma.masked_array(self.l100) + + def time_masked_array_l100_t100(self): + np.ma.masked_array(self.l100, self.t100) + + +class CorrConv(Benchmark): + params = [[50, 1000, 1e5], + [10, 100, 1000, 1e4], + ['valid', 'same', 'full']] + param_names = ['size1', 'size2', 'mode'] + + def setup(self, size1, size2, mode): + self.x1 = np.linspace(0, 1, num=size1) + self.x2 = np.cos(np.linspace(0, 2*np.pi, num=size2)) + + def time_correlate(self, size1, size2, mode): + np.correlate(self.x1, self.x2, mode=mode) + + def time_convolve(self, size1, size2, mode): + np.convolve(self.x1, self.x2, mode=mode) + + +class CountNonzero(Benchmark): + param_names = ['numaxes', 'size', 'dtype'] + params = [ + [1, 2, 3], + [100, 10000, 1000000], + [bool, int, str, object] + ] + + def setup(self, numaxes, size, dtype): + self.x = np.empty(shape=( + numaxes, size), dtype=dtype) + + def time_count_nonzero(self, numaxes, size, dtype): + np.count_nonzero(self.x) + + def time_count_nonzero_axis(self, numaxes, size, dtype): + np.count_nonzero(self.x, axis=self.x.ndim - 1) + + def time_count_nonzero_multi_axis(self, numaxes, size, dtype): + if self.x.ndim >= 2: + np.count_nonzero(self.x, axis=( + self.x.ndim - 1, self.x.ndim - 2)) diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py new file mode 100644 index 000000000..23103ba66 --- /dev/null +++ b/benchmarks/benchmarks/bench_function_base.py @@ -0,0 +1,126 @@ +from __future__ import absolute_import, division, print_function + +from .common import Benchmark + +import numpy as np + + +class Bincount(Benchmark): + def setup(self): + self.d = np.arange(80000, dtype=np.intp) + self.e = self.d.astype(np.float64) + + def time_bincount(self): + np.bincount(self.d) + + 
def time_weights(self): + np.bincount(self.d, weights=self.e) + + +class Median(Benchmark): + def setup(self): + self.e = np.arange(10000, dtype=np.float32) + self.o = np.arange(10001, dtype=np.float32) + + def time_even(self): + np.median(self.e) + + def time_odd(self): + np.median(self.o) + + def time_even_inplace(self): + np.median(self.e, overwrite_input=True) + + def time_odd_inplace(self): + np.median(self.o, overwrite_input=True) + + def time_even_small(self): + np.median(self.e[:500], overwrite_input=True) + + def time_odd_small(self): + np.median(self.o[:500], overwrite_input=True) + + +class Percentile(Benchmark): + def setup(self): + self.e = np.arange(10000, dtype=np.float32) + self.o = np.arange(10001, dtype=np.float32) + + def time_quartile(self): + np.percentile(self.e, [25, 75]) + + def time_percentile(self): + np.percentile(self.e, [25, 35, 55, 65, 75]) + + +class Select(Benchmark): + def setup(self): + self.d = np.arange(20000) + self.e = self.d.copy() + self.cond = [(self.d > 4), (self.d < 2)] + self.cond_large = [(self.d > 4), (self.d < 2)] * 10 + + def time_select(self): + np.select(self.cond, [self.d, self.e]) + + def time_select_larger(self): + np.select(self.cond_large, ([self.d, self.e] * 10)) + + +class Sort(Benchmark): + def setup(self): + self.e = np.arange(10000, dtype=np.float32) + self.o = np.arange(10001, dtype=np.float32) + np.random.seed(25) + np.random.shuffle(self.o) + # quicksort implementations can have issues with equal elements + self.equal = np.ones(10000) + self.many_equal = np.sort(np.arange(10000) % 10) + + # quicksort median of 3 worst case + self.worst = np.arange(1000000) + x = self.worst + while x.size > 3: + mid = x.size // 2 + x[mid], x[-2] = x[-2], x[mid] + x = x[:-2] + + def time_sort(self): + np.sort(self.e) + + def time_sort_random(self): + np.sort(self.o) + + def time_sort_inplace(self): + self.e.sort() + + def time_sort_equal(self): + self.equal.sort() + + def time_sort_many_equal(self): + 
self.many_equal.sort() + + def time_sort_worst(self): + np.sort(self.worst) + + def time_argsort(self): + self.e.argsort() + + def time_argsort_random(self): + self.o.argsort() + + +class Where(Benchmark): + def setup(self): + self.d = np.arange(20000) + self.e = self.d.copy() + self.cond = (self.d > 5000) + + def time_1(self): + np.where(self.cond) + + def time_2(self): + np.where(self.cond, self.d, self.e) + + def time_2_broadcast(self): + np.where(self.cond, self.d, 0) diff --git a/benchmarks/benchmarks/bench_indexing.py b/benchmarks/benchmarks/bench_indexing.py new file mode 100644 index 000000000..a62a2050e --- /dev/null +++ b/benchmarks/benchmarks/bench_indexing.py @@ -0,0 +1,81 @@ +from __future__ import absolute_import, division, print_function + +from .common import Benchmark, get_squares_, get_indexes_, get_indexes_rand_ + +from os.path import join as pjoin +import shutil +import sys +import six +from numpy import memmap, float32, array +import numpy as np +from tempfile import mkdtemp + + +class Indexing(Benchmark): + params = [["indexes_", "indexes_rand_"], + ['I', ':,I', 'np.ix_(I, I)'], + ['', '=1']] + param_names = ['indexes', 'sel', 'op'] + + def setup(self, indexes, sel, op): + sel = sel.replace('I', indexes) + + ns = {'squares_': get_squares_(), + 'np': np, + 'indexes_': get_indexes_(), + 'indexes_rand_': get_indexes_rand_()} + + if sys.version_info[0] >= 3: + code = "def run():\n for a in squares_.values(): a[%s]%s" + else: + code = "def run():\n for a in squares_.itervalues(): a[%s]%s" + code = code % (sel, op) + + six.exec_(code, ns) + self.func = ns['run'] + + def time_op(self, indexes, sel, op): + self.func() + + +class IndexingSeparate(Benchmark): + def setup(self): + self.tmp_dir = mkdtemp() + self.fp = memmap(pjoin(self.tmp_dir, 'tmp.dat'), + dtype=float32, mode='w+', shape=(50, 60)) + self.indexes = array([3, 4, 6, 10, 20]) + + def teardown(self): + del self.fp + shutil.rmtree(self.tmp_dir) + + def time_mmap_slicing(self): + for i in 
range(1000): + self.fp[5:10] + + def time_mmap_fancy_indexing(self): + for i in range(1000): + self.fp[self.indexes] + + +class IndexingStructured0D(Benchmark): + def setup(self): + self.dt = np.dtype([('a', 'f4', 256)]) + + self.A = np.zeros((), self.dt) + self.B = self.A.copy() + + self.a = np.zeros(1, self.dt)[0] + self.b = self.a.copy() + + def time_array_slice(self): + self.B['a'][:] = self.A['a'] + + def time_array_all(self): + self.B['a'] = self.A['a'] + + def time_scalar_slice(self): + self.b['a'][:] = self.a['a'] + + def time_scalar_all(self): + self.b['a'] = self.a['a'] diff --git a/benchmarks/benchmarks/bench_io.py b/benchmarks/benchmarks/bench_io.py new file mode 100644 index 000000000..782d4ab30 --- /dev/null +++ b/benchmarks/benchmarks/bench_io.py @@ -0,0 +1,64 @@ +from __future__ import absolute_import, division, print_function + +from .common import Benchmark, get_squares + +import numpy as np + + +class Copy(Benchmark): + params = ["int8", "int16", "float32", "float64", + "complex64", "complex128"] + param_names = ['type'] + + def setup(self, typename): + dtype = np.dtype(typename) + self.d = np.arange((50 * 500), dtype=dtype).reshape((500, 50)) + self.e = np.arange((50 * 500), dtype=dtype).reshape((50, 500)) + self.e_d = self.e.reshape(self.d.shape) + self.dflat = np.arange((50 * 500), dtype=dtype) + + def time_memcpy(self, typename): + self.d[...] = self.e_d + + def time_cont_assign(self, typename): + self.d[...] = 1 + + def time_strided_copy(self, typename): + self.d[...] 
= self.e.T + + def time_strided_assign(self, typename): + self.dflat[::2] = 2 + + +class CopyTo(Benchmark): + def setup(self): + self.d = np.ones(50000) + self.e = self.d.copy() + self.m = (self.d == 1) + self.im = (~ self.m) + self.m8 = self.m.copy() + self.m8[::8] = (~ self.m[::8]) + self.im8 = (~ self.m8) + + def time_copyto(self): + np.copyto(self.d, self.e) + + def time_copyto_sparse(self): + np.copyto(self.d, self.e, where=self.m) + + def time_copyto_dense(self): + np.copyto(self.d, self.e, where=self.im) + + def time_copyto_8_sparse(self): + np.copyto(self.d, self.e, where=self.m8) + + def time_copyto_8_dense(self): + np.copyto(self.d, self.e, where=self.im8) + + +class Savez(Benchmark): + def setup(self): + self.squares = get_squares() + + def time_vb_savez_squares(self): + np.savez('tmp.npz', self.squares) diff --git a/benchmarks/benchmarks/bench_linalg.py b/benchmarks/benchmarks/bench_linalg.py new file mode 100644 index 000000000..a65d510be --- /dev/null +++ b/benchmarks/benchmarks/bench_linalg.py @@ -0,0 +1,109 @@ +from __future__ import absolute_import, division, print_function + +from .common import Benchmark, get_squares_, get_indexes_rand, TYPES1 + +import numpy as np + + +class Eindot(Benchmark): + def setup(self): + self.a = np.arange(60000.0).reshape(150, 400) + self.ac = self.a.copy() + self.at = self.a.T + self.atc = self.a.T.copy() + self.b = np.arange(240000.0).reshape(400, 600) + self.c = np.arange(600) + self.d = np.arange(400) + + self.a3 = np.arange(480000.).reshape(60, 80, 100) + self.b3 = np.arange(192000.).reshape(80, 60, 40) + + def time_dot_a_b(self): + np.dot(self.a, self.b) + + def time_dot_d_dot_b_c(self): + np.dot(self.d, np.dot(self.b, self.c)) + + def time_dot_trans_a_at(self): + np.dot(self.a, self.at) + + def time_dot_trans_a_atc(self): + np.dot(self.a, self.atc) + + def time_dot_trans_at_a(self): + np.dot(self.at, self.a) + + def time_dot_trans_atc_a(self): + np.dot(self.atc, self.a) + + def time_einsum_i_ij_j(self): + 
np.einsum('i,ij,j', self.d, self.b, self.c) + + def time_einsum_ij_jk_a_b(self): + np.einsum('ij,jk', self.a, self.b) + + def time_einsum_ijk_jil_kl(self): + np.einsum('ijk,jil->kl', self.a3, self.b3) + + def time_inner_trans_a_a(self): + np.inner(self.a, self.a) + + def time_inner_trans_a_ac(self): + np.inner(self.a, self.ac) + + def time_matmul_a_b(self): + np.matmul(self.a, self.b) + + def time_matmul_d_matmul_b_c(self): + np.matmul(self.d, np.matmul(self.b, self.c)) + + def time_matmul_trans_a_at(self): + np.matmul(self.a, self.at) + + def time_matmul_trans_a_atc(self): + np.matmul(self.a, self.atc) + + def time_matmul_trans_at_a(self): + np.matmul(self.at, self.a) + + def time_matmul_trans_atc_a(self): + np.matmul(self.atc, self.a) + + def time_tensordot_a_b_axes_1_0_0_1(self): + np.tensordot(self.a3, self.b3, axes=([1, 0], [0, 1])) + + +class Linalg(Benchmark): + params = [['svd', 'pinv', 'det', 'norm'], + TYPES1] + param_names = ['op', 'type'] + + def setup(self, op, typename): + np.seterr(all='ignore') + + self.func = getattr(np.linalg, op) + + if op == 'cholesky': + # we need a positive definite + self.a = np.dot(get_squares_()[typename], + get_squares_()[typename].T) + else: + self.a = get_squares_()[typename] + + # check that dtype is supported at all + try: + self.func(self.a[:2, :2]) + except TypeError: + raise NotImplementedError() + + def time_op(self, op, typename): + self.func(self.a) + + +class Lstsq(Benchmark): + def setup(self): + self.a = get_squares_()['float64'] + self.b = get_indexes_rand()[:100].astype(np.float64) + + def time_numpy_linalg_lstsq_a__b_float64(self): + np.linalg.lstsq(self.a, self.b) diff --git a/benchmarks/benchmarks/bench_random.py b/benchmarks/benchmarks/bench_random.py new file mode 100644 index 000000000..18444b9a1 --- /dev/null +++ b/benchmarks/benchmarks/bench_random.py @@ -0,0 +1,67 @@ +from __future__ import absolute_import, division, print_function + +from .common import Benchmark + +import numpy as np +from 
numpy.lib import NumpyVersion + + +class Random(Benchmark): + params = ['normal', 'uniform', 'weibull 1', 'binomial 10 0.5', + 'poisson 10'] + + def setup(self, name): + items = name.split() + name = items.pop(0) + params = [float(x) for x in items] + + self.func = getattr(np.random, name) + self.params = tuple(params) + ((100, 100),) + + def time_rng(self, name): + self.func(*self.params) + + +class Shuffle(Benchmark): + def setup(self): + self.a = np.arange(100000) + + def time_100000(self): + np.random.shuffle(self.a) + + +class Randint(Benchmark): + + def time_randint_fast(self): + """Compare to uint32 below""" + np.random.randint(0, 2**30, size=10**5) + + def time_randint_slow(self): + """Compare to uint32 below""" + np.random.randint(0, 2**30 + 1, size=10**5) + + +class Randint_dtype(Benchmark): + high = { + 'bool': 1, + 'uint8': 2**7, + 'uint16': 2**15, + 'uint32': 2**31, + 'uint64': 2**63 + } + + param_names = ['dtype'] + params = ['bool', 'uint8', 'uint16', 'uint32', 'uint64'] + + def setup(self, name): + if NumpyVersion(np.__version__) < '1.11.0.dev0': + raise NotImplementedError + + def time_randint_fast(self, name): + high = self.high[name] + np.random.randint(0, high, size=10**5, dtype=name) + + def time_randint_slow(self, name): + high = self.high[name] + np.random.randint(0, high + 1, size=10**5, dtype=name) + diff --git a/benchmarks/benchmarks/bench_reduce.py b/benchmarks/benchmarks/bench_reduce.py new file mode 100644 index 000000000..704023528 --- /dev/null +++ b/benchmarks/benchmarks/bench_reduce.py @@ -0,0 +1,67 @@ +from __future__ import absolute_import, division, print_function + +from .common import Benchmark, TYPES1, get_squares + +import numpy as np + + +class AddReduce(Benchmark): + def setup(self): + self.squares = get_squares().values() + + def time_axis_0(self): + [np.add.reduce(a, axis=0) for a in self.squares] + + def time_axis_1(self): + [np.add.reduce(a, axis=1) for a in self.squares] + + +class AddReduceSeparate(Benchmark): + 
params = [[0, 1], TYPES1] + param_names = ['axis', 'type'] + + def setup(self, axis, typename): + self.a = get_squares()[typename] + + def time_reduce(self, axis, typename): + np.add.reduce(self.a, axis=axis) + + +class AnyAll(Benchmark): + def setup(self): + self.zeros = np.zeros(100000, np.bool) + self.ones = np.ones(100000, np.bool) + + def time_all_fast(self): + self.zeros.all() + + def time_all_slow(self): + self.ones.all() + + def time_any_fast(self): + self.ones.any() + + def time_any_slow(self): + self.zeros.any() + + +class MinMax(Benchmark): + params = [np.float32, np.float64, np.intp] + param_names = ['dtype'] + + def setup(self, dtype): + self.d = np.ones(20000, dtype=dtype) + + def time_min(self, dtype): + np.min(self.d) + + def time_max(self, dtype): + np.max(self.d) + + +class SmallReduction(Benchmark): + def setup(self): + self.d = np.ones(100, dtype=np.float32) + + def time_small(self): + np.sum(self.d) diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py new file mode 100644 index 000000000..1baee1340 --- /dev/null +++ b/benchmarks/benchmarks/bench_ufunc.py @@ -0,0 +1,152 @@ +from __future__ import absolute_import, division, print_function + +from .common import Benchmark, get_squares_ + +import numpy as np + + +ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', + 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'bitwise_and', + 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'cbrt', 'ceil', + 'conj', 'conjugate', 'copysign', 'cos', 'cosh', 'deg2rad', + 'degrees', 'divide', 'equal', 'exp', 'exp2', 'expm1', + 'fabs', 'floor', 'floor_divide', 'fmax', 'fmin', 'fmod', + 'frexp', 'greater', 'greater_equal', 'hypot', 'invert', + 'isfinite', 'isinf', 'isnan', 'ldexp', 'left_shift', 'less', + 'less_equal', 'log', 'log10', 'log1p', 'log2', 'logaddexp', + 'logaddexp2', 'logical_and', 'logical_not', 'logical_or', + 'logical_xor', 'maximum', 'minimum', 'mod', 'modf', + 'multiply', 'negative', 'nextafter', 'not_equal', 
'power', + 'rad2deg', 'radians', 'reciprocal', 'remainder', + 'right_shift', 'rint', 'sign', 'signbit', 'sin', 'sinh', + 'spacing', 'sqrt', 'square', 'subtract', 'tan', 'tanh', + 'true_divide', 'trunc'] + +for name in dir(np): + if isinstance(getattr(np, name, None), np.ufunc) and name not in ufuncs: + print("Missing ufunc %r" % (name,)) + + +class Broadcast(Benchmark): + def setup(self): + self.d = np.ones((50000, 100), dtype=np.float64) + self.e = np.ones((100,), dtype=np.float64) + + def time_broadcast(self): + self.d - self.e + + +class UFunc(Benchmark): + params = [ufuncs] + param_names = ['ufunc'] + timeout = 10 + + def setup(self, ufuncname): + np.seterr(all='ignore') + try: + self.f = getattr(np, ufuncname) + except AttributeError: + raise NotImplementedError() + self.args = [] + for t, a in get_squares_().items(): + arg = (a,) * self.f.nin + try: + self.f(*arg) + except TypeError: + continue + self.args.append(arg) + + def time_ufunc_types(self, ufuncname): + [self.f(*arg) for arg in self.args] + + +class Custom(Benchmark): + def setup(self): + self.b = np.ones(20000, dtype=np.bool) + + def time_nonzero(self): + np.nonzero(self.b) + + def time_not_bool(self): + (~self.b) + + def time_and_bool(self): + (self.b & self.b) + + def time_or_bool(self): + (self.b | self.b) + + +class CustomInplace(Benchmark): + def setup(self): + self.c = np.ones(500000, dtype=np.int8) + self.i = np.ones(150000, dtype=np.int32) + self.f = np.zeros(150000, dtype=np.float32) + self.d = np.zeros(75000, dtype=np.float64) + # fault memory + self.f *= 1. + self.d *= 1. 
+ + def time_char_or(self): + np.bitwise_or(self.c, 0, out=self.c) + np.bitwise_or(0, self.c, out=self.c) + + def time_char_or_temp(self): + 0 | self.c | 0 + + def time_int_or(self): + np.bitwise_or(self.i, 0, out=self.i) + np.bitwise_or(0, self.i, out=self.i) + + def time_int_or_temp(self): + 0 | self.i | 0 + + def time_float_add(self): + np.add(self.f, 1., out=self.f) + np.add(1., self.f, out=self.f) + + def time_float_add_temp(self): + 1. + self.f + 1. + + def time_double_add(self): + np.add(self.d, 1., out=self.d) + np.add(1., self.d, out=self.d) + + def time_double_add_temp(self): + 1. + self.d + 1. + + +class CustomScalar(Benchmark): + params = [np.float32, np.float64] + param_names = ['dtype'] + + def setup(self, dtype): + self.d = np.ones(20000, dtype=dtype) + + def time_add_scalar2(self, dtype): + np.add(self.d, 1) + + def time_divide_scalar2(self, dtype): + np.divide(self.d, 1) + + def time_divide_scalar2_inplace(self, dtype): + np.divide(self.d, 1, out=self.d) + + def time_less_than_scalar2(self, dtype): + (self.d < 1) + + +class Scalar(Benchmark): + def setup(self): + self.x = np.asarray(1.0) + self.y = np.asarray((1.0 + 1j)) + self.z = complex(1.0, 1.0) + + def time_add_scalar(self): + (self.x + self.x) + + def time_add_scalar_conv(self): + (self.x + 1.0) + + def time_add_scalar_conv_complex(self): + (self.y + self.z) diff --git a/benchmarks/benchmarks/common.py b/benchmarks/benchmarks/common.py new file mode 100644 index 000000000..18a09fd40 --- /dev/null +++ b/benchmarks/benchmarks/common.py @@ -0,0 +1,116 @@ +from __future__ import absolute_import, division, print_function + +import numpy +import random + +# Various pre-crafted datasets/variables for testing +# !!! Must not be changed -- only appended !!! 
+# while testing numpy we better not rely on numpy to produce random +# sequences +random.seed(1) +# but will seed it nevertheless +numpy.random.seed(1) + +nx, ny = 1000, 1000 +# reduced squares based on indexes_rand, primarily for testing more +# time-consuming functions (ufunc, linalg, etc) +nxs, nys = 100, 100 + +# a set of interesting types to test +TYPES1 = [ + 'int16', 'float16', + 'int32', 'float32', + 'int64', 'float64', 'complex64', + 'longfloat', 'complex128', +] +if 'complex256' in numpy.typeDict: + TYPES1.append('complex256') + + +def memoize(func): + result = [] + def wrapper(): + if not result: + result.append(func()) + return result[0] + return wrapper + + +# values which will be used to construct our sample data matrices +# replicate 10 times to speed up initial imports of this helper +# and generate some redundancy + +@memoize +def get_values(): + rnd = numpy.random.RandomState(1) + values = numpy.tile(rnd.uniform(0, 100, size=nx*ny//10), 10) + return values + + +@memoize +def get_squares(): + values = get_values() + squares = {t: numpy.array(values, + dtype=getattr(numpy, t)).reshape((nx, ny)) + for t in TYPES1} + + # adjust complex ones to have non-degenerate imaginary part -- use + # original data transposed for that + for t, v in squares.items(): + if t.startswith('complex'): + v += v.T*1j + return squares + + +@memoize +def get_squares_(): + # smaller squares + squares_ = {t: s[:nxs, :nys] for t, s in get_squares().items()} + return squares_ + + +@memoize +def get_vectors(): + # vectors + vectors = {t: s[0] for t, s in get_squares().items()} + return vectors + + +@memoize +def get_indexes(): + indexes = list(range(nx)) + # so we do not have all items + indexes.pop(5) + indexes.pop(95) + + indexes = numpy.array(indexes) + return indexes + + +@memoize +def get_indexes_rand(): + rnd = random.Random(1) + + indexes_rand = get_indexes().tolist() # copy + rnd.shuffle(indexes_rand) # in-place shuffle + indexes_rand = numpy.array(indexes_rand) + return 
indexes_rand + + +@memoize +def get_indexes_(): + # smaller versions + indexes = get_indexes() + indexes_ = indexes[indexes < nxs] + return indexes_ + + +@memoize +def get_indexes_rand_(): + indexes_rand = get_indexes_rand() + indexes_rand_ = indexes_rand[indexes_rand < nxs] + return indexes_rand_ + + +class Benchmark(object): + goal_time = 0.25 |