summaryrefslogtreecommitdiff
path: root/benchmarks
diff options
context:
space:
mode:
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/README.rst62
-rw-r--r--benchmarks/asv.conf.json85
-rw-r--r--benchmarks/benchmarks/__init__.py3
-rw-r--r--benchmarks/benchmarks/bench_app.py89
-rw-r--r--benchmarks/benchmarks/bench_core.py132
-rw-r--r--benchmarks/benchmarks/bench_function_base.py126
-rw-r--r--benchmarks/benchmarks/bench_indexing.py81
-rw-r--r--benchmarks/benchmarks/bench_io.py64
-rw-r--r--benchmarks/benchmarks/bench_linalg.py109
-rw-r--r--benchmarks/benchmarks/bench_random.py67
-rw-r--r--benchmarks/benchmarks/bench_reduce.py67
-rw-r--r--benchmarks/benchmarks/bench_ufunc.py152
-rw-r--r--benchmarks/benchmarks/common.py116
13 files changed, 1153 insertions, 0 deletions
diff --git a/benchmarks/README.rst b/benchmarks/README.rst
new file mode 100644
index 000000000..2ed5d150f
--- /dev/null
+++ b/benchmarks/README.rst
@@ -0,0 +1,62 @@
+.. -*- rst -*-
+
+================
+NumPy benchmarks
+================
+
+Benchmarking NumPy with Airspeed Velocity.
+
+
+Usage
+-----
+
+Airspeed Velocity manages building and Python virtualenvs by itself,
+unless told otherwise. Some of the benchmarking features in
+``runtests.py`` also tell ASV to use the NumPy compiled by
+``runtests.py``. To run the benchmarks, you do not need to install a
+development version of NumPy to your current Python environment.
+
+Run a benchmark against currently checked out NumPy version (don't
+record the result)::
+
+ python runtests.py --bench bench_core
+
+Compare change in benchmark results to another version::
+
+ python runtests.py --bench-compare v1.6.2 bench_core
+
+Run ASV commands (record results and generate HTML)::
+
+ cd benchmarks
+ asv run --skip-existing-commits --steps 10 ALL
+ asv publish
+ asv preview
+
+More on how to use ``asv`` can be found in `ASV documentation`_.
+Command-line help is available as usual via ``asv --help`` and
+``asv run --help``.
+
+.. _ASV documentation: https://spacetelescope.github.io/asv/
+
+
+Writing benchmarks
+------------------
+
+See `ASV documentation`_ for basics on how to write benchmarks.
+
+Some things to consider:
+
+- The benchmark suite should be importable with any NumPy version.
+
+- The benchmark parameters etc. should not depend on which NumPy version
+ is installed.
+
+- Try to keep the runtime of the benchmark reasonable.
+
+- Prefer ASV's ``time_`` methods for benchmarking times rather than cooking up
+ time measurements via ``time.clock``, even if it requires some juggling when
+ writing the benchmark.
+
+- Preparing arrays etc. should generally be put in the ``setup`` method rather
+ than the ``time_`` methods, to avoid counting preparation time together with
+ the time of the benchmarked operation.
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 000000000..d837b0d67
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,85 @@
+{
+ // The version of the config file format. Do not change, unless
+ // you know what you are doing.
+ "version": 1,
+
+ // The name of the project being benchmarked
+ "project": "numpy",
+
+ // The project's homepage
+ "project_url": "http://numpy.org/",
+
+ // The URL or local path of the source code repository for the
+ // project being benchmarked
+ "repo": "..",
+
+ // List of branches to benchmark. If not provided, defaults to "master"
+ // (for git) or "tip" (for mercurial).
+ "branches": ["master"],
+
+ // The DVCS being used. If not set, it will be automatically
+ // determined from "repo" by looking at the protocol in the URL
+ // (if remote), or by looking for special directories, such as
+ // ".git" (if local).
+ "dvcs": "git",
+
+ // The tool to use to create environments. May be "conda",
+ // "virtualenv" or other value depending on the plugins in use.
+ // If missing or the empty string, the tool will be automatically
+ // determined by looking for tools on the PATH environment
+ // variable.
+ "environment_type": "virtualenv",
+
+ // the base URL to show a commit for the project.
+ "show_commit_url": "https://github.com/numpy/numpy/commit/",
+
+ // The Pythons you'd like to test against. If not provided, defaults
+ // to the current version of Python used to run `asv`.
+ "pythons": ["2.7"],
+
+ // The matrix of dependencies to test. Each key is the name of a
+ // package (in PyPI) and the values are version numbers. An empty
+ // list indicates to just test against the default (latest)
+ // version.
+ "matrix": {
+ "six": [],
+ },
+
+ // The directory (relative to the current directory) that benchmarks are
+ // stored in. If not provided, defaults to "benchmarks"
+ "benchmark_dir": "benchmarks",
+
+ // The directory (relative to the current directory) to cache the Python
+ // environments in. If not provided, defaults to "env"
+ "env_dir": "env",
+
+
+ // The directory (relative to the current directory) that raw benchmark
+ // results are stored in. If not provided, defaults to "results".
+ "results_dir": "results",
+
+ // The directory (relative to the current directory) that the html tree
+ // should be written to. If not provided, defaults to "html".
+ "html_dir": "html",
+
+ // The number of characters to retain in the commit hashes.
+ // "hash_length": 8,
+
+ // `asv` will cache wheels of the recent builds in each
+ // environment, making them faster to install next time. This is
+ // number of builds to keep, per environment.
+ "wheel_cache_size": 2,
+
+ // The commits after which the regression search in `asv publish`
+ // should start looking for regressions. Dictionary whose keys are
+ // regexps matching to benchmark names, and values corresponding to
+ // the commit (exclusive) after which to start looking for
+ // regressions. The default is to start from the first commit
+ // with results. If the commit is `null`, regression detection is
+ // skipped for the matching benchmark.
+ //
+ // "regressions_first_commits": {
+ // "some_benchmark": "352cdf", // Consider regressions only after this commit
+ // "another_benchmark": null, // Skip regression detection altogether
+ // }
+}
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
new file mode 100644
index 000000000..e8a859ff4
--- /dev/null
+++ b/benchmarks/benchmarks/__init__.py
@@ -0,0 +1,3 @@
+from __future__ import absolute_import, division, print_function
+
+from . import common
diff --git a/benchmarks/benchmarks/bench_app.py b/benchmarks/benchmarks/bench_app.py
new file mode 100644
index 000000000..ccf6e4c4a
--- /dev/null
+++ b/benchmarks/benchmarks/bench_app.py
@@ -0,0 +1,89 @@
+from __future__ import absolute_import, division, print_function
+
+from .common import Benchmark
+
+import numpy as np
+
+from six.moves import xrange
+
+
+class LaplaceInplace(Benchmark):
+ params = ['inplace', 'normal']
+ param_names = ['update']
+
+ def setup(self, update):
+ N = 150
+ Niter = 1000
+ dx = 0.1
+ dy = 0.1
+ dx2 = (dx * dx)
+ dy2 = (dy * dy)
+
+ def num_update(u, dx2, dy2):
+ u[1:(-1), 1:(-1)] = ((((u[2:, 1:(-1)] + u[:(-2), 1:(-1)]) * dy2) +
+ ((u[1:(-1), 2:] + u[1:(-1), :(-2)]) * dx2))
+ / (2 * (dx2 + dy2)))
+
+ def num_inplace(u, dx2, dy2):
+ tmp = u[:(-2), 1:(-1)].copy()
+ np.add(tmp, u[2:, 1:(-1)], out=tmp)
+ np.multiply(tmp, dy2, out=tmp)
+ tmp2 = u[1:(-1), 2:].copy()
+ np.add(tmp2, u[1:(-1), :(-2)], out=tmp2)
+ np.multiply(tmp2, dx2, out=tmp2)
+ np.add(tmp, tmp2, out=tmp)
+ np.multiply(tmp, (1.0 / (2.0 * (dx2 + dy2))),
+ out=u[1:(-1), 1:(-1)])
+
+ def laplace(N, Niter=100, func=num_update, args=()):
+ u = np.zeros([N, N], order='C')
+ u[0] = 1
+ for i in range(Niter):
+ func(u, *args)
+ return u
+
+ func = {'inplace': num_inplace, 'normal': num_update}[update]
+
+ def run():
+ laplace(N, Niter, func, args=(dx2, dy2))
+
+ self.run = run
+
+ def time_it(self, update):
+ self.run()
+
+
+class MaxesOfDots(Benchmark):
+ # Times a combined workload of np.dot products and axis-wise np.max
+ # reductions over a small set of seeded random arrays.
+ def setup(self):
+ # Fixed seed so that every run works on the same data.
+ np.random.seed(1)
+ nsubj = 5
+ nfeat = 100
+ ntime = 200
+
+ self.arrays = [np.random.normal(size=(ntime, nfeat))
+ for i in xrange(nsubj)]
+
+ def maxes_of_dots(self, arrays):
+ """
+ A magical feature score for each feature in each dataset
+ :ref:`Haxby et al., Neuron (2011) <HGC+11>`.
+ If arrays are column-wise zscore-d before computation it
+ results in characterizing each column in each array with
+ sum of maximal correlations of that column with columns
+ in other arrays.
+
+ Arrays must agree only on the first dimension.
+
+ For numpy it is a joint benchmark of dot products and max()
+ on a set of arrays.
+ """
+ feature_scores = ([0] * len(arrays))
+ for (i, sd) in enumerate(arrays):
+ for (j, sd2) in enumerate(arrays[(i + 1):]):
+ corr_temp = np.dot(sd.T, sd2)
+ feature_scores[i] += np.max(corr_temp, axis=1)
+ feature_scores[((j + i) + 1)] += np.max(corr_temp, axis=0)
+ return feature_scores
+
+ def time_it(self):
+ # Only the scoring routine is timed; the arrays come from setup().
+ self.maxes_of_dots(self.arrays)
diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py
new file mode 100644
index 000000000..6701917cc
--- /dev/null
+++ b/benchmarks/benchmarks/bench_core.py
@@ -0,0 +1,132 @@
+from __future__ import absolute_import, division, print_function
+
+from .common import Benchmark
+
+import numpy as np
+
+
+class Core(Benchmark):
+ def setup(self):
+ self.l100 = range(100)
+ self.l50 = range(50)
+ self.l = [np.arange(1000), np.arange(1000)]
+ self.l10x10 = np.ones((10, 10))
+
+ def time_array_1(self):
+ np.array(1)
+
+ def time_array_empty(self):
+ np.array([])
+
+ def time_array_l1(self):
+ np.array([1])
+
+ def time_array_l100(self):
+ np.array(self.l100)
+
+ def time_array_l(self):
+ np.array(self.l)
+
+ def time_vstack_l(self):
+ np.vstack(self.l)
+
+ def time_hstack_l(self):
+ np.hstack(self.l)
+
+ def time_dstack_l(self):
+ np.dstack(self.l)
+
+ def time_arange_100(self):
+ np.arange(100)
+
+ def time_zeros_100(self):
+ np.zeros(100)
+
+ def time_ones_100(self):
+ np.ones(100)
+
+ def time_empty_100(self):
+ np.empty(100)
+
+ def time_eye_100(self):
+ np.eye(100)
+
+ def time_identity_100(self):
+ np.identity(100)
+
+ def time_eye_3000(self):
+ np.eye(3000)
+
+ def time_identity_3000(self):
+ np.identity(3000)
+
+ def time_diag_l100(self):
+ np.diag(self.l100)
+
+ def time_diagflat_l100(self):
+ np.diagflat(self.l100)
+
+ def time_diagflat_l50_l50(self):
+ np.diagflat([self.l50, self.l50])
+
+ def time_triu_l10x10(self):
+ np.triu(self.l10x10)
+
+ def time_tril_l10x10(self):
+ np.tril(self.l10x10)
+
+
+class MA(Benchmark):
+ def setup(self):
+ self.l100 = range(100)
+ self.t100 = ([True] * 100)
+
+ def time_masked_array(self):
+ np.ma.masked_array()
+
+ def time_masked_array_l100(self):
+ np.ma.masked_array(self.l100)
+
+ def time_masked_array_l100_t100(self):
+ np.ma.masked_array(self.l100, self.t100)
+
+
+class CorrConv(Benchmark):
+    """Time ``np.correlate`` and ``np.convolve`` over input sizes and modes."""
+
+    # The sizes are handed to ``np.linspace(num=...)``, which needs an
+    # integer sample count, so they are written as ints rather than as
+    # float literals such as ``1e5``.
+    params = [[50, 1000, 100000],
+              [10, 100, 1000, 10000],
+              ['valid', 'same', 'full']]
+    param_names = ['size1', 'size2', 'mode']
+
+    def setup(self, size1, size2, mode):
+        # Inputs are built here so that only the operation itself is timed.
+        self.x1 = np.linspace(0, 1, num=size1)
+        self.x2 = np.cos(np.linspace(0, 2*np.pi, num=size2))
+
+    def time_correlate(self, size1, size2, mode):
+        np.correlate(self.x1, self.x2, mode=mode)
+
+    def time_convolve(self, size1, size2, mode):
+        np.convolve(self.x1, self.x2, mode=mode)
+
+
+class CountNonzero(Benchmark):
+ param_names = ['numaxes', 'size', 'dtype']
+ params = [
+ [1, 2, 3],
+ [100, 10000, 1000000],
+ [bool, int, str, object]
+ ]
+
+ def setup(self, numaxes, size, dtype):
+ self.x = np.empty(shape=(
+ numaxes, size), dtype=dtype)
+
+ def time_count_nonzero(self, numaxes, size, dtype):
+ np.count_nonzero(self.x)
+
+ def time_count_nonzero_axis(self, numaxes, size, dtype):
+ np.count_nonzero(self.x, axis=self.x.ndim - 1)
+
+ def time_count_nonzero_multi_axis(self, numaxes, size, dtype):
+ if self.x.ndim >= 2:
+ np.count_nonzero(self.x, axis=(
+ self.x.ndim - 1, self.x.ndim - 2))
diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py
new file mode 100644
index 000000000..23103ba66
--- /dev/null
+++ b/benchmarks/benchmarks/bench_function_base.py
@@ -0,0 +1,126 @@
+from __future__ import absolute_import, division, print_function
+
+from .common import Benchmark
+
+import numpy as np
+
+
+class Bincount(Benchmark):
+ def setup(self):
+ self.d = np.arange(80000, dtype=np.intp)
+ self.e = self.d.astype(np.float64)
+
+ def time_bincount(self):
+ np.bincount(self.d)
+
+ def time_weights(self):
+ np.bincount(self.d, weights=self.e)
+
+
+class Median(Benchmark):
+ def setup(self):
+ self.e = np.arange(10000, dtype=np.float32)
+ self.o = np.arange(10001, dtype=np.float32)
+
+ def time_even(self):
+ np.median(self.e)
+
+ def time_odd(self):
+ np.median(self.o)
+
+ def time_even_inplace(self):
+ np.median(self.e, overwrite_input=True)
+
+ def time_odd_inplace(self):
+ np.median(self.o, overwrite_input=True)
+
+ def time_even_small(self):
+ np.median(self.e[:500], overwrite_input=True)
+
+ def time_odd_small(self):
+ np.median(self.o[:500], overwrite_input=True)
+
+
+class Percentile(Benchmark):
+ def setup(self):
+ self.e = np.arange(10000, dtype=np.float32)
+ self.o = np.arange(10001, dtype=np.float32)
+
+ def time_quartile(self):
+ np.percentile(self.e, [25, 75])
+
+ def time_percentile(self):
+ np.percentile(self.e, [25, 35, 55, 65, 75])
+
+
+class Select(Benchmark):
+ def setup(self):
+ self.d = np.arange(20000)
+ self.e = self.d.copy()
+ self.cond = [(self.d > 4), (self.d < 2)]
+ self.cond_large = [(self.d > 4), (self.d < 2)] * 10
+
+ def time_select(self):
+ np.select(self.cond, [self.d, self.e])
+
+ def time_select_larger(self):
+ np.select(self.cond_large, ([self.d, self.e] * 10))
+
+
+class Sort(Benchmark):
+ def setup(self):
+ self.e = np.arange(10000, dtype=np.float32)
+ self.o = np.arange(10001, dtype=np.float32)
+ np.random.seed(25)
+ np.random.shuffle(self.o)
+ # quicksort implementations can have issues with equal elements
+ self.equal = np.ones(10000)
+ self.many_equal = np.sort(np.arange(10000) % 10)
+
+ # quicksort median of 3 worst case
+ self.worst = np.arange(1000000)
+ x = self.worst
+ while x.size > 3:
+ mid = x.size // 2
+ x[mid], x[-2] = x[-2], x[mid]
+ x = x[:-2]
+
+ def time_sort(self):
+ np.sort(self.e)
+
+ def time_sort_random(self):
+ np.sort(self.o)
+
+ def time_sort_inplace(self):
+ self.e.sort()
+
+ def time_sort_equal(self):
+ self.equal.sort()
+
+ def time_sort_many_equal(self):
+ self.many_equal.sort()
+
+ def time_sort_worst(self):
+ np.sort(self.worst)
+
+ def time_argsort(self):
+ self.e.argsort()
+
+ def time_argsort_random(self):
+ self.o.argsort()
+
+
+class Where(Benchmark):
+ def setup(self):
+ self.d = np.arange(20000)
+ self.e = self.d.copy()
+ self.cond = (self.d > 5000)
+
+ def time_1(self):
+ np.where(self.cond)
+
+ def time_2(self):
+ np.where(self.cond, self.d, self.e)
+
+ def time_2_broadcast(self):
+ np.where(self.cond, self.d, 0)
diff --git a/benchmarks/benchmarks/bench_indexing.py b/benchmarks/benchmarks/bench_indexing.py
new file mode 100644
index 000000000..a62a2050e
--- /dev/null
+++ b/benchmarks/benchmarks/bench_indexing.py
@@ -0,0 +1,81 @@
+from __future__ import absolute_import, division, print_function
+
+from .common import Benchmark, get_squares_, get_indexes_, get_indexes_rand_
+
+from os.path import join as pjoin
+import shutil
+import sys
+import six
+from numpy import memmap, float32, array
+import numpy as np
+from tempfile import mkdtemp
+
+
+class Indexing(Benchmark):
+ params = [["indexes_", "indexes_rand_"],
+ ['I', ':,I', 'np.ix_(I, I)'],
+ ['', '=1']]
+ param_names = ['indexes', 'sel', 'op']
+
+ def setup(self, indexes, sel, op):
+ sel = sel.replace('I', indexes)
+
+ ns = {'squares_': get_squares_(),
+ 'np': np,
+ 'indexes_': get_indexes_(),
+ 'indexes_rand_': get_indexes_rand_()}
+
+ if sys.version_info[0] >= 3:
+ code = "def run():\n for a in squares_.values(): a[%s]%s"
+ else:
+ code = "def run():\n for a in squares_.itervalues(): a[%s]%s"
+ code = code % (sel, op)
+
+ six.exec_(code, ns)
+ self.func = ns['run']
+
+ def time_op(self, indexes, sel, op):
+ self.func()
+
+
+class IndexingSeparate(Benchmark):
+ def setup(self):
+ self.tmp_dir = mkdtemp()
+ self.fp = memmap(pjoin(self.tmp_dir, 'tmp.dat'),
+ dtype=float32, mode='w+', shape=(50, 60))
+ self.indexes = array([3, 4, 6, 10, 20])
+
+ def teardown(self):
+ del self.fp
+ shutil.rmtree(self.tmp_dir)
+
+ def time_mmap_slicing(self):
+ for i in range(1000):
+ self.fp[5:10]
+
+ def time_mmap_fancy_indexing(self):
+ for i in range(1000):
+ self.fp[self.indexes]
+
+
+class IndexingStructured0D(Benchmark):
+ def setup(self):
+ self.dt = np.dtype([('a', 'f4', 256)])
+
+ self.A = np.zeros((), self.dt)
+ self.B = self.A.copy()
+
+ self.a = np.zeros(1, self.dt)[0]
+ self.b = self.a.copy()
+
+ def time_array_slice(self):
+ self.B['a'][:] = self.A['a']
+
+ def time_array_all(self):
+ self.B['a'] = self.A['a']
+
+ def time_scalar_slice(self):
+ self.b['a'][:] = self.a['a']
+
+ def time_scalar_all(self):
+ self.b['a'] = self.a['a']
diff --git a/benchmarks/benchmarks/bench_io.py b/benchmarks/benchmarks/bench_io.py
new file mode 100644
index 000000000..782d4ab30
--- /dev/null
+++ b/benchmarks/benchmarks/bench_io.py
@@ -0,0 +1,64 @@
+from __future__ import absolute_import, division, print_function
+
+from .common import Benchmark, get_squares
+
+import numpy as np
+
+
+class Copy(Benchmark):
+ params = ["int8", "int16", "float32", "float64",
+ "complex64", "complex128"]
+ param_names = ['type']
+
+ def setup(self, typename):
+ dtype = np.dtype(typename)
+ self.d = np.arange((50 * 500), dtype=dtype).reshape((500, 50))
+ self.e = np.arange((50 * 500), dtype=dtype).reshape((50, 500))
+ self.e_d = self.e.reshape(self.d.shape)
+ self.dflat = np.arange((50 * 500), dtype=dtype)
+
+ def time_memcpy(self, typename):
+ self.d[...] = self.e_d
+
+ def time_cont_assign(self, typename):
+ self.d[...] = 1
+
+ def time_strided_copy(self, typename):
+ self.d[...] = self.e.T
+
+ def time_strided_assign(self, typename):
+ self.dflat[::2] = 2
+
+
+class CopyTo(Benchmark):
+ def setup(self):
+ self.d = np.ones(50000)
+ self.e = self.d.copy()
+ self.m = (self.d == 1)
+ self.im = (~ self.m)
+ self.m8 = self.m.copy()
+ self.m8[::8] = (~ self.m[::8])
+ self.im8 = (~ self.m8)
+
+ def time_copyto(self):
+ np.copyto(self.d, self.e)
+
+ def time_copyto_sparse(self):
+ np.copyto(self.d, self.e, where=self.m)
+
+ def time_copyto_dense(self):
+ np.copyto(self.d, self.e, where=self.im)
+
+ def time_copyto_8_sparse(self):
+ np.copyto(self.d, self.e, where=self.m8)
+
+ def time_copyto_8_dense(self):
+ np.copyto(self.d, self.e, where=self.im8)
+
+
+class Savez(Benchmark):
+    """Time writing the sample ``squares`` arrays to an ``.npz`` archive."""
+
+    def setup(self):
+        # Mapping of dtype name -> array returned by the shared helper.
+        self.squares = get_squares()
+
+    def time_vb_savez_squares(self):
+        # Expand the mapping so that each array is stored as its own named
+        # entry. Passing the dict positionally would wrap it in a single
+        # object array, so the benchmark would not measure array output.
+        np.savez('tmp.npz', **self.squares)
diff --git a/benchmarks/benchmarks/bench_linalg.py b/benchmarks/benchmarks/bench_linalg.py
new file mode 100644
index 000000000..a65d510be
--- /dev/null
+++ b/benchmarks/benchmarks/bench_linalg.py
@@ -0,0 +1,109 @@
+from __future__ import absolute_import, division, print_function
+
+from .common import Benchmark, get_squares_, get_indexes_rand, TYPES1
+
+import numpy as np
+
+
+class Eindot(Benchmark):
+ def setup(self):
+ self.a = np.arange(60000.0).reshape(150, 400)
+ self.ac = self.a.copy()
+ self.at = self.a.T
+ self.atc = self.a.T.copy()
+ self.b = np.arange(240000.0).reshape(400, 600)
+ self.c = np.arange(600)
+ self.d = np.arange(400)
+
+ self.a3 = np.arange(480000.).reshape(60, 80, 100)
+ self.b3 = np.arange(192000.).reshape(80, 60, 40)
+
+ def time_dot_a_b(self):
+ np.dot(self.a, self.b)
+
+ def time_dot_d_dot_b_c(self):
+ np.dot(self.d, np.dot(self.b, self.c))
+
+ def time_dot_trans_a_at(self):
+ np.dot(self.a, self.at)
+
+ def time_dot_trans_a_atc(self):
+ np.dot(self.a, self.atc)
+
+ def time_dot_trans_at_a(self):
+ np.dot(self.at, self.a)
+
+ def time_dot_trans_atc_a(self):
+ np.dot(self.atc, self.a)
+
+ def time_einsum_i_ij_j(self):
+ np.einsum('i,ij,j', self.d, self.b, self.c)
+
+ def time_einsum_ij_jk_a_b(self):
+ np.einsum('ij,jk', self.a, self.b)
+
+ def time_einsum_ijk_jil_kl(self):
+ np.einsum('ijk,jil->kl', self.a3, self.b3)
+
+ def time_inner_trans_a_a(self):
+ np.inner(self.a, self.a)
+
+ def time_inner_trans_a_ac(self):
+ np.inner(self.a, self.ac)
+
+ def time_matmul_a_b(self):
+ np.matmul(self.a, self.b)
+
+ def time_matmul_d_matmul_b_c(self):
+ np.matmul(self.d, np.matmul(self.b, self.c))
+
+ def time_matmul_trans_a_at(self):
+ np.matmul(self.a, self.at)
+
+ def time_matmul_trans_a_atc(self):
+ np.matmul(self.a, self.atc)
+
+ def time_matmul_trans_at_a(self):
+ np.matmul(self.at, self.a)
+
+ def time_matmul_trans_atc_a(self):
+ np.matmul(self.atc, self.a)
+
+ def time_tensordot_a_b_axes_1_0_0_1(self):
+ np.tensordot(self.a3, self.b3, axes=([1, 0], [0, 1]))
+
+
+class Linalg(Benchmark):
+ params = [['svd', 'pinv', 'det', 'norm'],
+ TYPES1]
+ param_names = ['op', 'type']
+
+ def setup(self, op, typename):
+ np.seterr(all='ignore')
+
+ self.func = getattr(np.linalg, op)
+
+ if op == 'cholesky':
+ # we need a positive definite
+ self.a = np.dot(get_squares_()[typename],
+ get_squares_()[typename].T)
+ else:
+ self.a = get_squares_()[typename]
+
+ # check that dtype is supported at all
+ try:
+ self.func(self.a[:2, :2])
+ except TypeError:
+ raise NotImplementedError()
+
+ def time_op(self, op, typename):
+ self.func(self.a)
+
+
+class Lstsq(Benchmark):
+    """Time ``np.linalg.lstsq`` on the small float64 sample matrix."""
+
+    def setup(self):
+        self.a = get_squares_()['float64']
+        self.b = get_indexes_rand()[:100].astype(np.float64)
+
+    def time_numpy_linalg_lstsq_a__b_float64(self):
+        # Spell out ``rcond=-1`` (the historical default) so that the call
+        # behaves the same on every NumPy version and does not trigger the
+        # FutureWarning some releases emit when ``rcond`` is omitted.
+        np.linalg.lstsq(self.a, self.b, rcond=-1)
diff --git a/benchmarks/benchmarks/bench_random.py b/benchmarks/benchmarks/bench_random.py
new file mode 100644
index 000000000..18444b9a1
--- /dev/null
+++ b/benchmarks/benchmarks/bench_random.py
@@ -0,0 +1,67 @@
+from __future__ import absolute_import, division, print_function
+
+from .common import Benchmark
+
+import numpy as np
+from numpy.lib import NumpyVersion
+
+
+class Random(Benchmark):
+ params = ['normal', 'uniform', 'weibull 1', 'binomial 10 0.5',
+ 'poisson 10']
+
+ def setup(self, name):
+ items = name.split()
+ name = items.pop(0)
+ params = [float(x) for x in items]
+
+ self.func = getattr(np.random, name)
+ self.params = tuple(params) + ((100, 100),)
+
+ def time_rng(self, name):
+ self.func(*self.params)
+
+
+class Shuffle(Benchmark):
+ def setup(self):
+ self.a = np.arange(100000)
+
+ def time_100000(self):
+ np.random.shuffle(self.a)
+
+
+class Randint(Benchmark):
+
+ def time_randint_fast(self):
+ """Compare to uint32 below"""
+ np.random.randint(0, 2**30, size=10**5)
+
+ def time_randint_slow(self):
+ """Compare to uint32 below"""
+ np.random.randint(0, 2**30 + 1, size=10**5)
+
+
+class Randint_dtype(Benchmark):
+ high = {
+ 'bool': 1,
+ 'uint8': 2**7,
+ 'uint16': 2**15,
+ 'uint32': 2**31,
+ 'uint64': 2**63
+ }
+
+ param_names = ['dtype']
+ params = ['bool', 'uint8', 'uint16', 'uint32', 'uint64']
+
+ def setup(self, name):
+ if NumpyVersion(np.__version__) < '1.11.0.dev0':
+ raise NotImplementedError
+
+ def time_randint_fast(self, name):
+ high = self.high[name]
+ np.random.randint(0, high, size=10**5, dtype=name)
+
+ def time_randint_slow(self, name):
+ high = self.high[name]
+ np.random.randint(0, high + 1, size=10**5, dtype=name)
+
diff --git a/benchmarks/benchmarks/bench_reduce.py b/benchmarks/benchmarks/bench_reduce.py
new file mode 100644
index 000000000..704023528
--- /dev/null
+++ b/benchmarks/benchmarks/bench_reduce.py
@@ -0,0 +1,67 @@
+from __future__ import absolute_import, division, print_function
+
+from .common import Benchmark, TYPES1, get_squares
+
+import numpy as np
+
+
+class AddReduce(Benchmark):
+ def setup(self):
+ self.squares = get_squares().values()
+
+ def time_axis_0(self):
+ [np.add.reduce(a, axis=0) for a in self.squares]
+
+ def time_axis_1(self):
+ [np.add.reduce(a, axis=1) for a in self.squares]
+
+
+class AddReduceSeparate(Benchmark):
+ params = [[0, 1], TYPES1]
+ param_names = ['axis', 'type']
+
+ def setup(self, axis, typename):
+ self.a = get_squares()[typename]
+
+ def time_reduce(self, axis, typename):
+ np.add.reduce(self.a, axis=axis)
+
+
+class AnyAll(Benchmark):
+    """Time ``all()`` and ``any()`` on all-False and all-True bool arrays."""
+
+    def setup(self):
+        # Use the builtin ``bool`` as dtype: the ``np.bool`` alias does not
+        # exist in every NumPy release, and the suite has to stay runnable
+        # across versions.
+        self.zeros = np.zeros(100000, bool)
+        self.ones = np.ones(100000, bool)
+
+    def time_all_fast(self):
+        # The result is already decided by the first (False) element.
+        self.zeros.all()
+
+    def time_all_slow(self):
+        # Every element is True, so none of them decides the result early.
+        self.ones.all()
+
+    def time_any_fast(self):
+        # The result is already decided by the first (True) element.
+        self.ones.any()
+
+    def time_any_slow(self):
+        # Every element is False, so none of them decides the result early.
+        self.zeros.any()
+
+
+class MinMax(Benchmark):
+ params = [np.float32, np.float64, np.intp]
+ param_names = ['dtype']
+
+ def setup(self, dtype):
+ self.d = np.ones(20000, dtype=dtype)
+
+ def time_min(self, dtype):
+ np.min(self.d)
+
+ def time_max(self, dtype):
+ np.max(self.d)
+
+
+class SmallReduction(Benchmark):
+ def setup(self):
+ self.d = np.ones(100, dtype=np.float32)
+
+ def time_small(self):
+ np.sum(self.d)
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
new file mode 100644
index 000000000..1baee1340
--- /dev/null
+++ b/benchmarks/benchmarks/bench_ufunc.py
@@ -0,0 +1,152 @@
+from __future__ import absolute_import, division, print_function
+
+from .common import Benchmark, get_squares_
+
+import numpy as np
+
+
+ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin',
+ 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'bitwise_and',
+ 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'cbrt', 'ceil',
+ 'conj', 'conjugate', 'copysign', 'cos', 'cosh', 'deg2rad',
+ 'degrees', 'divide', 'equal', 'exp', 'exp2', 'expm1',
+ 'fabs', 'floor', 'floor_divide', 'fmax', 'fmin', 'fmod',
+ 'frexp', 'greater', 'greater_equal', 'hypot', 'invert',
+ 'isfinite', 'isinf', 'isnan', 'ldexp', 'left_shift', 'less',
+ 'less_equal', 'log', 'log10', 'log1p', 'log2', 'logaddexp',
+ 'logaddexp2', 'logical_and', 'logical_not', 'logical_or',
+ 'logical_xor', 'maximum', 'minimum', 'mod', 'modf',
+ 'multiply', 'negative', 'nextafter', 'not_equal', 'power',
+ 'rad2deg', 'radians', 'reciprocal', 'remainder',
+ 'right_shift', 'rint', 'sign', 'signbit', 'sin', 'sinh',
+ 'spacing', 'sqrt', 'square', 'subtract', 'tan', 'tanh',
+ 'true_divide', 'trunc']
+
+for name in dir(np):
+ if isinstance(getattr(np, name, None), np.ufunc) and name not in ufuncs:
+ print("Missing ufunc %r" % (name,))
+
+
+class Broadcast(Benchmark):
+ def setup(self):
+ self.d = np.ones((50000, 100), dtype=np.float64)
+ self.e = np.ones((100,), dtype=np.float64)
+
+ def time_broadcast(self):
+ self.d - self.e
+
+
+class UFunc(Benchmark):
+    """Time each listed ufunc on every sample array dtype it accepts."""
+
+    params = [ufuncs]
+    param_names = ['ufunc']
+    timeout = 10
+
+    def setup(self, ufuncname):
+        np.seterr(all='ignore')
+        try:
+            self.f = getattr(np, ufuncname)
+        except AttributeError:
+            # The installed NumPy does not provide this ufunc.
+            raise NotImplementedError()
+
+        # Collect one argument tuple per sample array the ufunc accepts.
+        # The dtype names (the dict keys) are not needed here, so iterate
+        # over the arrays directly.
+        self.args = []
+        for a in get_squares_().values():
+            arg = (a,) * self.f.nin
+            try:
+                self.f(*arg)
+            except TypeError:
+                # This dtype is not supported by the ufunc; leave it out.
+                continue
+            self.args.append(arg)
+
+    def time_ufunc_types(self, ufuncname):
+        [self.f(*arg) for arg in self.args]
+
+
+class Custom(Benchmark):
+    """Time ``np.nonzero`` and elementwise logic operators on a bool array."""
+
+    def setup(self):
+        # Use the builtin ``bool`` as dtype: the ``np.bool`` alias does not
+        # exist in every NumPy release, and the suite has to stay runnable
+        # across versions.
+        self.b = np.ones(20000, dtype=bool)
+
+    def time_nonzero(self):
+        np.nonzero(self.b)
+
+    def time_not_bool(self):
+        (~self.b)
+
+    def time_and_bool(self):
+        (self.b & self.b)
+
+    def time_or_bool(self):
+        (self.b | self.b)
+
+
+class CustomInplace(Benchmark):
+ def setup(self):
+ self.c = np.ones(500000, dtype=np.int8)
+ self.i = np.ones(150000, dtype=np.int32)
+ self.f = np.zeros(150000, dtype=np.float32)
+ self.d = np.zeros(75000, dtype=np.float64)
+ # fault memory
+ self.f *= 1.
+ self.d *= 1.
+
+ def time_char_or(self):
+ np.bitwise_or(self.c, 0, out=self.c)
+ np.bitwise_or(0, self.c, out=self.c)
+
+ def time_char_or_temp(self):
+ 0 | self.c | 0
+
+ def time_int_or(self):
+ np.bitwise_or(self.i, 0, out=self.i)
+ np.bitwise_or(0, self.i, out=self.i)
+
+ def time_int_or_temp(self):
+ 0 | self.i | 0
+
+ def time_float_add(self):
+ np.add(self.f, 1., out=self.f)
+ np.add(1., self.f, out=self.f)
+
+ def time_float_add_temp(self):
+ 1. + self.f + 1.
+
+ def time_double_add(self):
+ np.add(self.d, 1., out=self.d)
+ np.add(1., self.d, out=self.d)
+
+ def time_double_add_temp(self):
+ 1. + self.d + 1.
+
+
+class CustomScalar(Benchmark):
+ params = [np.float32, np.float64]
+ param_names = ['dtype']
+
+ def setup(self, dtype):
+ self.d = np.ones(20000, dtype=dtype)
+
+ def time_add_scalar2(self, dtype):
+ np.add(self.d, 1)
+
+ def time_divide_scalar2(self, dtype):
+ np.divide(self.d, 1)
+
+ def time_divide_scalar2_inplace(self, dtype):
+ np.divide(self.d, 1, out=self.d)
+
+ def time_less_than_scalar2(self, dtype):
+ (self.d < 1)
+
+
+class Scalar(Benchmark):
+ def setup(self):
+ self.x = np.asarray(1.0)
+ self.y = np.asarray((1.0 + 1j))
+ self.z = complex(1.0, 1.0)
+
+ def time_add_scalar(self):
+ (self.x + self.x)
+
+ def time_add_scalar_conv(self):
+ (self.x + 1.0)
+
+ def time_add_scalar_conv_complex(self):
+ (self.y + self.z)
diff --git a/benchmarks/benchmarks/common.py b/benchmarks/benchmarks/common.py
new file mode 100644
index 000000000..18a09fd40
--- /dev/null
+++ b/benchmarks/benchmarks/common.py
@@ -0,0 +1,116 @@
+from __future__ import absolute_import, division, print_function
+
+import numpy
+import random
+
+# Various pre-crafted datasets/variables for testing
+# !!! Must not be changed -- only appended !!!
+# while testing numpy we better not rely on numpy to produce random
+# sequences
+random.seed(1)
+# but will seed it nevertheless
+numpy.random.seed(1)
+
+nx, ny = 1000, 1000
+# reduced squares based on indexes_rand, primarily for testing more
+# time-consuming functions (ufunc, linalg, etc)
+nxs, nys = 100, 100
+
+# a set of interesting types to test
+TYPES1 = [
+    'int16', 'float16',
+    'int32', 'float32',
+    'int64', 'float64', 'complex64',
+    'longfloat', 'complex128',
+]
+# ``numpy.sctypeDict`` is the supported name of the scalar-type lookup
+# table; the ``numpy.typeDict`` alias is gone from newer releases.
+# 'complex256' only exists on platforms with a wide enough long double.
+if 'complex256' in numpy.sctypeDict:
+    TYPES1.append('complex256')
+
+
+def memoize(func):
+    """Cache the result of a zero-argument function after its first call.
+
+    The returned wrapper calls ``func`` once, stores the value and hands
+    back that same object on every later call.
+    """
+    import functools  # local import: only this decorator needs it
+
+    result = []
+
+    @functools.wraps(func)  # keep the wrapped function's name and docstring
+    def wrapper():
+        # The one-element list doubles as the "already computed" flag, so
+        # falsy results (None, 0, empty containers) are cached as well.
+        if not result:
+            result.append(func())
+        return result[0]
+    return wrapper
+
+
+# values which will be used to construct our sample data matrices
+# replicate 10 times to speed up initial imports of this helper
+# and generate some redundancy
+
+@memoize
+def get_values():
+ rnd = numpy.random.RandomState(1)
+ values = numpy.tile(rnd.uniform(0, 100, size=nx*ny//10), 10)
+ return values
+
+
+@memoize
+def get_squares():
+ values = get_values()
+ squares = {t: numpy.array(values,
+ dtype=getattr(numpy, t)).reshape((nx, ny))
+ for t in TYPES1}
+
+ # adjust complex ones to have non-degenerated imagery part -- use
+ # original data transposed for that
+ for t, v in squares.items():
+ if t.startswith('complex'):
+ v += v.T*1j
+ return squares
+
+
+@memoize
+def get_squares_():
+ # smaller squares
+ squares_ = {t: s[:nxs, :nys] for t, s in get_squares().items()}
+ return squares_
+
+
+@memoize
+def get_vectors():
+ # vectors
+ vectors = {t: s[0] for t, s in get_squares().items()}
+ return vectors
+
+
+@memoize
+def get_indexes():
+ indexes = list(range(nx))
+ # so we do not have all items
+ indexes.pop(5)
+ indexes.pop(95)
+
+ indexes = numpy.array(indexes)
+ return indexes
+
+
+@memoize
+def get_indexes_rand():
+ rnd = random.Random(1)
+
+ indexes_rand = get_indexes().tolist() # copy
+ rnd.shuffle(indexes_rand) # in-place shuffle
+ indexes_rand = numpy.array(indexes_rand)
+ return indexes_rand
+
+
+@memoize
+def get_indexes_():
+ # smaller versions
+ indexes = get_indexes()
+ indexes_ = indexes[indexes < nxs]
+ return indexes_
+
+
+@memoize
+def get_indexes_rand_():
+ indexes_rand = get_indexes_rand()
+ indexes_rand_ = indexes_rand[indexes_rand < nxs]
+ return indexes_rand_
+
+
+class Benchmark(object):
+ goal_time = 0.25