Merge branch 'master' into nep-18-initial

author: Stephan Hoyer <shoyer@google.com> 2018-09-24 09:08:27 -0700
committer: Stephan Hoyer <shoyer@google.com> 2018-09-24 09:08:27 -0700
commit: ceba9b3d659bd94ae25f71fd42c87df4ff43f78b (patch)
tree: 0746f881d8bbce4ff93e72255dba82ac68ca16f9 /benchmarks
parent: 1846ac335da808cb8bf6f9b1950933348a40d200 (diff)
parent: 409aca108c6d6daf7f4fd0561d440ff4f30a0108 (diff)
download: numpy-ceba9b3d659bd94ae25f71fd42c87df4ff43f78b.tar.gz
7 files changed, 46 insertions, 17 deletions
diff --git a/benchmarks/README.rst b/benchmarks/README.rst
index f4f0b0de9..b67994ce0 100644
--- a/benchmarks/README.rst
+++ b/benchmarks/README.rst
@@ -60,3 +60,11 @@ Some things to consider:
 - Preparing arrays etc. should generally be put in the ``setup`` method rather
   than the ``time_`` methods, to avoid counting preparation time together with
   the time of the benchmarked operation.
+
+- Be mindful that large arrays created with ``np.empty`` or ``np.zeros`` might
+  not be allocated in physical memory until the memory is accessed. If this is
+  desired behaviour, make sure to comment it in your setup function. If
+  you are benchmarking an algorithm, it is unlikely that a user will be
+  executing said algorithm on a newly created empty/zero array. One can force
+  pagefaults to occur in the setup phase either by calling ``np.ones`` or
+  ``arr.fill(value)`` after creating the array.
diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py
index eea108528..7f217fc90 100644
--- a/benchmarks/benchmarks/bench_function_base.py
+++ b/benchmarks/benchmarks/bench_function_base.py
@@ -103,14 +103,6 @@ class Sort(object):
         self.equal = np.ones(10000)
         self.many_equal = np.sort(np.arange(10000) % 10)
 
-        # quicksort median of 3 worst case
-        self.worst = np.arange(1000000)
-        x = self.worst
-        while x.size > 3:
-            mid = x.size // 2
-            x[mid], x[-2] = x[-2], x[mid]
-            x = x[:-2]
-
     def time_sort(self):
         np.sort(self.e)
 
@@ -126,9 +118,6 @@ class Sort(object):
     def time_sort_many_equal(self):
         self.many_equal.sort()
 
-    def time_sort_worst(self):
-        np.sort(self.worst)
-
     def time_argsort(self):
         self.e.argsort()
 
@@ -136,6 +125,23 @@ class Sort(object):
         self.o.argsort()
 
 
+class SortWorst(object):
+    def setup(self):
+        # quicksort median of 3 worst case
+        self.worst = np.arange(1000000)
+        x = self.worst
+        while x.size > 3:
+            mid = x.size // 2
+            x[mid], x[-2] = x[-2], x[mid]
+            x = x[:-2]
+
+    def time_sort_worst(self):
+        np.sort(self.worst)
+
+    # Retain old benchmark name for backward compatibility
+    time_sort_worst.benchmark_name = "bench_function_base.Sort.time_sort_worst"
+
+
 class Where(object):
     def setup(self):
         self.d = np.arange(20000)
diff --git a/benchmarks/benchmarks/bench_io.py b/benchmarks/benchmarks/bench_io.py
index 1fddfbc8c..da29cfe73 100644
--- a/benchmarks/benchmarks/bench_io.py
+++ b/benchmarks/benchmarks/bench_io.py
@@ -21,6 +21,10 @@ class Copy(object):
     def time_memcpy(self, typename):
         self.d[...] = self.e_d
 
+    def time_memcpy_large_out_of_place(self, typename):
+        l = np.ones(1024**2, dtype=np.dtype(typename))
+        l.copy()
+
     def time_cont_assign(self, typename):
         self.d[...] = 1
 
diff --git a/benchmarks/benchmarks/bench_lib.py b/benchmarks/benchmarks/bench_lib.py
index 3a79292da..fa6c56708 100644
--- a/benchmarks/benchmarks/bench_lib.py
+++ b/benchmarks/benchmarks/bench_lib.py
@@ -17,7 +17,10 @@ class Pad(object):
     ]
 
     def setup(self, shape, pad_width, mode):
-        self.array = np.empty(shape)
+        # avoid np.zeros or np.empty's lazy allocation.
+        # np.full causes pagefaults to occur during setup
+        # instead of during the benchmark
+        self.array = np.full(shape, 0)
 
     def time_pad(self, shape, pad_width, mode):
         np.pad(self.array, pad_width, mode)
diff --git a/benchmarks/benchmarks/bench_ma.py b/benchmarks/benchmarks/bench_ma.py
index 848a0d419..631b793b1 100644
--- a/benchmarks/benchmarks/bench_ma.py
+++ b/benchmarks/benchmarks/bench_ma.py
@@ -87,7 +87,9 @@ class Concatenate(object):
     ]
 
     def setup(self, mode, n):
-        normal = np.zeros((n, n), int)
+        # avoid np.zeros's lazy allocation that causes page faults during benchmark.
+        # np.full will cause pagefaults to happen during setup.
+        normal = np.full((n, n), 0, int)
         unmasked = np.ma.zeros((n, n), int)
         masked = np.ma.array(normal, mask=True)
 
diff --git a/benchmarks/benchmarks/bench_reduce.py b/benchmarks/benchmarks/bench_reduce.py
index 319a4b15f..95804666e 100644
--- a/benchmarks/benchmarks/bench_reduce.py
+++ b/benchmarks/benchmarks/bench_reduce.py
@@ -29,8 +29,10 @@ class AddReduceSeparate(object):
 
 class AnyAll(object):
     def setup(self):
-        self.zeros = np.zeros(100000, bool)
-        self.ones = np.ones(100000, bool)
+        # avoid np.zeros's lazy allocation that would
+        # cause page faults during benchmark
+        self.zeros = np.full(100000, 0, bool)
+        self.ones = np.full(100000, 1, bool)
 
     def time_all_fast(self):
         self.zeros.all()
diff --git a/benchmarks/benchmarks/bench_shape_base.py b/benchmarks/benchmarks/bench_shape_base.py
index ed88aa1fd..cc6dae987 100644
--- a/benchmarks/benchmarks/bench_shape_base.py
+++ b/benchmarks/benchmarks/bench_shape_base.py
@@ -21,7 +21,9 @@ class Block(object):
         self.four_1d = np.ones(6 * n)
         self.five_0d = np.ones(1 * n)
         self.six_1d = np.ones(5 * n)
-        self.zero_2d = np.zeros((2 * n, 6 * n))
+        # avoid np.zeros's lazy allocation that might cause
+        # page faults during benchmark
+        self.zero_2d = np.full((2 * n, 6 * n), 0)
 
         self.one = np.ones(3 * n)
         self.two = 2 * np.ones((3, 3 * n))
@@ -29,7 +31,9 @@ class Block(object):
         self.four = 4 * np.ones(3 * n)
         self.five = 5 * np.ones(1 * n)
         self.six = 6 * np.ones(5 * n)
-        self.zero = np.zeros((2 * n, 6 * n))
+        # avoid np.zeros's lazy allocation that might cause
+        # page faults during benchmark
+        self.zero = np.full((2 * n, 6 * n), 0)
 
     def time_block_simple_row_wise(self, n):
         np.block([self.a_2d, self.b_2d])
author	Stephan Hoyer <shoyer@google.com>	2018-09-24 09:08:27 -0700
committer	Stephan Hoyer <shoyer@google.com>	2018-09-24 09:08:27 -0700
commit	ceba9b3d659bd94ae25f71fd42c87df4ff43f78b (patch)
tree	0746f881d8bbce4ff93e72255dba82ac68ca16f9 /benchmarks
parent	1846ac335da808cb8bf6f9b1950933348a40d200 (diff)
parent	409aca108c6d6daf7f4fd0561d440ff4f30a0108 (diff)
download	numpy-ceba9b3d659bd94ae25f71fd42c87df4ff43f78b.tar.gz