7 files changed, 185 insertions, 34 deletions
diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py
index 26cffcab1..194ce3218 100644
--- a/benchmarks/benchmarks/bench_core.py
+++ b/benchmarks/benchmarks/bench_core.py
@@ -97,8 +97,8 @@ class Temporaries(Benchmark):
 
 
 class CorrConv(Benchmark):
-    params = [[50, 1000, 1e5],
-              [10, 100, 1000, 1e4],
+    params = [[50, 1000, int(1e5)],
+              [10, 100, 1000, int(1e4)],
               ['valid', 'same', 'full']]
     param_names = ['size1', 'size2', 'mode']
 
diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py
index 9ef03262b..64e578680 100644
--- a/benchmarks/benchmarks/bench_function_base.py
+++ b/benchmarks/benchmarks/bench_function_base.py
@@ -96,35 +96,46 @@ class Select(Benchmark):
 
 
 class Sort(Benchmark):
-    def setup(self):
-        self.e = np.arange(10000, dtype=np.float32)
-        self.o = np.arange(10001, dtype=np.float32)
+    params = [
+        ['quick', 'merge', 'heap'],
+        ['float32', 'int32', 'uint32']
+    ]
+    param_names = ['kind', 'dtype']
+
+    def setup(self, kind, dtype):
+        self.e = np.arange(10000, dtype=dtype)
+        self.o = np.arange(10001, dtype=dtype)
         np.random.seed(25)
         np.random.shuffle(self.o)
         # quicksort implementations can have issues with equal elements
-        self.equal = np.ones(10000)
-        self.many_equal = np.sort(np.arange(10000) % 10)
+        self.equal = np.ones(10000, dtype=dtype)
+        self.many_equal = np.sort(np.arange(10000) % 10).astype(dtype)
+
+        try:
+            np.sort(self.e, kind=kind)
+        except TypeError:
+            raise NotImplementedError()
 
-    def time_sort(self):
-        np.sort(self.e)
+    def time_sort(self, kind, dtype):
+        np.sort(self.e, kind=kind)
 
-    def time_sort_random(self):
-        np.sort(self.o)
+    def time_sort_random(self, kind, dtype):
+        np.sort(self.o, kind=kind)
 
-    def time_sort_inplace(self):
-        self.e.sort()
+    def time_sort_inplace(self, kind, dtype):
+        self.e.sort(kind=kind)
 
-    def time_sort_equal(self):
-        self.equal.sort()
+    def time_sort_equal(self, kind, dtype):
+        self.equal.sort(kind=kind)
 
-    def time_sort_many_equal(self):
-        self.many_equal.sort()
+    def time_sort_many_equal(self, kind, dtype):
+        self.many_equal.sort(kind=kind)
 
-    def time_argsort(self):
-        self.e.argsort()
+    def time_argsort(self, kind, dtype):
+        self.e.argsort(kind=kind)
 
-    def time_argsort_random(self):
-        self.o.argsort()
+    def time_argsort_random(self, kind, dtype):
+        self.o.argsort(kind=kind)
 
 
 class SortWorst(Benchmark):
diff --git a/benchmarks/benchmarks/bench_lib.py b/benchmarks/benchmarks/bench_lib.py
index e6c91a27c..f65a96dad 100644
--- a/benchmarks/benchmarks/bench_lib.py
+++ b/benchmarks/benchmarks/bench_lib.py
@@ -9,20 +9,109 @@ import numpy as np
 
 
 class Pad(Benchmark):
-    """Benchmarks for `numpy.pad`."""
+    """Benchmarks for `numpy.pad`.
+
+    When benchmarking the pad function it is useful to cover scenarios where
+    the ratio between the size of the input array and the output array differs
+    significantly (original area vs. padded area). This makes it possible to
+    evaluate for which scenario a padding algorithm is optimized. Furthermore,
+    involving a large range of array sizes ensures that the effects of
+    CPU-bound caching are visible.
+
+    The table below shows the sizes of the arrays involved in this benchmark:
+
+    +-----------------+----------+-----------+-----------+-----------------+
+    | shape           | original | padded: 1 | padded: 8 | padded: (0, 32) |
+    +=================+==========+===========+===========+=================+
+    | (2 ** 22,)      | 32 MiB   | 32.0 MiB  | 32.0 MiB  | 32.0 MiB        |
+    +-----------------+----------+-----------+-----------+-----------------+
+    | (1024, 1024)    | 8 MiB    | 8.03 MiB  | 8.25 MiB  | 8.51 MiB        |
+    +-----------------+----------+-----------+-----------+-----------------+
+    | (256, 128, 1)   | 256 KiB  | 786 KiB   | 5.08 MiB  | 11.6 MiB        |
+    +-----------------+----------+-----------+-----------+-----------------+
+    | (4, 4, 4, 4)    | 2 KiB    | 10.1 KiB  | 1.22 MiB  | 12.8 MiB        |
+    +-----------------+----------+-----------+-----------+-----------------+
+    | (1, 1, 1, 1, 1) | 8 B      | 1.90 KiB  | 10.8 MiB  | 299 MiB         |
+    +-----------------+----------+-----------+-----------+-----------------+
+    """
 
     param_names = ["shape", "pad_width", "mode"]
     params = [
-        [(1000,), (10, 100), (10, 10, 10)],
-        [1, 3, (0, 5)],
+        # Shape of the input arrays
+        [(2 ** 22,), (1024, 1024), (256, 128, 1),
+         (4, 4, 4, 4), (1, 1, 1, 1, 1)],
+        # Tested pad widths
+        [1, 8, (0, 32)],
+        # Tested modes: mean, median, minimum & maximum use the same code path
+        #               reflect & symmetric share a lot of their code path
         ["constant", "edge", "linear_ramp", "mean", "reflect", "wrap"],
     ]
 
     def setup(self, shape, pad_width, mode):
-        # avoid np.zeros or np.empty's lazy allocation.
-        # np.full causes pagefaults to occur during setup
-        # instead of during the benchmark
-        self.array = np.full(shape, 0)
+        # Make sure to fill the array to make the OS page fault
+        # in the setup phase and not the timed phase
+        self.array = np.full(shape, fill_value=1, dtype=np.float64)
 
     def time_pad(self, shape, pad_width, mode):
         np.pad(self.array, pad_width, mode)
+
+class Nan(Benchmark):
+    """Benchmarks for nan functions"""
+
+    param_names = ["array_size", "percent_nans"]
+    params = [
+            # sizes of the 1D arrays
+            [200, int(2e5)],
+            # percent of np.nan in arrays
+            [0, 0.1, 2., 50., 90.],
+            ]
+
+    def setup(self, array_size, percent_nans):
+        np.random.seed(123)
+        # produce a randomly shuffled array with the
+        # approximate desired percentage np.nan content
+        base_array = np.random.uniform(size=array_size)
+        base_array[base_array < percent_nans / 100.] = np.nan
+        self.arr = base_array
+
+    def time_nanmin(self, array_size, percent_nans):
+        np.nanmin(self.arr)
+
+    def time_nanmax(self, array_size, percent_nans):
+        np.nanmax(self.arr)
+
+    def time_nanargmin(self, array_size, percent_nans):
+        np.nanargmin(self.arr)
+
+    def time_nanargmax(self, array_size, percent_nans):
+        np.nanargmax(self.arr)
+
+    def time_nansum(self, array_size, percent_nans):
+        np.nansum(self.arr)
+
+    def time_nanprod(self, array_size, percent_nans):
+        np.nanprod(self.arr)
+
+    def time_nancumsum(self, array_size, percent_nans):
+        np.nancumsum(self.arr)
+
+    def time_nancumprod(self, array_size, percent_nans):
+        np.nancumprod(self.arr)
+
+    def time_nanmean(self, array_size, percent_nans):
+        np.nanmean(self.arr)
+
+    def time_nanvar(self, array_size, percent_nans):
+        np.nanvar(self.arr)
+
+    def time_nanstd(self, array_size, percent_nans):
+        np.nanstd(self.arr)
+
+    def time_nanmedian(self, array_size, percent_nans):
+        np.nanmedian(self.arr)
+
+    def time_nanquantile(self, array_size, percent_nans):
+        np.nanquantile(self.arr, q=0.2)
+
+    def time_nanpercentile(self, array_size, percent_nans):
+        np.nanpercentile(self.arr, q=50)
diff --git a/benchmarks/benchmarks/bench_linalg.py b/benchmarks/benchmarks/bench_linalg.py
index a65d510be..5c44162a2 100644
--- a/benchmarks/benchmarks/bench_linalg.py
+++ b/benchmarks/benchmarks/bench_linalg.py
@@ -106,4 +106,4 @@ class Lstsq(Benchmark):
         self.b = get_indexes_rand()[:100].astype(np.float64)
 
     def time_numpy_linalg_lstsq_a__b_float64(self):
-        np.linalg.lstsq(self.a, self.b)
+        np.linalg.lstsq(self.a, self.b, rcond=-1)
diff --git a/benchmarks/benchmarks/bench_overrides.py b/benchmarks/benchmarks/bench_overrides.py
index 2cb94c95c..58572d07d 100644
--- a/benchmarks/benchmarks/bench_overrides.py
+++ b/benchmarks/benchmarks/bench_overrides.py
@@ -2,7 +2,15 @@ from __future__ import absolute_import, division, print_function
 
 from .common import Benchmark
 
-from numpy.core.overrides import array_function_dispatch
+try:
+    from numpy.core.overrides import array_function_dispatch
+except ImportError:
+    # Don't fail at import time with old NumPy versions
+    def array_function_dispatch(*args, **kwargs):
+        def wrap(*args, **kwargs):
+            return None
+        return wrap
+
 import numpy as np
 
 
@@ -16,10 +24,10 @@ def mock_broadcast_to(array, shape, subok=False):
 
 
 def _concatenate_dispatcher(arrays, axis=None, out=None):
-    for array in arrays:
-        yield array
     if out is not None:
-        yield out
+        arrays = list(arrays)
+        arrays.append(out)
+    return arrays
 
 
 @array_function_dispatch(_concatenate_dispatcher)
diff --git a/benchmarks/benchmarks/bench_records.py b/benchmarks/benchmarks/bench_records.py
new file mode 100644
index 000000000..41a6dd775
--- /dev/null
+++ b/benchmarks/benchmarks/bench_records.py
@@ -0,0 +1,43 @@
+from __future__ import absolute_import, division, print_function
+import os
+
+from .common import Benchmark
+
+import numpy as np
+
+
+class Records(Benchmark):
+    def setup(self):
+        self.l50 = np.arange(1000)
+        self.fields_number = 10000
+        self.arrays = [self.l50 for _ in range(self.fields_number)]
+        self.formats = [self.l50.dtype.str for _ in range(self.fields_number)]
+        self.formats_str = ','.join(self.formats)
+        self.dtype_ = np.dtype(
+            [
+                ('field_{}'.format(i), self.l50.dtype.str)
+                for i in range(self.fields_number)
+            ]
+        )
+        self.buffer = self.l50.tostring() * self.fields_number
+
+    def time_fromarrays_w_dtype(self):
+        np.core.records.fromarrays(self.arrays, dtype=self.dtype_)
+
+    def time_fromarrays_wo_dtype(self):
+        np.core.records.fromarrays(self.arrays)
+
+    def time_fromarrays_formats_as_list(self):
+        np.core.records.fromarrays(self.arrays, formats=self.formats)
+
+    def time_fromarrays_formats_as_string(self):
+        np.core.records.fromarrays(self.arrays, formats=self.formats_str)
+
+    def time_fromstring_w_dtype(self):
+        np.core.records.fromstring(self.buffer, dtype=self.dtype_)
+
+    def time_fromstring_formats_as_list(self):
+        np.core.records.fromstring(self.buffer, formats=self.formats)
+
+    def time_fromstring_formats_as_string(self):
+        np.core.records.fromstring(self.buffer, formats=self.formats_str)
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
index a7e385f70..62e70782d 100644
--- a/benchmarks/benchmarks/bench_ufunc.py
+++ b/benchmarks/benchmarks/bench_ufunc.py
@@ -15,7 +15,7 @@ ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh',
           'isinf', 'isnan', 'isnat', 'lcm', 'ldexp', 'left_shift', 'less',
           'less_equal', 'log', 'log10', 'log1p', 'log2', 'logaddexp',
           'logaddexp2', 'logical_and', 'logical_not', 'logical_or',
-          'logical_xor', 'maximum', 'minimum', 'mod', 'modf', 'multiply',
+          'logical_xor', 'matmul', 'maximum', 'minimum', 'mod', 'modf', 'multiply',
           'negative', 'nextafter', 'not_equal', 'positive', 'power',
           'rad2deg', 'radians', 'reciprocal', 'remainder', 'right_shift',
           'rint', 'sign', 'signbit', 'sin', 'sinh', 'spacing', 'sqrt',