From 7dcd29aaafe1ab8be4be04d3c793e5bcaf17459f Mon Sep 17 00:00:00 2001
From: Filip Trojan
Date: Fri, 12 Feb 2021 17:47:55 +0100
Subject: BUG: Fix unique handling of nan entries. (#18070)

* benchmark bench_lib.Unique added

* extended test_unique_1d

* modify _unique1d

* extend test with return_index, return_inverse and return_counts parameters

* documentation updated

* Update numpy/lib/arraysetops.py

Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>

* full coverage of nan types

Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>

* added tests for the datetime like dtypes

* nan as vector of length 1

* use aux[-1] as nan, ..versionchanged, release note

* for complex arrays all NaN values are considered equivalent

Co-authored-by: filip_trojan
Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>
---
 benchmarks/benchmarks/bench_lib.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'benchmarks')

diff --git a/benchmarks/benchmarks/bench_lib.py b/benchmarks/benchmarks/bench_lib.py
index c22ceaa5e..f7884cd6c 100644
--- a/benchmarks/benchmarks/bench_lib.py
+++ b/benchmarks/benchmarks/bench_lib.py
@@ -53,6 +53,7 @@ class Pad(Benchmark):
     def time_pad(self, shape, pad_width, mode):
         np.pad(self.array, pad_width, mode)
 
+
 class Nan(Benchmark):
     """Benchmarks for nan functions"""
 
@@ -113,3 +114,26 @@ class Nan(Benchmark):
 
     def time_nanpercentile(self, array_size, percent_nans):
         np.nanpercentile(self.arr, q=50)
+
+
+class Unique(Benchmark):
+    """Benchmark for np.unique with np.nan values."""
+
+    param_names = ["array_size", "percent_nans"]
+    params = [
+        # sizes of the 1D arrays
+        [200, int(2e5)],
+        # percent of np.nan in arrays
+        [0, 0.1, 2., 50., 90.],
+    ]
+
+    def setup(self, array_size, percent_nans):
+        np.random.seed(123)
+        # produce a randomly shuffled array with the
+        # approximate desired percentage np.nan content
+        base_array = np.random.uniform(size=array_size)
+        base_array[base_array < percent_nans / 100.] = np.nan
+        self.arr = base_array
+
+    def time_unique(self, array_size, percent_nans):
+        np.unique(self.arr)
-- 
cgit v1.2.1