From e1ea1d5f130025f0b6ac6e28fb1a3c8581ad13fd Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Thu, 14 Feb 2019 12:56:22 +0500 Subject: Sorting benchmarks. --- benchmarks/benchmarks/bench_function_base.py | 134 +++++++++++++++++++++------ 1 file changed, 106 insertions(+), 28 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py index 64e578680..2f5808b1a 100644 --- a/benchmarks/benchmarks/bench_function_base.py +++ b/benchmarks/benchmarks/bench_function_base.py @@ -98,44 +98,122 @@ class Select(Benchmark): class Sort(Benchmark): params = [ ['quick', 'merge', 'heap'], - ['float32', 'int32', 'uint32'] + ['float32', 'int32', 'uint32'], + [ + 'ordered', + 'reversed', + 'uniform', + 'sorted_block_10', + 'sorted_block_100', + 'sorted_block_1000', + 'swapped_pair_1_percent', + 'swapped_pair_10_percent', + 'swapped_pair_50_percent', + 'random_unsorted_area_50_percent', + 'random_unsorted_area_10_percent', + 'random_unsorted_area_1_percent', + 'random_bubble_1_fold', + 'random_bubble_5_fold', + 'random_bubble_10_fold', + ], ] - param_names = ['kind', 'dtype'] + param_names = ['kind', 'dtype', 'array_type'] - def setup(self, kind, dtype): - self.e = np.arange(10000, dtype=dtype) - self.o = np.arange(10001, dtype=dtype) - np.random.seed(25) - np.random.shuffle(self.o) - # quicksort implementations can have issues with equal elements - self.equal = np.ones(10000, dtype=dtype) - self.many_equal = np.sort(np.arange(10000) % 10).astype(dtype) + ARRAY_SIZE = 10000 - try: - np.sort(self.e, kind=kind) - except TypeError: - raise NotImplementedError() + def setup(self, kind, dtype, array_type): + np.random.seed(1234) + self.arr = getattr(self, 'array_' + array_type)(self.ARRAY_SIZE, dtype) - def time_sort(self, kind, dtype): - np.sort(self.e, kind=kind) + def time_sort_inplace(self, kind, dtype, array_type): + self.arr.sort(kind=kind) - def time_sort_random(self, kind, dtype): - np.sort(self.o, kind=kind) + def time_sort(self, kind, dtype, array_type): + np.sort(self.arr, kind=kind) - def time_sort_inplace(self, kind, dtype): - self.e.sort(kind=kind) + def time_argsort(self, kind, dtype, array_type): + np.argsort(self.arr, kind=kind) - def time_sort_equal(self, kind, dtype): - self.equal.sort(kind=kind) + def array_random(self, size, dtype): + arr = np.arange(size, dtype=dtype) + np.random.shuffle(arr) + return arr + + def array_ordered(self, size, dtype): + return np.arange(size, dtype=dtype) - def time_sort_many_equal(self, kind, dtype): - self.many_equal.sort(kind=kind) + def array_reversed(self, size, dtype): + return np.arange(size-1, -1, -1, dtype=dtype) - def time_argsort(self, kind, dtype): - self.e.argsort(kind=kind) + def array_uniform(self, size, dtype): + return np.ones(size, dtype=dtype) - def time_argsort_random(self, kind, dtype): - self.o.argsort(kind=kind) + def array_sorted_block_10(self, size, dtype): + return self.sorted_block(size, dtype, 10) + + def array_sorted_block_100(self, size, dtype): + return self.sorted_block(size, dtype, 100) + + def array_sorted_block_1000(self, size, dtype): + return self.sorted_block(size, dtype, 1000) + + def array_swapped_pair_1_percent(self, size, dtype): + return self.swapped_pair(size, dtype, 0.01) + + def array_swapped_pair_10_percent(self, size, dtype): + return self.swapped_pair(size, dtype, 0.1) + + def array_swapped_pair_50_percent(self, size, dtype): + return self.swapped_pair(size, dtype, 0.5) + + AREA_SIZE = 10 + + def array_random_unsorted_area_50_percent(self, size, dtype): + return self.random_unsorted_area(size, dtype, 0.5, self.AREA_SIZE) + + def array_random_unsorted_area_10_percent(self, size, dtype): + return self.random_unsorted_area(size, dtype, 0.1, self.AREA_SIZE) + + def array_random_unsorted_area_1_percent(self, size, dtype): + return self.random_unsorted_area(size, dtype, 0.01, self.AREA_SIZE) + + BUBBLE_SIZE = 10 + + def array_random_bubble_1_fold(self, size, dtype): + return self.random_unsorted_area(size, dtype, 1, size / self.BUBBLE_SIZE) + + def array_random_bubble_5_fold(self, size, dtype): + return self.random_unsorted_area(size, dtype, 5, size / self.BUBBLE_SIZE) + + def array_random_bubble_10_fold(self, size, dtype): + return self.random_unsorted_area(size, dtype, 10, size / self.BUBBLE_SIZE) + + def swapped_pair(self, size, dtype, swap_frac): + a = np.arange(size, dtype=dtype) + for _ in range(int(size * swap_frac)): + x, y = np.random.randint(0, size, 2) + a[x], a[y] = a[y], a[x] + return a + + def sorted_block(self, size, dtype, block_size): + a = np.arange(size, dtype=dtype) + b = [] + if size < block_size: + return a + block_num = size // block_size + for i in range(block_num): + b.extend(a[i::block_num]) + return np.array(b) + + def random_unsorted_area(self, size, dtype, frac, area_num): + area_num = int(area_num) + a = np.arange(size, dtype=dtype) + unsorted_len = int(size * frac / area_num) + for _ in range(area_num): + start = np.random.randint(size-unsorted_len) + end = start + unsorted_len + np.random.shuffle(a[start:end]) + return a class SortWorst(Benchmark): -- cgit v1.2.1 From 2a67d015b276541ca99800134097491c1dfb740b Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Mon, 18 Feb 2019 12:59:07 +0500 Subject: Change according to feedback from @charris. --- benchmarks/benchmarks/bench_function_base.py | 38 ++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py index 2f5808b1a..fa02cd82d 100644 --- a/benchmarks/benchmarks/bench_function_base.py +++ b/benchmarks/benchmarks/bench_function_base.py @@ -96,9 +96,35 @@ class Select(Benchmark): class Sort(Benchmark): + """ + This benchmark tests sorting performance with several + different types of arrays that are likely to appear in + real-world applications. + + ordered + Perfectly ordered array + reversed + A reversed array + uniform + A array where all values are the same + sorted_block_X + An array that is entirely composed of sorted blocks + of size X + swapped_pair_X_percent + An array which has random pairs swapped. The number + of swapped pairs is X% of the size of the array + random_unsorted_area_X_percent + This kind of array has random unsorted areas which + take up X% of the original array, the size of one + area is AREA_SIZE + random_bubble_X_fold + ? + """ params = [ + # In NumPy 1.17 and newer, 'merge' can be one of several + # stable sorts ['quick', 'merge', 'heap'], - ['float32', 'int32', 'uint32'], + ['float64', 'int64', 'uint64'], [ 'ordered', 'reversed', @@ -119,7 +145,13 @@ class Sort(Benchmark): ] param_names = ['kind', 'dtype', 'array_type'] + # The size of the benchmarked arrays. ARRAY_SIZE = 10000 + # The size of the unsorted area in the "random unsorted area" + # benchmarks + AREA_SIZE = 10 + # ? + BUBBLE_SIZE = 10 def setup(self, kind, dtype, array_type): np.random.seed(1234) @@ -166,8 +198,6 @@ class Sort(Benchmark): def array_swapped_pair_50_percent(self, size, dtype): return self.swapped_pair(size, dtype, 0.5) - AREA_SIZE = 10 - def array_random_unsorted_area_50_percent(self, size, dtype): return self.random_unsorted_area(size, dtype, 0.5, self.AREA_SIZE) @@ -177,8 +207,6 @@ class Sort(Benchmark): def array_random_unsorted_area_1_percent(self, size, dtype): return self.random_unsorted_area(size, dtype, 0.01, self.AREA_SIZE) - BUBBLE_SIZE = 10 - def array_random_bubble_1_fold(self, size, dtype): return self.random_unsorted_area(size, dtype, 1, size / self.BUBBLE_SIZE) -- cgit v1.2.1 From 8aa669b22946e8f5aaaa591c37262b26ebd7976a Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Mon, 18 Feb 2019 16:14:03 +0500 Subject: Respond to feedback from @eric-wieser --- benchmarks/benchmarks/bench_function_base.py | 169 +++++++++++++-------------- 1 file changed, 83 insertions(+), 86 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py index fa02cd82d..a82d424e7 100644 --- a/benchmarks/benchmarks/bench_function_base.py +++ b/benchmarks/benchmarks/bench_function_base.py @@ -95,6 +95,84 @@ class Select(Benchmark): np.select(self.cond_large, ([self.d, self.e] * 10)) +class SortGenerator(object): + # The size of the unsorted area in the "random unsorted area" + # benchmarks + AREA_SIZE = 10 + # The size of the "partially ordered" sub-arrays + BUBBLE_SIZE = 10 + + @staticmethod + def random(size, dtype): + arr = np.arange(size, dtype=dtype) + np.random.shuffle(arr) + return arr + + @staticmethod + def ordered(size, dtype): + return np.arange(size, dtype=dtype) + + @staticmethod + def reversed(size, dtype): + return np.arange(size-1, -1, -1, dtype=dtype) + + @staticmethod + def uniform(size, dtype): + return np.ones(size, dtype=dtype) + + @staticmethod + def _type_swapped_pair(size, dtype, swap_frac): + a = np.arange(size, dtype=dtype) + for _ in range(int(size * swap_frac)): + x, y = np.random.randint(0, size, 2) + a[x], a[y] = a[y], a[x] + return a + + @staticmethod + def _type_sorted_block(size, dtype, block_size): + a = np.arange(size, dtype=dtype) + b = [] + if size < block_size: + return a + block_num = size // block_size + for i in range(block_num): + b.extend(a[i::block_num]) + return np.array(b) + + @staticmethod + def _type_random_unsorted_area(size, dtype, frac, area_num): + area_num = int(area_num) + a = np.arange(size, dtype=dtype) + unsorted_len = int(size * frac / area_num) + for _ in range(area_num): + start = np.random.randint(size-unsorted_len) + end = start + unsorted_len + np.random.shuffle(a[start:end]) + return a + + def __getattr__(self, name): + import re + import functools + token_specification = [ + (r'sorted\_block\_([0-9]+)', + lambda size, dtype, x: self._type_sorted_block(size, dtype, x)), + (r'swapped\_pair\_([0-9]+)\_percent', + lambda size, dtype, x: self._type_swapped_pair(size, dtype, x / 100.0)), + (r'random\_unsorted\_area\_([0-9]+)\_percent', + lambda size, dtype, x: self._type_random_unsorted_area(size, dtype, x / 100.0, self.AREA_SIZE)), + (r'random_bubble\_([0-9]+)\_fold', + lambda size, dtype, x: self._type_random_unsorted_area(size, dtype, x * self.BUBBLE_SIZE / size, self.BUBBLE_SIZE)), + ] + + for pattern, function in token_specification: + match = re.fullmatch(pattern, name) + + if match is not None: + return functools.partial(function, x=int(match.group(1))) + + raise AttributeError + + class Sort(Benchmark): """ This benchmark tests sorting performance with several @@ -116,13 +194,14 @@ class Sort(Benchmark): random_unsorted_area_X_percent This kind of array has random unsorted areas which take up X% of the original array, the size of one - area is AREA_SIZE + area is SortGenerator.AREA_SIZE random_bubble_X_fold - ? + Same as random_unsorted_area, except that it has + X areas of size SortGenerator.BUBBLE_SIZE """ params = [ # In NumPy 1.17 and newer, 'merge' can be one of several - # stable sorts + # stable sorts, it isn't necessarily merge sort. ['quick', 'merge', 'heap'], ['float64', 'int64', 'uint64'], [ @@ -147,15 +226,10 @@ class Sort(Benchmark): # The size of the benchmarked arrays. ARRAY_SIZE = 10000 - # The size of the unsorted area in the "random unsorted area" - # benchmarks - AREA_SIZE = 10 - # ? - BUBBLE_SIZE = 10 def setup(self, kind, dtype, array_type): np.random.seed(1234) - self.arr = getattr(self, 'array_' + array_type)(self.ARRAY_SIZE, dtype) + self.arr = getattr(SortGenerator(), array_type)(self.ARRAY_SIZE, dtype) def time_sort_inplace(self, kind, dtype, array_type): self.arr.sort(kind=kind) @@ -166,83 +240,6 @@ class Sort(Benchmark): def time_argsort(self, kind, dtype, array_type): np.argsort(self.arr, kind=kind) - def array_random(self, size, dtype): - arr = np.arange(size, dtype=dtype) - np.random.shuffle(arr) - return arr - - def array_ordered(self, size, dtype): - return np.arange(size, dtype=dtype) - - def array_reversed(self, size, dtype): - return np.arange(size-1, -1, -1, dtype=dtype) - - def array_uniform(self, size, dtype): - return np.ones(size, dtype=dtype) - - def array_sorted_block_10(self, size, dtype): - return self.sorted_block(size, dtype, 10) - - def array_sorted_block_100(self, size, dtype): - return self.sorted_block(size, dtype, 100) - - def array_sorted_block_1000(self, size, dtype): - return self.sorted_block(size, dtype, 1000) - - def array_swapped_pair_1_percent(self, size, dtype): - return self.swapped_pair(size, dtype, 0.01) - - def array_swapped_pair_10_percent(self, size, dtype): - return self.swapped_pair(size, dtype, 0.1) - - def array_swapped_pair_50_percent(self, size, dtype): - return self.swapped_pair(size, dtype, 0.5) - - def array_random_unsorted_area_50_percent(self, size, dtype): - return self.random_unsorted_area(size, dtype, 0.5, self.AREA_SIZE) - - def array_random_unsorted_area_10_percent(self, size, dtype): - return self.random_unsorted_area(size, dtype, 0.1, self.AREA_SIZE) - - def array_random_unsorted_area_1_percent(self, size, dtype): - return self.random_unsorted_area(size, dtype, 0.01, self.AREA_SIZE) - - def array_random_bubble_1_fold(self, size, dtype): - return self.random_unsorted_area(size, dtype, 1, size / self.BUBBLE_SIZE) - - def array_random_bubble_5_fold(self, size, dtype): - return self.random_unsorted_area(size, dtype, 5, size / self.BUBBLE_SIZE) - - def array_random_bubble_10_fold(self, size, dtype): - return self.random_unsorted_area(size, dtype, 10, size / self.BUBBLE_SIZE) - - def swapped_pair(self, size, dtype, swap_frac): - a = np.arange(size, dtype=dtype) - for _ in range(int(size * swap_frac)): - x, y = np.random.randint(0, size, 2) - a[x], a[y] = a[y], a[x] - return a - - def sorted_block(self, size, dtype, block_size): - a = np.arange(size, dtype=dtype) - b = [] - if size < block_size: - return a - block_num = size // block_size - for i in range(block_num): - b.extend(a[i::block_num]) - return np.array(b) - - def random_unsorted_area(self, size, dtype, frac, area_num): - area_num = int(area_num) - a = np.arange(size, dtype=dtype) - unsorted_len = int(size * frac / area_num) - for _ in range(area_num): - start = np.random.randint(size-unsorted_len) - end = start + unsorted_len - np.random.shuffle(a[start:end]) - return a - class SortWorst(Benchmark): def setup(self): -- cgit v1.2.1 From 7ce362e5ce1667ef60b2f45d52ebb08339a2c8b0 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Wed, 20 Feb 2019 02:28:36 +0500 Subject: Get rid of __getattr__ magic. --- benchmarks/benchmarks/bench_function_base.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py index a82d424e7..8a7bf362d 100644 --- a/benchmarks/benchmarks/bench_function_base.py +++ b/benchmarks/benchmarks/bench_function_base.py @@ -150,27 +150,28 @@ class SortGenerator(object): np.random.shuffle(a[start:end]) return a - def __getattr__(self, name): + @classmethod + def get_array(cls, array_type, size, dtype): import re import functools token_specification = [ (r'sorted\_block\_([0-9]+)', - lambda size, dtype, x: self._type_sorted_block(size, dtype, x)), + lambda size, dtype, x: cls._type_sorted_block(size, dtype, x)), (r'swapped\_pair\_([0-9]+)\_percent', - lambda size, dtype, x: self._type_swapped_pair(size, dtype, x / 100.0)), + lambda size, dtype, x: cls._type_swapped_pair(size, dtype, x / 100.0)), (r'random\_unsorted\_area\_([0-9]+)\_percent', - lambda size, dtype, x: self._type_random_unsorted_area(size, dtype, x / 100.0, self.AREA_SIZE)), + lambda size, dtype, x: cls._type_random_unsorted_area(size, dtype, x / 100.0, cls.AREA_SIZE)), (r'random_bubble\_([0-9]+)\_fold', - lambda size, dtype, x: self._type_random_unsorted_area(size, dtype, x * self.BUBBLE_SIZE / size, self.BUBBLE_SIZE)), + lambda size, dtype, x: cls._type_random_unsorted_area(size, dtype, x * cls.BUBBLE_SIZE / size, cls.BUBBLE_SIZE)), ] for pattern, function in token_specification: - match = re.fullmatch(pattern, name) + match = re.fullmatch(pattern, array_type) if match is not None: - return functools.partial(function, x=int(match.group(1))) + return function(size, dtype, int(match.group(1))) - raise AttributeError + raise ValueError("Incorrect array_type specified.") class Sort(Benchmark): @@ -229,7 +230,7 @@ class Sort(Benchmark): def setup(self, kind, dtype, array_type): np.random.seed(1234) - self.arr = getattr(SortGenerator(), array_type)(self.ARRAY_SIZE, dtype) + self.arr = SortGenerator.get_array(array_type, self.ARRAY_SIZE, dtype) def time_sort_inplace(self, kind, dtype, array_type): self.arr.sort(kind=kind) -- cgit v1.2.1 From 5e23c6edfadcd46325eb954aa5357a21c9e1a474 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Wed, 20 Feb 2019 02:39:08 +0500 Subject: Fix the missing types. --- benchmarks/benchmarks/bench_function_base.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py index 8a7bf362d..6c2856633 100644 --- a/benchmarks/benchmarks/bench_function_base.py +++ b/benchmarks/benchmarks/bench_function_base.py @@ -103,21 +103,21 @@ class SortGenerator(object): BUBBLE_SIZE = 10 @staticmethod - def random(size, dtype): + def _random(size, dtype): arr = np.arange(size, dtype=dtype) np.random.shuffle(arr) return arr @staticmethod - def ordered(size, dtype): + def _ordered(size, dtype): return np.arange(size, dtype=dtype) @staticmethod - def reversed(size, dtype): + def _reversed(size, dtype): return np.arange(size-1, -1, -1, dtype=dtype) @staticmethod - def uniform(size, dtype): + def _uniform(size, dtype): return np.ones(size, dtype=dtype) @staticmethod @@ -155,21 +155,29 @@ class SortGenerator(object): import re import functools token_specification = [ + (r'random', + lambda match: cls._random(size, dtype)), + (r'ordered', + lambda match: cls._ordered(size, dtype)), + (r'reversed', + lambda match: cls._reversed(size, dtype)), + (r'uniform', + lambda match: cls._uniform(size, dtype)), (r'sorted\_block\_([0-9]+)', - lambda size, dtype, x: cls._type_sorted_block(size, dtype, x)), + lambda match: cls._type_sorted_block(size, dtype, int(match.group(1)))), (r'swapped\_pair\_([0-9]+)\_percent', - lambda size, dtype, x: cls._type_swapped_pair(size, dtype, x / 100.0)), + lambda match: cls._type_swapped_pair(size, dtype, int(match.group(1)) / 100.0)), (r'random\_unsorted\_area\_([0-9]+)\_percent', - lambda size, dtype, x: cls._type_random_unsorted_area(size, dtype, x / 100.0, cls.AREA_SIZE)), + lambda match: cls._type_random_unsorted_area(size, dtype, int(match.group(1)) / 100.0, cls.AREA_SIZE)), (r'random_bubble\_([0-9]+)\_fold', - lambda size, dtype, x: cls._type_random_unsorted_area(size, dtype, x * cls.BUBBLE_SIZE / size, cls.BUBBLE_SIZE)), + lambda match: cls._type_random_unsorted_area(size, dtype, int(match.group(1)) * cls.BUBBLE_SIZE / size, cls.BUBBLE_SIZE)), ] for pattern, function in token_specification: match = re.fullmatch(pattern, array_type) if match is not None: - return function(size, dtype, int(match.group(1))) + return function(match) raise ValueError("Incorrect array_type specified.") -- cgit v1.2.1 From f28155aada409ec25c4cd82944c0c1ad47464feb Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Mon, 25 Feb 2019 01:36:15 +0100 Subject: Address feedback from @eric-wieser and @charris. --- benchmarks/benchmarks/bench_function_base.py | 144 ++++++++++++--------------- 1 file changed, 65 insertions(+), 79 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py index 6c2856633..64b36f7c4 100644 --- a/benchmarks/benchmarks/bench_function_base.py +++ b/benchmarks/benchmarks/bench_function_base.py @@ -98,30 +98,46 @@ class Select(Benchmark): class SortGenerator(object): # The size of the unsorted area in the "random unsorted area" # benchmarks - AREA_SIZE = 10 + AREA_SIZE = 100 # The size of the "partially ordered" sub-arrays - BUBBLE_SIZE = 10 + BUBBLE_SIZE = 100 @staticmethod - def _random(size, dtype): + def random(size, dtype): + """ + Returns a randomly-shuffled array. + """ arr = np.arange(size, dtype=dtype) np.random.shuffle(arr) return arr @staticmethod - def _ordered(size, dtype): + def ordered(size, dtype): + """ + Returns an ordered array. + """ return np.arange(size, dtype=dtype) @staticmethod - def _reversed(size, dtype): + def reversed(size, dtype): + """ + Returns an array that's in descending order. + """ return np.arange(size-1, -1, -1, dtype=dtype) @staticmethod - def _uniform(size, dtype): + def uniform(size, dtype): + """ + Returns an array that has the same value everywhere. + """ return np.ones(size, dtype=dtype) @staticmethod - def _type_swapped_pair(size, dtype, swap_frac): + def swapped_pair(size, dtype, swap_frac): + """ + Returns an ordered array, but one that has ``swap_frac * size`` + pairs swapped. + """ a = np.arange(size, dtype=dtype) for _ in range(int(size * swap_frac)): x, y = np.random.randint(0, size, 2) @@ -129,7 +145,10 @@ class SortGenerator(object): return a @staticmethod - def _type_sorted_block(size, dtype, block_size): + def sorted_block(size, dtype, block_size): + """ + Returns an array with blocks that are all sorted. + """ a = np.arange(size, dtype=dtype) b = [] if size < block_size: @@ -139,47 +158,33 @@ class SortGenerator(object): b.extend(a[i::block_num]) return np.array(b) - @staticmethod - def _type_random_unsorted_area(size, dtype, frac, area_num): - area_num = int(area_num) + @classmethod + def random_unsorted_area(cls, size, dtype, frac, area_size=None): + """ + This type of array has random unsorted areas such that they + compose ``frac`` percent of the original array. + """ + if area_size is None: + area_size = cls.AREA_SIZE + + area_num = int(size * frac / area_size) a = np.arange(size, dtype=dtype) - unsorted_len = int(size * frac / area_num) for _ in range(area_num): - start = np.random.randint(size-unsorted_len) - end = start + unsorted_len + start = np.random.randint(size-area_size) + end = start + area_size np.random.shuffle(a[start:end]) return a @classmethod - def get_array(cls, array_type, size, dtype): - import re - import functools - token_specification = [ - (r'random', - lambda match: cls._random(size, dtype)), - (r'ordered', - lambda match: cls._ordered(size, dtype)), - (r'reversed', - lambda match: cls._reversed(size, dtype)), - (r'uniform', - lambda match: cls._uniform(size, dtype)), - (r'sorted\_block\_([0-9]+)', - lambda match: cls._type_sorted_block(size, dtype, int(match.group(1)))), - (r'swapped\_pair\_([0-9]+)\_percent', - lambda match: cls._type_swapped_pair(size, dtype, int(match.group(1)) / 100.0)), - (r'random\_unsorted\_area\_([0-9]+)\_percent', - lambda match: cls._type_random_unsorted_area(size, dtype, int(match.group(1)) / 100.0, cls.AREA_SIZE)), - (r'random_bubble\_([0-9]+)\_fold', - lambda match: cls._type_random_unsorted_area(size, dtype, int(match.group(1)) * cls.BUBBLE_SIZE / size, cls.BUBBLE_SIZE)), - ] - - for pattern, function in token_specification: - match = re.fullmatch(pattern, array_type) - - if match is not None: - return function(match) - - raise ValueError("Incorrect array_type specified.") + def random_bubble(cls, size, dtype, bubble_num, bubble_size=None): + """ + This type of array has ``bubble_num`` random unsorted areas. + """ + if bubble_size is None: + bubble_size = cls.BUBBLE_SIZE + frac = bubble_size * bubble_num / size + + return cls.random_unsorted_area(size, dtype, frac, bubble_size) class Sort(Benchmark): @@ -187,26 +192,6 @@ class Sort(Benchmark): This benchmark tests sorting performance with several different types of arrays that are likely to appear in real-world applications. - - ordered - Perfectly ordered array - reversed - A reversed array - uniform - A array where all values are the same - sorted_block_X - An array that is entirely composed of sorted blocks - of size X - swapped_pair_X_percent - An array which has random pairs swapped. The number - of swapped pairs is X% of the size of the array - random_unsorted_area_X_percent - This kind of array has random unsorted areas which - take up X% of the original array, the size of one - area is SortGenerator.AREA_SIZE - random_bubble_X_fold - Same as random_unsorted_area, except that it has - X areas of size SortGenerator.BUBBLE_SIZE """ params = [ # In NumPy 1.17 and newer, 'merge' can be one of several @@ -214,21 +199,21 @@ class Sort(Benchmark): ['quick', 'merge', 'heap'], ['float64', 'int64', 'uint64'], [ - 'ordered', - 'reversed', - 'uniform', - 'sorted_block_10', - 'sorted_block_100', - 'sorted_block_1000', - 'swapped_pair_1_percent', - 'swapped_pair_10_percent', - 'swapped_pair_50_percent', - 'random_unsorted_area_50_percent', - 'random_unsorted_area_10_percent', - 'random_unsorted_area_1_percent', - 'random_bubble_1_fold', - 'random_bubble_5_fold', - 'random_bubble_10_fold', + ('ordered',), + ('reversed',), + ('uniform',), + ('sorted_block', 10), + ('sorted_block', 100), + ('sorted_block', 1000), + ('swapped_pair', 0.01), + ('swapped_pair', 0.1), + ('swapped_pair', 0.5), + ('random_unsorted_area', 0.5), + ('random_unsorted_area', 0.1), + ('random_unsorted_area', 0.01), + ('random_bubble', 1), + ('random_bubble', 5), + ('random_bubble', 10), ], ] param_names = ['kind', 'dtype', 'array_type'] @@ -238,7 +223,8 @@ class Sort(Benchmark): def setup(self, kind, dtype, array_type): np.random.seed(1234) - self.arr = SortGenerator.get_array(array_type, self.ARRAY_SIZE, dtype) + array_class = array_type[0] + self.arr = getattr(SortGenerator, array_class)(self.ARRAY_SIZE, dtype, *array_type[1:]) def time_sort_inplace(self, kind, dtype, array_type): self.arr.sort(kind=kind) -- cgit v1.2.1 From 19770c759827eef033b07d94c942aabd5ee5bcbf Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Mon, 25 Feb 2019 02:59:07 +0100 Subject: Minor typo. [ci skip] --- benchmarks/benchmarks/bench_function_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'benchmarks') diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py index 64b36f7c4..07ada2bbb 100644 --- a/benchmarks/benchmarks/bench_function_base.py +++ b/benchmarks/benchmarks/bench_function_base.py @@ -162,7 +162,7 @@ class SortGenerator(object): def random_unsorted_area(cls, size, dtype, frac, area_size=None): """ This type of array has random unsorted areas such that they - compose ``frac`` percent of the original array. + compose the fraction ``frac`` of the original array. """ if area_size is None: area_size = cls.AREA_SIZE -- cgit v1.2.1