diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/lib/arraysetops.py | 46 | ||||
-rw-r--r-- | numpy/lib/tests/test_arraysetops.py | 52 |
2 files changed, 49 insertions, 49 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 5102a5d06..56080382c 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -545,27 +545,27 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None): False where an element of `ar1` is in `ar2` and True otherwise). Default is False. ``np.in1d(a, b, invert=True)`` is equivalent to (but is faster than) ``np.invert(in1d(a, b))``. - kind : {None, 'mergesort', 'dictionary'}, optional + kind : {None, 'sort', 'table'}, optional The algorithm to use. This will not affect the final result, but will affect the speed. Default will select automatically based on memory considerations. - * If 'mergesort', will use a mergesort-based approach. This will have + * If 'sort', will use a mergesort-based approach. This will have a memory usage of roughly 6 times the sum of the sizes of `ar1` and `ar2`, not accounting for size of dtypes. - * If 'dictionary', will use a key-dictionary approach similar + * If 'table', will use a key-dictionary approach similar to a counting sort. This is only available for boolean and integer arrays. This will have a memory usage of the size of `ar1` plus the max-min value of `ar2`. This tends to be the faster method if the following formula is true: ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``, but may use greater memory. - * If `None`, will automatically choose 'dictionary' if + * If `None`, will automatically choose 'table' if the required memory allocation is less than or equal to 6 times the sum of the sizes of `ar1` and `ar2`, - otherwise will use 'mergesort'. This is done to not use + otherwise will use 'sort'. This is done to not use a large amount of memory by default, even though - 'dictionary' may be faster in most cases. + 'table' may be faster in most cases. .. versionadded:: 1.8.0 @@ -625,18 +625,18 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None): integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and np.issubdtype(ar2.dtype, np.integer)) - if kind not in {None, 'mergesort', 'dictionary'}: + if kind not in {None, 'sort', 'table'}: raise ValueError( "Invalid kind: {0}. ".format(kind) - + "Please use None, 'mergesort' or 'dictionary'.") + + "Please use None, 'sort' or 'table'.") - if integer_arrays and kind in {None, 'dictionary'}: + if integer_arrays and kind in {None, 'table'}: ar2_min = np.min(ar2) ar2_max = np.max(ar2) ar2_range = int(ar2_max) - int(ar2_min) - # Constraints on whether we can actually use the dictionary method: + # Constraints on whether we can actually use the table method: range_safe_from_overflow = ar2_range < np.iinfo(ar2.dtype).max below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size) @@ -650,7 +650,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None): if ( range_safe_from_overflow and - (below_memory_constraint or kind == 'dictionary') + (below_memory_constraint or kind == 'table') ): if invert: @@ -672,18 +672,18 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None): ar2_min] return outgoing_array - elif kind == 'dictionary': # not range_safe_from_overflow + elif kind == 'table': # not range_safe_from_overflow raise RuntimeError( - "You have specified kind='dictionary', " + "You have specified kind='table', " "but the range of values in `ar2` exceeds the " "maximum integer of the datatype. " - "Please set `kind` to None or 'mergesort'." + "Please set `kind` to None or 'sort'." ) - elif kind == 'dictionary': + elif kind == 'table': raise ValueError( - "The 'dictionary' method is only " + "The 'table' method is only " "supported for boolean or integer arrays. " - "Please select 'mergesort' or None for kind." + "Please select 'sort' or None for kind." ) @@ -759,27 +759,27 @@ def isin(element, test_elements, assume_unique=False, invert=False, calculating `element not in test_elements`. Default is False. ``np.isin(a, b, invert=True)`` is equivalent to (but faster than) ``np.invert(np.isin(a, b))``. - kind : {None, 'mergesort', 'dictionary'}, optional + kind : {None, 'sort', 'table'}, optional The algorithm to use. This will not affect the final result, but will affect the speed. Default will select automatically based on memory considerations. - * If 'mergesort', will use a mergesort-based approach. This will have + * If 'sort', will use a mergesort-based approach. This will have a memory usage of roughly 6 times the sum of the sizes of `ar1` and `ar2`, not accounting for size of dtypes. - * If 'dictionary', will use a key-dictionary approach similar + * If 'table', will use a key-dictionary approach similar to a counting sort. This is only available for boolean and integer arrays. This will have a memory usage of the size of `ar1` plus the max-min value of `ar2`. This tends to be the faster method if the following formula is true: ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``, but may use greater memory. - * If `None`, will automatically choose 'dictionary' if + * If `None`, will automatically choose 'table' if the required memory allocation is less than or equal to 6 times the sum of the sizes of `ar1` and `ar2`, - otherwise will use 'mergesort'. This is done to not use + otherwise will use 'sort'. This is done to not use a large amount of memory by default, even though - 'dictionary' may be faster in most cases. + 'table' may be faster in most cases. Returns diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 8b28fa9e5..57152e4d5 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -205,7 +205,7 @@ class TestSetOps: isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1}) def assert_isin_equal(a, b, old_algorithm=None): - kind = "mergesort" if old_algorithm else None + kind = "sort" if old_algorithm else None x = isin(a, b, kind=kind) y = isin_slow(a, b) assert_array_equal(x, y) @@ -258,21 +258,21 @@ class TestSetOps: ec = np.array([True, False, True, True]) c = in1d(a, b, assume_unique=True) assert_array_equal(c, ec) - c = in1d(a, b, assume_unique=True, kind="mergesort") + c = in1d(a, b, assume_unique=True, kind="sort") assert_array_equal(c, ec) a[0] = 8 ec = np.array([False, False, True, True]) c = in1d(a, b, assume_unique=True) assert_array_equal(c, ec) - c = in1d(a, b, assume_unique=True, kind="mergesort") + c = in1d(a, b, assume_unique=True, kind="sort") assert_array_equal(c, ec) a[0], a[3] = 4, 8 ec = np.array([True, False, True, False]) c = in1d(a, b, assume_unique=True) assert_array_equal(c, ec) - c = in1d(a, b, assume_unique=True, kind="mergesort") + c = in1d(a, b, assume_unique=True, kind="sort") assert_array_equal(c, ec) a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5]) @@ -281,7 +281,7 @@ class TestSetOps: False, True, False, False, False] c = in1d(a, b) assert_array_equal(c, ec) - c = in1d(a, b, kind="mergesort") + c = in1d(a, b, kind="sort") assert_array_equal(c, ec) b = b + [5, 5, 4] * mult @@ -289,7 +289,7 @@ class TestSetOps: True, False, True, True] c = in1d(a, b) assert_array_equal(c, ec) - c = in1d(a, b, kind="mergesort") + c = in1d(a, b, kind="sort") assert_array_equal(c, ec) a = np.array([5, 7, 1, 2]) @@ -297,7 +297,7 @@ class TestSetOps: ec = np.array([True, False, True, True]) c = in1d(a, b) assert_array_equal(c, ec) - c = in1d(a, b, kind="mergesort") + c = in1d(a, b, kind="sort") assert_array_equal(c, ec) a = np.array([5, 7, 1, 1, 2]) @@ -305,7 +305,7 @@ class TestSetOps: ec = np.array([True, False, True, True, True]) c = in1d(a, b) assert_array_equal(c, ec) - c = in1d(a, b, kind="mergesort") + c = in1d(a, b, kind="sort") assert_array_equal(c, ec) a = np.array([5, 5]) @@ -313,7 +313,7 @@ class TestSetOps: ec = np.array([False, False]) c = in1d(a, b) assert_array_equal(c, ec) - c = in1d(a, b, kind="mergesort") + c = in1d(a, b, kind="sort") assert_array_equal(c, ec) a = np.array([5]) @@ -321,7 +321,7 @@ class TestSetOps: ec = np.array([False]) c = in1d(a, b) assert_array_equal(c, ec) - c = in1d(a, b, kind="mergesort") + c = in1d(a, b, kind="sort") assert_array_equal(c, ec) assert_array_equal(in1d([], []), []) @@ -413,7 +413,7 @@ class TestSetOps: b = [2, 3, 4] * mult assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True)) assert_array_equal(np.invert(in1d(a, b)), - in1d(a, b, invert=True, kind="mergesort")) + in1d(a, b, invert=True, kind="sort")) for mult in (1, 10): a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5], @@ -422,7 +422,7 @@ class TestSetOps: b = np.array(b, dtype=np.float32) assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True)) assert_array_equal(np.invert(in1d(a, b)), - in1d(a, b, invert=True, kind="mergesort")) + in1d(a, b, invert=True, kind="sort")) def test_in1d_ravel(self): # Test that in1d ravels its input arrays. This is not documented @@ -436,22 +436,22 @@ class TestSetOps: assert_array_equal(in1d(a, b, assume_unique=False), ec) assert_array_equal(in1d(a, long_b, assume_unique=True), ec) assert_array_equal(in1d(a, long_b, assume_unique=False), ec) - assert_array_equal(in1d(a, b, assume_unique=True, kind="mergesort"), + assert_array_equal(in1d(a, b, assume_unique=True, kind="sort"), ec) assert_array_equal(in1d(a, b, assume_unique=False, - kind="mergesort"), + kind="sort"), ec) assert_array_equal(in1d(a, long_b, assume_unique=True, - kind="mergesort"), + kind="sort"), ec) assert_array_equal(in1d(a, long_b, assume_unique=False, - kind="mergesort"), + kind="sort"), ec) def test_in1d_hit_alternate_algorithm(self): """Hit the standard isin code with integers""" # Need extreme range to hit standard code - # This hits it without the use of kind='dictionary' + # This hits it without the use of kind='table' a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64) b = np.array([2, 3, 4, 1e9], dtype=np.int64) expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool) @@ -472,11 +472,11 @@ class TestSetOps: assert_array_equal(expected, in1d(a, b)) assert_array_equal(expected, - in1d(a, b, kind="mergesort")) + in1d(a, b, kind="sort")) assert_array_equal(np.invert(expected), in1d(a, b, invert=True)) assert_array_equal(np.invert(expected), - in1d(a, b, invert=True, kind="mergesort")) + in1d(a, b, invert=True, kind="sort")) def test_in1d_first_array_is_object(self): ar1 = [None] @@ -545,35 +545,35 @@ class TestSetOps: def test_in1d_errors(self): """Test that in1d raises expected errors.""" - # Error 1: `kind` is not one of 'mergesort' 'dictionary' or None. + # Error 1: `kind` is not one of 'sort' 'table' or None. ar1 = np.array([1, 2, 3, 4, 5]) ar2 = np.array([2, 4, 6, 8, 10]) assert_raises(ValueError, in1d, ar1, ar2, kind='quicksort') - # Error 2: `kind="dictionary"` does not work for non-integral arrays. + # Error 2: `kind="table"` does not work for non-integral arrays. obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object) obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object) - assert_raises(ValueError, in1d, obj_ar1, obj_ar2, kind='dictionary') + assert_raises(ValueError, in1d, obj_ar1, obj_ar2, kind='table') for dtype in [np.int32, np.int64]: ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype) # The range of this array will overflow: overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype) - # Error 3: `kind="dictionary"` will trigger a runtime error + # Error 3: `kind="table"` will trigger a runtime error # if there is an integer overflow expected when computing the # range of ar2 assert_raises( RuntimeError, - in1d, ar1, overflow_ar2, kind='dictionary' + in1d, ar1, overflow_ar2, kind='table' ) # Non-error: `kind=None` will *not* trigger a runtime error # if there is an integer overflow, it will switch to - # the `mergesort` algorithm. + # the `sort` algorithm. result = np.in1d(ar1, overflow_ar2, kind=None) assert_array_equal(result, [True] + [False] * 4) - result = np.in1d(ar1, overflow_ar2, kind='mergesort') + result = np.in1d(ar1, overflow_ar2, kind='sort') assert_array_equal(result, [True] + [False] * 4) def test_union1d(self): |