summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r--numpy/lib/arraysetops.py46
-rw-r--r--numpy/lib/tests/test_arraysetops.py52
2 files changed, 49 insertions, 49 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 5102a5d06..56080382c 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -545,27 +545,27 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
False where an element of `ar1` is in `ar2` and True otherwise).
Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
to (but is faster than) ``np.invert(in1d(a, b))``.
- kind : {None, 'mergesort', 'dictionary'}, optional
+ kind : {None, 'sort', 'table'}, optional
The algorithm to use. This will not affect the final result,
but will affect the speed. Default will select automatically
based on memory considerations.
- * If 'mergesort', will use a mergesort-based approach. This will have
+ * If 'sort', will use a mergesort-based approach. This will have
a memory usage of roughly 6 times the sum of the sizes of
`ar1` and `ar2`, not accounting for size of dtypes.
- * If 'dictionary', will use a key-dictionary approach similar
+ * If 'table', will use a lookup table approach similar
to a counting sort. This is only available for boolean and
integer arrays. This will have a memory usage of the
size of `ar1` plus the max-min value of `ar2`. This tends
to be the faster method if the following formula is true:
``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
but may use greater memory.
- * If `None`, will automatically choose 'dictionary' if
+ * If `None`, will automatically choose 'table' if
the required memory allocation is less than or equal to
6 times the sum of the sizes of `ar1` and `ar2`,
- otherwise will use 'mergesort'. This is done to not use
+ otherwise will use 'sort'. This is done to not use
a large amount of memory by default, even though
- 'dictionary' may be faster in most cases.
+ 'table' may be faster in most cases.
.. versionadded:: 1.8.0
@@ -625,18 +625,18 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
np.issubdtype(ar2.dtype, np.integer))
- if kind not in {None, 'mergesort', 'dictionary'}:
+ if kind not in {None, 'sort', 'table'}:
raise ValueError(
"Invalid kind: {0}. ".format(kind)
- + "Please use None, 'mergesort' or 'dictionary'.")
+ + "Please use None, 'sort' or 'table'.")
- if integer_arrays and kind in {None, 'dictionary'}:
+ if integer_arrays and kind in {None, 'table'}:
ar2_min = np.min(ar2)
ar2_max = np.max(ar2)
ar2_range = int(ar2_max) - int(ar2_min)
- # Constraints on whether we can actually use the dictionary method:
+ # Constraints on whether we can actually use the table method:
range_safe_from_overflow = ar2_range < np.iinfo(ar2.dtype).max
below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
@@ -650,7 +650,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
if (
range_safe_from_overflow and
- (below_memory_constraint or kind == 'dictionary')
+ (below_memory_constraint or kind == 'table')
):
if invert:
@@ -672,18 +672,18 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
ar2_min]
return outgoing_array
- elif kind == 'dictionary': # not range_safe_from_overflow
+ elif kind == 'table': # not range_safe_from_overflow
raise RuntimeError(
- "You have specified kind='dictionary', "
+ "You have specified kind='table', "
"but the range of values in `ar2` exceeds the "
"maximum integer of the datatype. "
- "Please set `kind` to None or 'mergesort'."
+ "Please set `kind` to None or 'sort'."
)
- elif kind == 'dictionary':
+ elif kind == 'table':
raise ValueError(
- "The 'dictionary' method is only "
+ "The 'table' method is only "
"supported for boolean or integer arrays. "
- "Please select 'mergesort' or None for kind."
+ "Please select 'sort' or None for kind."
)
@@ -759,27 +759,27 @@ def isin(element, test_elements, assume_unique=False, invert=False,
calculating `element not in test_elements`. Default is False.
``np.isin(a, b, invert=True)`` is equivalent to (but faster
than) ``np.invert(np.isin(a, b))``.
- kind : {None, 'mergesort', 'dictionary'}, optional
+ kind : {None, 'sort', 'table'}, optional
The algorithm to use. This will not affect the final result,
but will affect the speed. Default will select automatically
based on memory considerations.
- * If 'mergesort', will use a mergesort-based approach. This will have
+ * If 'sort', will use a mergesort-based approach. This will have
a memory usage of roughly 6 times the sum of the sizes of
`ar1` and `ar2`, not accounting for size of dtypes.
- * If 'dictionary', will use a key-dictionary approach similar
+ * If 'table', will use a lookup table approach similar
to a counting sort. This is only available for boolean and
integer arrays. This will have a memory usage of the
size of `ar1` plus the max-min value of `ar2`. This tends
to be the faster method if the following formula is true:
``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
but may use greater memory.
- * If `None`, will automatically choose 'dictionary' if
+ * If `None`, will automatically choose 'table' if
the required memory allocation is less than or equal to
6 times the sum of the sizes of `ar1` and `ar2`,
- otherwise will use 'mergesort'. This is done to not use
+ otherwise will use 'sort'. This is done to not use
a large amount of memory by default, even though
- 'dictionary' may be faster in most cases.
+ 'table' may be faster in most cases.
Returns
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 8b28fa9e5..57152e4d5 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -205,7 +205,7 @@ class TestSetOps:
isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})
def assert_isin_equal(a, b, old_algorithm=None):
- kind = "mergesort" if old_algorithm else None
+ kind = "sort" if old_algorithm else None
x = isin(a, b, kind=kind)
y = isin_slow(a, b)
assert_array_equal(x, y)
@@ -258,21 +258,21 @@ class TestSetOps:
ec = np.array([True, False, True, True])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)
- c = in1d(a, b, assume_unique=True, kind="mergesort")
+ c = in1d(a, b, assume_unique=True, kind="sort")
assert_array_equal(c, ec)
a[0] = 8
ec = np.array([False, False, True, True])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)
- c = in1d(a, b, assume_unique=True, kind="mergesort")
+ c = in1d(a, b, assume_unique=True, kind="sort")
assert_array_equal(c, ec)
a[0], a[3] = 4, 8
ec = np.array([True, False, True, False])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)
- c = in1d(a, b, assume_unique=True, kind="mergesort")
+ c = in1d(a, b, assume_unique=True, kind="sort")
assert_array_equal(c, ec)
a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
@@ -281,7 +281,7 @@ class TestSetOps:
False, True, False, False, False]
c = in1d(a, b)
assert_array_equal(c, ec)
- c = in1d(a, b, kind="mergesort")
+ c = in1d(a, b, kind="sort")
assert_array_equal(c, ec)
b = b + [5, 5, 4] * mult
@@ -289,7 +289,7 @@ class TestSetOps:
True, False, True, True]
c = in1d(a, b)
assert_array_equal(c, ec)
- c = in1d(a, b, kind="mergesort")
+ c = in1d(a, b, kind="sort")
assert_array_equal(c, ec)
a = np.array([5, 7, 1, 2])
@@ -297,7 +297,7 @@ class TestSetOps:
ec = np.array([True, False, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)
- c = in1d(a, b, kind="mergesort")
+ c = in1d(a, b, kind="sort")
assert_array_equal(c, ec)
a = np.array([5, 7, 1, 1, 2])
@@ -305,7 +305,7 @@ class TestSetOps:
ec = np.array([True, False, True, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)
- c = in1d(a, b, kind="mergesort")
+ c = in1d(a, b, kind="sort")
assert_array_equal(c, ec)
a = np.array([5, 5])
@@ -313,7 +313,7 @@ class TestSetOps:
ec = np.array([False, False])
c = in1d(a, b)
assert_array_equal(c, ec)
- c = in1d(a, b, kind="mergesort")
+ c = in1d(a, b, kind="sort")
assert_array_equal(c, ec)
a = np.array([5])
@@ -321,7 +321,7 @@ class TestSetOps:
ec = np.array([False])
c = in1d(a, b)
assert_array_equal(c, ec)
- c = in1d(a, b, kind="mergesort")
+ c = in1d(a, b, kind="sort")
assert_array_equal(c, ec)
assert_array_equal(in1d([], []), [])
@@ -413,7 +413,7 @@ class TestSetOps:
b = [2, 3, 4] * mult
assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True))
assert_array_equal(np.invert(in1d(a, b)),
- in1d(a, b, invert=True, kind="mergesort"))
+ in1d(a, b, invert=True, kind="sort"))
for mult in (1, 10):
a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5],
@@ -422,7 +422,7 @@ class TestSetOps:
b = np.array(b, dtype=np.float32)
assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True))
assert_array_equal(np.invert(in1d(a, b)),
- in1d(a, b, invert=True, kind="mergesort"))
+ in1d(a, b, invert=True, kind="sort"))
def test_in1d_ravel(self):
# Test that in1d ravels its input arrays. This is not documented
@@ -436,22 +436,22 @@ class TestSetOps:
assert_array_equal(in1d(a, b, assume_unique=False), ec)
assert_array_equal(in1d(a, long_b, assume_unique=True), ec)
assert_array_equal(in1d(a, long_b, assume_unique=False), ec)
- assert_array_equal(in1d(a, b, assume_unique=True, kind="mergesort"),
+ assert_array_equal(in1d(a, b, assume_unique=True, kind="sort"),
ec)
assert_array_equal(in1d(a, b, assume_unique=False,
- kind="mergesort"),
+ kind="sort"),
ec)
assert_array_equal(in1d(a, long_b, assume_unique=True,
- kind="mergesort"),
+ kind="sort"),
ec)
assert_array_equal(in1d(a, long_b, assume_unique=False,
- kind="mergesort"),
+ kind="sort"),
ec)
def test_in1d_hit_alternate_algorithm(self):
"""Hit the standard isin code with integers"""
# Need extreme range to hit standard code
- # This hits it without the use of kind='dictionary'
+ # This hits it without the use of kind='table'
a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64)
b = np.array([2, 3, 4, 1e9], dtype=np.int64)
expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool)
@@ -472,11 +472,11 @@ class TestSetOps:
assert_array_equal(expected,
in1d(a, b))
assert_array_equal(expected,
- in1d(a, b, kind="mergesort"))
+ in1d(a, b, kind="sort"))
assert_array_equal(np.invert(expected),
in1d(a, b, invert=True))
assert_array_equal(np.invert(expected),
- in1d(a, b, invert=True, kind="mergesort"))
+ in1d(a, b, invert=True, kind="sort"))
def test_in1d_first_array_is_object(self):
ar1 = [None]
@@ -545,35 +545,35 @@ class TestSetOps:
def test_in1d_errors(self):
"""Test that in1d raises expected errors."""
- # Error 1: `kind` is not one of 'mergesort' 'dictionary' or None.
+ # Error 1: `kind` is not one of 'sort', 'table' or None.
ar1 = np.array([1, 2, 3, 4, 5])
ar2 = np.array([2, 4, 6, 8, 10])
assert_raises(ValueError, in1d, ar1, ar2, kind='quicksort')
- # Error 2: `kind="dictionary"` does not work for non-integral arrays.
+ # Error 2: `kind="table"` does not work for non-integral arrays.
obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object)
obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object)
- assert_raises(ValueError, in1d, obj_ar1, obj_ar2, kind='dictionary')
+ assert_raises(ValueError, in1d, obj_ar1, obj_ar2, kind='table')
for dtype in [np.int32, np.int64]:
ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype)
# The range of this array will overflow:
overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype)
- # Error 3: `kind="dictionary"` will trigger a runtime error
+ # Error 3: `kind="table"` will trigger a runtime error
# if there is an integer overflow expected when computing the
# range of ar2
assert_raises(
RuntimeError,
- in1d, ar1, overflow_ar2, kind='dictionary'
+ in1d, ar1, overflow_ar2, kind='table'
)
# Non-error: `kind=None` will *not* trigger a runtime error
# if there is an integer overflow, it will switch to
- # the `mergesort` algorithm.
+ # the `sort` algorithm.
result = np.in1d(ar1, overflow_ar2, kind=None)
assert_array_equal(result, [True] + [False] * 4)
- result = np.in1d(ar1, overflow_ar2, kind='mergesort')
+ result = np.in1d(ar1, overflow_ar2, kind='sort')
assert_array_equal(result, [True] + [False] * 4)
def test_union1d(self):