summaryrefslogtreecommitdiff
path: root/numpy/lib/arraysetops.py
diff options
context:
space:
mode:
authorMilesCranmer <miles.cranmer@gmail.com>2022-06-20 14:21:07 -0400
committerMilesCranmer <miles.cranmer@gmail.com>2022-06-20 14:21:07 -0400
commit48580947a5c6c30ef81aa49da2a9399d6b1c3b45 (patch)
tree4e63894a4f3fcea6325a644b3c3360c95f8db747 /numpy/lib/arraysetops.py
parent75dbbea7d7111c151498722c0c2811cee86d34ff (diff)
downloadnumpy-48580947a5c6c30ef81aa49da2a9399d6b1c3b45.tar.gz
DOC: Notes on `kind` to in1d/isin docstring
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r--numpy/lib/arraysetops.py28
1 files changed, 18 insertions, 10 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index a81e0af8b..8b95fcac9 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -556,11 +556,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
* If 'table', will use a key-dictionary approach similar
to a counting sort. This is only available for boolean and
integer arrays. This will have a memory usage of the
- size of `ar1` plus the max-min value of `ar2`. This tends
- to be the faster method if the following formula is true:
- ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
- but may use greater memory. `assume_unique` has no effect
- when the 'table' option is used.
+ size of `ar1` plus the max-min value of `ar2`. `assume_unique`
+ has no effect when the 'table' option is used.
* If None, will automatically choose 'table' if
the required memory allocation is less than or equal to
6 times the sum of the sizes of `ar1` and `ar2`,
@@ -593,6 +590,13 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
``asarray(ar2)`` is an object array rather than the expected array of
contained values.
+ Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
+ following relationship is true:
+ ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
+ but may use greater memory. The default value for `kind` will
+ be automatically selected based only on memory usage, so one may
+ manually set ``kind='table'`` if memory constraints can be relaxed.
+
.. versionadded:: 1.4.0
Examples
@@ -772,11 +776,8 @@ def isin(element, test_elements, assume_unique=False, invert=False,
* If 'table', will use a key-dictionary approach similar
to a counting sort. This is only available for boolean and
integer arrays. This will have a memory usage of the
- size of `ar1` plus the max-min value of `ar2`. This tends
- to be the faster method if the following formula is true:
- ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
- but may use greater memory. `assume_unique` has no effect
- when the 'table' option is used.
+ size of `element` plus the max-min value of `test_elements`.
+ `assume_unique` has no effect when the 'table' option is used.
* If None, will automatically choose 'table' if
the required memory allocation is less than or equal to
6 times the sum of the sizes of `element` and `test_elements`,
@@ -812,6 +813,13 @@ def isin(element, test_elements, assume_unique=False, invert=False,
of the `array` constructor's way of handling non-sequence collections.
Converting the set to a list usually gives the desired behavior.
+ Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
+ following relationship is true: ``log10(len(test_elements)) >
+ (log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``,
+ but may use greater memory. The default value for `kind` will
+ be automatically selected based only on memory usage, so one may
+ manually set ``kind='table'`` if memory constraints can be relaxed.
+
.. versionadded:: 1.13.0
Examples