diff options
author | MilesCranmer <miles.cranmer@gmail.com> | 2022-06-20 14:21:07 -0400 |
---|---|---|
committer | MilesCranmer <miles.cranmer@gmail.com> | 2022-06-20 14:21:07 -0400 |
commit | 48580947a5c6c30ef81aa49da2a9399d6b1c3b45 (patch) | |
tree | 4e63894a4f3fcea6325a644b3c3360c95f8db747 /numpy/lib/arraysetops.py | |
parent | 75dbbea7d7111c151498722c0c2811cee86d34ff (diff) | |
download | numpy-48580947a5c6c30ef81aa49da2a9399d6b1c3b45.tar.gz |
DOC: Notes on `kind` to in1d/isin docstring
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- | numpy/lib/arraysetops.py | 28 |
1 file changed, 18 insertions, 10 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index a81e0af8b..8b95fcac9 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -556,11 +556,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None): * If 'table', will use a key-dictionary approach similar to a counting sort. This is only available for boolean and integer arrays. This will have a memory usage of the - size of `ar1` plus the max-min value of `ar2`. This tends - to be the faster method if the following formula is true: - ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``, - but may use greater memory. `assume_unique` has no effect - when the 'table' option is used. + size of `ar1` plus the max-min value of `ar2`. `assume_unique` + has no effect when the 'table' option is used. * If None, will automatically choose 'table' if the required memory allocation is less than or equal to 6 times the sum of the sizes of `ar1` and `ar2`, @@ -593,6 +590,13 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None): ``asarray(ar2)`` is an object array rather than the expected array of contained values. + Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the + following relationship is true: + ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``, + but may use greater memory. The default value for `kind` will + be automatically selected based only on memory usage, so one may + manually set ``kind='table'`` if memory constraints can be relaxed. + .. versionadded:: 1.4.0 Examples @@ -772,11 +776,8 @@ def isin(element, test_elements, assume_unique=False, invert=False, * If 'table', will use a key-dictionary approach similar to a counting sort. This is only available for boolean and integer arrays. This will have a memory usage of the - size of `ar1` plus the max-min value of `ar2`. 
This tends - to be the faster method if the following formula is true: - ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``, - but may use greater memory. `assume_unique` has no effect - when the 'table' option is used. + size of `ar1` plus the max-min value of `ar2`. `assume_unique` + has no effect when the 'table' option is used. * If None, will automatically choose 'table' if the required memory allocation is less than or equal to 6 times the sum of the sizes of `ar1` and `ar2`, @@ -812,6 +813,13 @@ def isin(element, test_elements, assume_unique=False, invert=False, of the `array` constructor's way of handling non-sequence collections. Converting the set to a list usually gives the desired behavior. + Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the + following relationship is true: + ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``, + but may use greater memory. The default value for `kind` will + be automatically selected based only on memory usage, so one may + manually set ``kind='table'`` if memory constraints can be relaxed. + .. versionadded:: 1.13.0 Examples |