summary | refs | log | tree | commit | diff
path: root/numpy/lib/arraysetops.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- numpy/lib/arraysetops.py 74
1 files changed, 51 insertions, 23 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 22687b941..bd56b6975 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -1,28 +1,17 @@
"""
Set operations for arrays based on sorting.
-:Contains:
- unique,
- isin,
- ediff1d,
- intersect1d,
- setxor1d,
- in1d,
- union1d,
- setdiff1d
-
-:Notes:
+Notes
+-----
For floating point arrays, inaccurate results may appear due to usual round-off
and floating point comparison issues.
Speed could be gained in some operations by an implementation of
-sort(), that can provide directly the permutation vectors, avoiding
-thus calls to argsort().
+`numpy.sort`, that can provide directly the permutation vectors, thus avoiding
+calls to `numpy.argsort`.
-To do: Optionally return indices analogously to unique for all functions.
-
-:Author: Robert Cimrman
+Original author: Robert Cimrman
"""
import functools
@@ -104,7 +93,7 @@ def ediff1d(ary, to_end=None, to_begin=None):
else:
to_begin = np.asanyarray(to_begin)
if not np.can_cast(to_begin, dtype_req, casting="same_kind"):
- raise TypeError("dtype of `to_end` must be compatible "
+ raise TypeError("dtype of `to_begin` must be compatible "
"with input `ary` under the `same_kind` rule.")
to_begin = to_begin.ravel()
@@ -206,6 +195,7 @@ def unique(ar, return_index=False, return_inverse=False,
--------
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
+ repeat : Repeat elements of an array.
Notes
-----
@@ -219,6 +209,16 @@ def unique(ar, return_index=False, return_inverse=False,
flattened subarrays are sorted in lexicographic order starting with the
first element.
+ .. versionchanged:: 1.21
+ If nan values are in the input array, a single nan is put
+ to the end of the sorted unique values.
+
+ Also for complex arrays all NaN values are considered equivalent
+ (no matter whether the NaN is in the real or imaginary part).
+ As the representative for the returned array the smallest one in the
+ lexicographical order is chosen - see np.sort for how the lexicographical
+ order is defined for complex arrays.
+
Examples
--------
>>> np.unique([1, 1, 2, 2, 3, 3])
@@ -244,7 +244,7 @@ def unique(ar, return_index=False, return_inverse=False,
>>> a[indices]
array(['a', 'b', 'c'], dtype='<U1')
- Reconstruct the input array from the unique values:
+ Reconstruct the input array from the unique values and inverse:
>>> a = np.array([1, 2, 6, 4, 2, 3, 2])
>>> u, indices = np.unique(a, return_inverse=True)
@@ -255,6 +255,17 @@ def unique(ar, return_index=False, return_inverse=False,
>>> u[indices]
array([1, 2, 6, 4, 2, 3, 2])
+ Reconstruct the input values from the unique values and counts:
+
+ >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
+ >>> values, counts = np.unique(a, return_counts=True)
+ >>> values
+ array([1, 2, 3, 4, 6])
+ >>> counts
+ array([1, 3, 1, 1, 1])
+ >>> np.repeat(values, counts)
+ array([1, 2, 2, 2, 3, 4, 6]) # original order not preserved
+
"""
ar = np.asanyarray(ar)
if axis is None:
@@ -266,7 +277,7 @@ def unique(ar, return_index=False, return_inverse=False,
ar = np.moveaxis(ar, axis, 0)
except np.AxisError:
# this removes the "axis1" or "axis2" prefix from the error message
- raise np.AxisError(axis, ar.ndim)
+ raise np.AxisError(axis, ar.ndim) from None
# Must reshape to a contiguous 2D array for this to work...
orig_shape, orig_dtype = ar.shape, ar.dtype
@@ -288,10 +299,10 @@ def unique(ar, return_index=False, return_inverse=False,
# array with shape `(len(ar),)`. Because `dtype` in this case has
# itemsize 0, the total size of the result is still 0 bytes.
consolidated = np.empty(len(ar), dtype=dtype)
- except TypeError:
+ except TypeError as e:
# There's no good way to do this for object arrays, etc...
msg = 'The axis argument to unique is not supported for dtype {dt}'
- raise TypeError(msg.format(dt=ar.dtype))
+ raise TypeError(msg.format(dt=ar.dtype)) from e
def reshape_uniq(uniq):
n = len(uniq)
@@ -323,7 +334,18 @@ def _unique1d(ar, return_index=False, return_inverse=False,
aux = ar
mask = np.empty(aux.shape, dtype=np.bool_)
mask[:1] = True
- mask[1:] = aux[1:] != aux[:-1]
+ if aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and np.isnan(aux[-1]):
+ if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent
+ aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
+ else:
+ aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
+ if aux_firstnan > 0:
+ mask[1:aux_firstnan] = (
+ aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
+ mask[aux_firstnan] = True
+ mask[aux_firstnan + 1:] = False
+ else:
+ mask[1:] = aux[1:] != aux[:-1]
ret = (aux[mask],)
if return_index:
@@ -357,7 +379,9 @@ def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
Input arrays. Will be flattened if not already 1D.
assume_unique : bool
If True, the input arrays are both assumed to be unique, which
- can speed up the calculation. Default is False.
+ can speed up the calculation. If True but ``ar1`` or ``ar2`` is not
+ unique, incorrect results and out-of-bounds indices may be returned.
+ Default is False.
return_indices : bool
If True, the indices which correspond to the intersection of the two
arrays are returned. The first instance of a value is used if there are
@@ -562,6 +586,10 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
ar1 = np.asarray(ar1).ravel()
ar2 = np.asarray(ar2).ravel()
+ # Ensure that iteration through object arrays yields size-1 arrays
+ if ar2.dtype == object:
+ ar2 = ar2.reshape(-1, 1)
+
# Check if one of the arrays may contain arbitrary objects
contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject