diff options
author | Devin Shanahan <dshanahan88@gmail.com> | 2022-01-16 05:12:59 -0700 |
---|---|---|
committer | Devin Shanahan <dshanahan88@gmail.com> | 2022-01-16 05:12:59 -0700 |
commit | 0f66b6032a2c5039007d5041398561b452ddabef (patch) | |
tree | d030c1ae812e138f74a29b280cddde376d821ab8 /numpy/lib/arraysetops.py | |
parent | 5a52c717fe45c7c6bdc3d20b178a00bffbe9e24e (diff) | |
parent | 7191d9a4773d77205349ac151f84b72c0ffcf848 (diff) | |
download | numpy-0f66b6032a2c5039007d5041398561b452ddabef.tar.gz |
MAINT: Merge branch 'main' into delete-speedup
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- | numpy/lib/arraysetops.py | 74 |
1 files changed, 51 insertions, 23 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 22687b941..bd56b6975 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -1,28 +1,17 @@ """ Set operations for arrays based on sorting. -:Contains: - unique, - isin, - ediff1d, - intersect1d, - setxor1d, - in1d, - union1d, - setdiff1d - -:Notes: +Notes +----- For floating point arrays, inaccurate results may appear due to usual round-off and floating point comparison issues. Speed could be gained in some operations by an implementation of -sort(), that can provide directly the permutation vectors, avoiding -thus calls to argsort(). +`numpy.sort`, that can provide directly the permutation vectors, thus avoiding +calls to `numpy.argsort`. -To do: Optionally return indices analogously to unique for all functions. - -:Author: Robert Cimrman +Original author: Robert Cimrman """ import functools @@ -104,7 +93,7 @@ def ediff1d(ary, to_end=None, to_begin=None): else: to_begin = np.asanyarray(to_begin) if not np.can_cast(to_begin, dtype_req, casting="same_kind"): - raise TypeError("dtype of `to_end` must be compatible " + raise TypeError("dtype of `to_begin` must be compatible " "with input `ary` under the `same_kind` rule.") to_begin = to_begin.ravel() @@ -206,6 +195,7 @@ def unique(ar, return_index=False, return_inverse=False, -------- numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. + repeat : Repeat elements of an array. Notes ----- @@ -219,6 +209,16 @@ def unique(ar, return_index=False, return_inverse=False, flattened subarrays are sorted in lexicographic order starting with the first element. + .. versionchanged: NumPy 1.21 + If nan values are in the input array, a single nan is put + to the end of the sorted unique values. + + Also for complex arrays all NaN values are considered equivalent + (no matter whether the NaN is in the real or imaginary part). + As the representant for the returned array the smallest one in the + lexicographical order is chosen - see np.sort for how the lexicographical + order is defined for complex arrays. + Examples -------- >>> np.unique([1, 1, 2, 2, 3, 3]) @@ -244,7 +244,7 @@ def unique(ar, return_index=False, return_inverse=False, >>> a[indices] array(['a', 'b', 'c'], dtype='<U1') - Reconstruct the input array from the unique values: + Reconstruct the input array from the unique values and inverse: >>> a = np.array([1, 2, 6, 4, 2, 3, 2]) >>> u, indices = np.unique(a, return_inverse=True) @@ -255,6 +255,17 @@ def unique(ar, return_index=False, return_inverse=False, >>> u[indices] array([1, 2, 6, 4, 2, 3, 2]) + Reconstruct the input values from the unique values and counts: + + >>> a = np.array([1, 2, 6, 4, 2, 3, 2]) + >>> values, counts = np.unique(a, return_counts=True) + >>> values + array([1, 2, 3, 4, 6]) + >>> counts + array([1, 3, 1, 1, 1]) + >>> np.repeat(values, counts) + array([1, 2, 2, 2, 3, 4, 6]) # original order not preserved + """ ar = np.asanyarray(ar) if axis is None: @@ -266,7 +277,7 @@ def unique(ar, return_index=False, return_inverse=False, ar = np.moveaxis(ar, axis, 0) except np.AxisError: # this removes the "axis1" or "axis2" prefix from the error message - raise np.AxisError(axis, ar.ndim) + raise np.AxisError(axis, ar.ndim) from None # Must reshape to a contiguous 2D array for this to work... orig_shape, orig_dtype = ar.shape, ar.dtype @@ -288,10 +299,10 @@ def unique(ar, return_index=False, return_inverse=False, # array with shape `(len(ar),)`. Because `dtype` in this case has # itemsize 0, the total size of the result is still 0 bytes. consolidated = np.empty(len(ar), dtype=dtype) - except TypeError: + except TypeError as e: # There's no good way to do this for object arrays, etc... msg = 'The axis argument to unique is not supported for dtype {dt}' - raise TypeError(msg.format(dt=ar.dtype)) + raise TypeError(msg.format(dt=ar.dtype)) from e def reshape_uniq(uniq): n = len(uniq) @@ -323,7 +334,18 @@ def _unique1d(ar, return_index=False, return_inverse=False, aux = ar mask = np.empty(aux.shape, dtype=np.bool_) mask[:1] = True - mask[1:] = aux[1:] != aux[:-1] + if aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and np.isnan(aux[-1]): + if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent + aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left') + else: + aux_firstnan = np.searchsorted(aux, aux[-1], side='left') + if aux_firstnan > 0: + mask[1:aux_firstnan] = ( + aux[1:aux_firstnan] != aux[:aux_firstnan - 1]) + mask[aux_firstnan] = True + mask[aux_firstnan + 1:] = False + else: + mask[1:] = aux[1:] != aux[:-1] ret = (aux[mask],) if return_index: @@ -357,7 +379,9 @@ def intersect1d(ar1, ar2, assume_unique=False, return_indices=False): Input arrays. Will be flattened if not already 1D. assume_unique : bool If True, the input arrays are both assumed to be unique, which - can speed up the calculation. Default is False. + can speed up the calculation. If True but ``ar1`` or ``ar2`` are not + unique, incorrect results and out-of-bounds indices could result. + Default is False. return_indices : bool If True, the indices which correspond to the intersection of the two arrays are returned. The first instance of a value is used if there are @@ -562,6 +586,10 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): ar1 = np.asarray(ar1).ravel() ar2 = np.asarray(ar2).ravel() + # Ensure that iteration through object arrays yields size-1 arrays + if ar2.dtype == object: + ar2 = ar2.reshape(-1, 1) + # Check if one of the arrays may contain arbitrary objects contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject |