diff options
author | jaimefrio <jaime.frio@gmail.com> | 2014-01-09 10:23:07 -0800 |
---|---|---|
committer | jaimefrio <jaime.frio@gmail.com> | 2014-04-05 21:06:56 -0700 |
commit | eae3d1a73f2f901da5956e3bcdaf2c44bfdd1ed3 (patch) | |
tree | eacd0e21014656e4cf36fe617830697474d9166f /numpy/lib/arraysetops.py | |
parent | 52d5d109f9dedf4f006b930abef9ff9c54ec1542 (diff) | |
download | numpy-eae3d1a73f2f901da5956e3bcdaf2c44bfdd1ed3.tar.gz |
ENH: add a 'return_counts=' keyword argument to `np.unique`
This PR adds a new keyword argument to `np.unique` that returns the
number of times each unique item comes up in the array. This allows
replacing a typical numpy construct:

    unq, _ = np.unique(a, return_inverse=True)
    unq_counts = np.bincount(_)

with a single line of code:

    unq, unq_counts = np.unique(a, return_counts=True)

As a plus, it runs faster, because it does not need the extra
operations required to produce `unique_inverse`.
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- | numpy/lib/arraysetops.py | 68 |
1 files changed, 42 insertions, 26 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 691550579..0755fffd1 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -90,7 +90,7 @@ def ediff1d(ary, to_end=None, to_begin=None):
     return ed
 
 
-def unique(ar, return_index=False, return_inverse=False):
+def unique(ar, return_index=False, return_inverse=False, return_counts=False):
     """
     Find the unique elements of an array.
 
@@ -109,6 +109,10 @@ def unique(ar, return_index=False, return_inverse=False):
     return_inverse : bool, optional
         If True, also return the indices of the unique array that can be used
         to reconstruct `ar`.
+    return_counts : bool, optional
+        .. versionadded:: 1.9.0
+        If True, also return the number of times each unique value comes up
+        in `ar`.
 
     Returns
     -------
@@ -120,6 +124,10 @@ def unique(ar, return_index=False, return_inverse=False):
     unique_inverse : ndarray, optional
         The indices to reconstruct the (flattened) original array from the
         unique array. Only provided if `return_inverse` is True.
+    unique_counts : ndarray, optional
+        .. versionadded:: 1.9.0
+        The number of times each of the unique values comes up in the
+        original array. Only provided if `return_counts` is True.
 
     See Also
     --------
@@ -162,41 +170,49 @@ def unique(ar, return_index=False, return_inverse=False):
     try:
         ar = ar.flatten()
     except AttributeError:
-        if not return_inverse and not return_index:
-            return np.sort(list(set(ar)))
+        if not return_inverse and not return_index and not return_counts:
+            return np.sort(list((set(ar))))
         else:
             ar = np.asanyarray(ar).flatten()
 
+    optional_indices = return_index or return_inverse
+    optional_returns = optional_indices or return_counts
+
     if ar.size == 0:
-        if return_inverse and return_index:
-            return ar, np.empty(0, np.bool), np.empty(0, np.bool)
-        elif return_inverse or return_index:
-            return ar, np.empty(0, np.bool)
+        if not optional_returns:
+            ret = ar
         else:
-            return ar
+            ret = (ar,)
+            if return_index:
+                ret += (np.empty(0, np.bool),)
+            if return_inverse:
+                ret += (np.empty(0, np.bool),)
+            if return_counts:
+                ret += (np.empty(0, np.intp),)
+        return ret
+
+    if optional_indices:
+        perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
+        aux = ar[perm]
+    else:
+        ar.sort()
+        aux = ar
+    flag = np.concatenate(([True], aux[1:] != aux[:-1]))
 
-    if return_inverse or return_index:
+    if not optional_returns:
+        ret = aux[flag]
+    else:
+        ret = (aux[flag],)
         if return_index:
-            perm = ar.argsort(kind='mergesort')
-        else:
-            perm = ar.argsort()
-        aux = ar[perm]
-        flag = np.concatenate(([True], aux[1:] != aux[:-1]))
+            ret += (perm[flag],)
         if return_inverse:
             iflag = np.cumsum(flag) - 1
             iperm = perm.argsort()
-            if return_index:
-                return aux[flag], perm[flag], iflag[iperm]
-            else:
-                return aux[flag], iflag[iperm]
-        else:
-            return aux[flag], perm[flag]
-
-    else:
-        ar.sort()
-        flag = np.concatenate(([True], ar[1:] != ar[:-1]))
-        return ar[flag]
-
+            ret += (np.take(iflag, iperm),)
+        if return_counts:
+            idx = np.concatenate(np.nonzero(flag) + ([ar.size],))
+            ret += (np.diff(idx),)
+    return ret
 
 def intersect1d(ar1, ar2, assume_unique=False):
     """