summary | refs | log | tree | commit | diff
path: root/numpy/lib/arraysetops.py
diff options
context:
space:
mode:
author: jaimefrio <jaime.frio@gmail.com> 2014-01-09 10:23:07 -0800
committer: jaimefrio <jaime.frio@gmail.com> 2014-04-05 21:06:56 -0700
commit: eae3d1a73f2f901da5956e3bcdaf2c44bfdd1ed3 (patch)
tree: eacd0e21014656e4cf36fe617830697474d9166f /numpy/lib/arraysetops.py
parent: 52d5d109f9dedf4f006b930abef9ff9c54ec1542 (diff)
download: numpy-eae3d1a73f2f901da5956e3bcdaf2c44bfdd1ed3.tar.gz
ENH: add a 'return_counts=' keyword argument to `np.unique`
This PR adds a new keyword argument to `np.unique` that returns the number of times each unique item comes up in the array. This allows replacing a typical numpy construct, `unq, _ = np.unique(a, return_inverse=True)` followed by `unq_counts = np.bincount(_)`, with a single line of code: `unq, unq_counts = np.unique(a, return_counts=True)`. As a plus, it runs faster, because it does not need the extra operations required to produce `unique_inverse`.
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- numpy/lib/arraysetops.py 68
1 files changed, 42 insertions, 26 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 691550579..0755fffd1 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -90,7 +90,7 @@ def ediff1d(ary, to_end=None, to_begin=None):
return ed
-def unique(ar, return_index=False, return_inverse=False):
+def unique(ar, return_index=False, return_inverse=False, return_counts=False):
"""
Find the unique elements of an array.
@@ -109,6 +109,10 @@ def unique(ar, return_index=False, return_inverse=False):
return_inverse : bool, optional
If True, also return the indices of the unique array that can be used
to reconstruct `ar`.
+ return_counts : bool, optional
+ .. versionadded:: 1.9.0
+ If True, also return the number of times each unique value comes up
+ in `ar`.
Returns
-------
@@ -120,6 +124,10 @@ def unique(ar, return_index=False, return_inverse=False):
unique_inverse : ndarray, optional
The indices to reconstruct the (flattened) original array from the
unique array. Only provided if `return_inverse` is True.
+ unique_counts : ndarray, optional
+ .. versionadded:: 1.9.0
+ The number of times each of the unique values comes up in the
+ original array. Only provided if `return_counts` is True.
See Also
--------
@@ -162,41 +170,49 @@ def unique(ar, return_index=False, return_inverse=False):
try:
ar = ar.flatten()
except AttributeError:
- if not return_inverse and not return_index:
- return np.sort(list(set(ar)))
+ if not return_inverse and not return_index and not return_counts:
+ return np.sort(list((set(ar))))
else:
ar = np.asanyarray(ar).flatten()
+ optional_indices = return_index or return_inverse
+ optional_returns = optional_indices or return_counts
+
if ar.size == 0:
- if return_inverse and return_index:
- return ar, np.empty(0, np.bool), np.empty(0, np.bool)
- elif return_inverse or return_index:
- return ar, np.empty(0, np.bool)
+ if not optional_returns:
+ ret = ar
else:
- return ar
+ ret = (ar,)
+ if return_index:
+ ret += (np.empty(0, np.bool),)
+ if return_inverse:
+ ret += (np.empty(0, np.bool),)
+ if return_counts:
+ ret += (np.empty(0, np.intp),)
+ return ret
+
+ if optional_indices:
+ perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
+ aux = ar[perm]
+ else:
+ ar.sort()
+ aux = ar
+ flag = np.concatenate(([True], aux[1:] != aux[:-1]))
- if return_inverse or return_index:
+ if not optional_returns:
+ ret = aux[flag]
+ else:
+ ret = (aux[flag],)
if return_index:
- perm = ar.argsort(kind='mergesort')
- else:
- perm = ar.argsort()
- aux = ar[perm]
- flag = np.concatenate(([True], aux[1:] != aux[:-1]))
+ ret += (perm[flag],)
if return_inverse:
iflag = np.cumsum(flag) - 1
iperm = perm.argsort()
- if return_index:
- return aux[flag], perm[flag], iflag[iperm]
- else:
- return aux[flag], iflag[iperm]
- else:
- return aux[flag], perm[flag]
-
- else:
- ar.sort()
- flag = np.concatenate(([True], ar[1:] != ar[:-1]))
- return ar[flag]
-
+ ret += (np.take(iflag, iperm),)
+ if return_counts:
+ idx = np.concatenate(np.nonzero(flag) + ([ar.size],))
+ ret += (np.diff(idx),)
+ return ret
def intersect1d(ar1, ar2, assume_unique=False):
"""