summaryrefslogtreecommitdiff
path: root/numpy/lib/arraysetops.py
diff options
context:
space:
mode:
authorTravis Oliphant <oliphant@enthought.com>2009-08-28 15:36:42 +0000
committerTravis Oliphant <oliphant@enthought.com>2009-08-28 15:36:42 +0000
commit2b01ee6b966b9ca298247e6717e3a5be16a92970 (patch)
tree6bbb8ee8eebdfe2ef3eb26f13994193b313c6fe7 /numpy/lib/arraysetops.py
parentc2191bc97da8a0879cec8d3e9a7a93fe9e66fcd8 (diff)
parentfddd4b9c3b8f18ba7cf386f766b70ec3328b1c69 (diff)
downloadnumpy-2b01ee6b966b9ca298247e6717e3a5be16a92970.tar.gz
Re-base the date-time branch back to the trunk.
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r--numpy/lib/arraysetops.py305
1 files changed, 161 insertions, 144 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index c5e7822f2..b8ae9a9f3 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -3,40 +3,36 @@ Set operations for 1D numeric arrays based on sorting.
:Contains:
ediff1d,
- unique1d,
+ unique,
intersect1d,
- intersect1d_nu,
setxor1d,
- setmember1d,
- setmember1d_nu,
+ in1d,
union1d,
setdiff1d
+:Deprecated:
+ unique1d,
+ intersect1d_nu,
+ setmember1d
+
:Notes:
-All functions work best with integer numerical arrays on input (e.g. indices).
-For floating point arrays, innacurate results may appear due to usual round-off
+For floating point arrays, inaccurate results may appear due to usual round-off
and floating point comparison issues.
-Except unique1d, union1d and intersect1d_nu, all functions expect inputs with
-unique elements. Speed could be gained in some operations by an implementaion of
-sort(), that can provide directly the permutation vectors, avoiding thus calls
-to argsort().
+Speed could be gained in some operations by an implementation of
+sort(), that can provide directly the permutation vectors, avoiding
+thus calls to argsort().
-Run _test_unique1d_speed() to compare performance of numpy.unique1d() and
-numpy.unique() - it should be the same.
-
-To do: Optionally return indices analogously to unique1d for all functions.
-
-created: 01.11.2005
-last revision: 07.01.2007
+To do: Optionally return indices analogously to unique for all functions.
:Author: Robert Cimrman
"""
__all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d',
- 'setmember1d', 'setmember1d_nu', 'union1d', 'setdiff1d']
+ 'setmember1d', 'union1d', 'setdiff1d', 'unique', 'in1d']
import numpy as np
+from numpy.lib.utils import deprecate_with_doc
def ediff1d(ary, to_end=None, to_begin=None):
"""
@@ -50,7 +46,7 @@ def ediff1d(ary, to_end=None, to_begin=None):
If provided, this number will be tacked onto the end of the returned
differences.
to_begin : number, optional
- If provided, this number will be taked onto the beginning of the
+ If provided, this number will be tacked onto the beginning of the
returned differences.
Returns
@@ -73,26 +69,26 @@ def ediff1d(ary, to_end=None, to_begin=None):
arrays.append(to_end)
if len(arrays) != 1:
- # We'll save ourselves a copy of a potentially large array in the common
- # case where neither to_begin or to_end was given.
+ # We'll save ourselves a copy of a potentially large array in
+ # the common case where neither to_begin or to_end was given.
ed = np.hstack(arrays)
return ed
-def unique1d(ar1, return_index=False, return_inverse=False):
+def unique(ar, return_index=False, return_inverse=False):
"""
Find the unique elements of an array.
Parameters
----------
- ar1 : array_like
+ ar : array_like
This array will be flattened if it is not already 1-D.
return_index : bool, optional
If True, also return the indices against `ar1` that result in the
unique array.
return_inverse : bool, optional
If True, also return the indices against the unique array that
- result in `ar1`.
+ result in `ar`.
Returns
-------
@@ -112,17 +108,17 @@ def unique1d(ar1, return_index=False, return_inverse=False):
Examples
--------
- >>> np.unique1d([1, 1, 2, 2, 3, 3])
+ >>> np.unique([1, 1, 2, 2, 3, 3])
array([1, 2, 3])
>>> a = np.array([[1, 1], [2, 3]])
- >>> np.unique1d(a)
+ >>> np.unique(a)
array([1, 2, 3])
Reconstruct the input from unique values:
- >>> np.unique1d([1,2,6,4,2,3,2], return_index=True)
+ >>> np.unique([1,2,6,4,2,3,2], return_index=True)
>>> x = [1,2,6,4,2,3,2]
- >>> u, i = np.unique1d(x, return_inverse=True)
+ >>> u, i = np.unique(x, return_inverse=True)
>>> u
array([1, 2, 3, 4, 6])
>>> i
@@ -131,14 +127,15 @@ def unique1d(ar1, return_index=False, return_inverse=False):
[1, 2, 6, 4, 2, 3, 2]
"""
- if return_index:
- import warnings
- warnings.warn("The order of the output arguments for "
- "`return_index` has changed. Before, "
- "the output was (indices, unique_arr), but "
- "has now been reversed to be more consistent.")
+ try:
+ ar = ar.flatten()
+ except AttributeError:
+ if not return_inverse and not return_index:
+ items = sorted(set(ar))
+ return np.asarray(items)
+ else:
+ ar = np.asanyarray(ar).flatten()
- ar = np.asanyarray(ar1).flatten()
if ar.size == 0:
if return_inverse and return_index:
return ar, np.empty(0, np.bool), np.empty(0, np.bool)
@@ -166,44 +163,18 @@ def unique1d(ar1, return_index=False, return_inverse=False):
flag = np.concatenate(([True], ar[1:] != ar[:-1]))
return ar[flag]
-def intersect1d(ar1, ar2):
- """
- Intersection returning repeated or unique elements common to both arrays.
-
- Parameters
- ----------
- ar1,ar2 : array_like
- Input arrays.
-
- Returns
- -------
- out : ndarray, shape(N,)
- Sorted 1D array of common elements with repeating elements.
-
- See Also
- --------
- intersect1d_nu : Returns only unique common elements.
- numpy.lib.arraysetops : Module with a number of other functions for
- performing set operations on arrays.
-
- Examples
- --------
- >>> np.intersect1d([1,3,3],[3,1,1])
- array([1, 1, 3, 3])
-
- """
- aux = np.concatenate((ar1,ar2))
- aux.sort()
- return aux[aux[1:] == aux[:-1]]
-def intersect1d_nu(ar1, ar2):
+def intersect1d(ar1, ar2, assume_unique=False):
"""
Intersection returning unique elements common to both arrays.
Parameters
----------
- ar1,ar2 : array_like
+ ar1, ar2 : array_like
Input arrays.
+ assume_unique : bool
+ If True, the input arrays are both assumed to be unique, which
+ can speed up the calculation. Default is False.
Returns
-------
@@ -212,34 +183,34 @@ def intersect1d_nu(ar1, ar2):
See Also
--------
- intersect1d : Returns repeated or unique common elements.
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
Examples
--------
- >>> np.intersect1d_nu([1,3,3],[3,1,1])
+ >>> np.intersect1d([1,3,3], [3,1,1])
array([1, 3])
"""
- # Might be faster than unique1d( intersect1d( ar1, ar2 ) )?
- aux = np.concatenate((unique1d(ar1), unique1d(ar2)))
+ if not assume_unique:
+ # Might be faster than unique( intersect1d( ar1, ar2 ) )?
+ ar1 = unique(ar1)
+ ar2 = unique(ar2)
+ aux = np.concatenate( (ar1, ar2) )
aux.sort()
return aux[aux[1:] == aux[:-1]]
-def setxor1d(ar1, ar2):
+def setxor1d(ar1, ar2, assume_unique=False):
"""
- Set exclusive-or of 1D arrays with unique elements.
-
- Use unique1d() to generate arrays with only unique elements to use as
- inputs to this function.
+ Set exclusive-or of two 1D arrays.
Parameters
----------
- ar1 : array_like
- Input array.
- ar2 : array_like
- Input array.
+ ar1, ar2 : array_like
+ Input arrays.
+ assume_unique : bool
+ If True, the input arrays are both assumed to be unique, which
+ can speed up the calculation. Default is False.
Returns
-------
@@ -252,7 +223,11 @@ def setxor1d(ar1, ar2):
performing set operations on arrays.
"""
- aux = np.concatenate((ar1, ar2))
+ if not assume_unique:
+ ar1 = unique(ar1)
+ ar2 = unique(ar2)
+
+ aux = np.concatenate( (ar1, ar2) )
if aux.size == 0:
return aux
@@ -263,98 +238,68 @@ def setxor1d(ar1, ar2):
flag2 = flag[1:] == flag[:-1]
return aux[flag2]
-def setmember1d(ar1, ar2):
+def in1d(ar1, ar2, assume_unique=False):
"""
- Return a boolean array set True where first element is in second array.
-
- Boolean array is the shape of `ar1` containing True where the elements
- of `ar1` are in `ar2` and False otherwise.
+ Test whether each element of an array is also present in a second array.
- Use unique1d() to generate arrays with only unique elements to use as
- inputs to this function.
+ Returns a boolean array the same length as `ar1` that is True
+ where an element of `ar1` is in `ar2` and False otherwise.
Parameters
----------
- ar1 : array_like
- Input array.
- ar2 : array_like
- Input array.
+ ar1, ar2 : array_like
+ Input arrays.
+ assume_unique : bool
+ If True, the input arrays are both assumed to be unique, which
+ can speed up the calculation. Default is False.
Returns
-------
mask : ndarray, bool
The values `ar1[mask]` are in `ar2`.
-
See Also
--------
- setmember1d_nu : Works for arrays with non-unique elements.
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
+ Notes
+ -----
+ .. versionadded:: 1.4.0
+
Examples
--------
>>> test = np.arange(5)
>>> states = [0, 2]
- >>> mask = np.setmember1d(test,states)
+ >>> mask = np.setmember1d(test, states)
>>> mask
array([ True, False, True, False, False], dtype=bool)
>>> test[mask]
array([0, 2])
"""
- # We need this to be a stable sort, so always use 'mergesort' here. The
- # values from the first array should always come before the values from the
- # second array.
- ar = np.concatenate( (ar1, ar2 ) )
+ if not assume_unique:
+ ar1, rev_idx = np.unique(ar1, return_inverse=True)
+ ar2 = np.unique(ar2)
+
+ ar = np.concatenate( (ar1, ar2) )
+ # We need this to be a stable sort, so always use 'mergesort'
+ # here. The values from the first array should always come before
+ # the values from the second array.
order = ar.argsort(kind='mergesort')
sar = ar[order]
equal_adj = (sar[1:] == sar[:-1])
flag = np.concatenate( (equal_adj, [False] ) )
-
indx = order.argsort(kind='mergesort')[:len( ar1 )]
- return flag[indx]
-
-def setmember1d_nu(ar1, ar2):
- """
- Return a boolean array set True where first element is in second array.
-
- Boolean array is the shape of `ar1` containing True where the elements
- of `ar1` are in `ar2` and False otherwise.
-
- Unlike setmember1d(), this version works also for arrays with duplicate
- values. It uses setmember1d() internally. For arrays with unique
- entries it is slower than calling setmember1d() directly.
-
- Parameters
- ----------
- ar1 : array_like
- Input array.
- ar2 : array_like
- Input array.
- Returns
- -------
- mask : ndarray, bool
- The values `ar1[mask]` are in `ar2`.
-
- See Also
- --------
- setmember1d : Faster for arrays with unique elements.
- numpy.lib.arraysetops : Module with a number of other functions for
- performing set operations on arrays.
-
- """
- unique_ar1, rev_idx = np.unique1d(ar1, return_inverse=True)
- mask = np.setmember1d(unique_ar1, np.unique1d(ar2))
- return mask[rev_idx]
+ if assume_unique:
+ return flag[indx]
+ else:
+ return flag[indx][rev_idx]
def union1d(ar1, ar2):
"""
- Union of 1D arrays with unique elements.
-
- Use unique1d() to generate arrays with only unique elements to use as
- inputs to this function.
+ Union of two 1D arrays.
Parameters
----------
@@ -374,14 +319,11 @@ def union1d(ar1, ar2):
performing set operations on arrays.
"""
- return unique1d( np.concatenate( (ar1, ar2) ) )
+ return unique( np.concatenate( (ar1, ar2) ) )
-def setdiff1d(ar1, ar2):
+def setdiff1d(ar1, ar2, assume_unique=False):
"""
- Set difference of 1D arrays with unique elements.
-
- Use unique1d() to generate arrays with only unique elements to use as
- inputs to this function.
+ Set difference of two 1D arrays.
Parameters
----------
@@ -389,6 +331,9 @@ def setdiff1d(ar1, ar2):
Input array.
ar2 : array_like
Input comparison array.
+ assume_unique : bool
+ If True, the input arrays are both assumed to be unique, which
+ can speed up the calculation. Default is False.
Returns
-------
@@ -401,8 +346,80 @@ def setdiff1d(ar1, ar2):
performing set operations on arrays.
"""
- aux = setmember1d(ar1,ar2)
+ if not assume_unique:
+ ar1 = unique(ar1)
+ ar2 = unique(ar2)
+ aux = in1d(ar1, ar2, assume_unique=True)
if aux.size == 0:
return aux
else:
return np.asarray(ar1)[aux == 0]
+
+@deprecate_with_doc('')
+def unique1d(ar1, return_index=False, return_inverse=False):
+ """
+ This function is deprecated. Use unique() instead.
+ """
+ if return_index:
+ import warnings
+ warnings.warn("The order of the output arguments for "
+ "`return_index` has changed. Before, "
+ "the output was (indices, unique_arr), but "
+ "has now been reversed to be more consistent.")
+
+ ar = np.asanyarray(ar1).flatten()
+ if ar.size == 0:
+ if return_inverse and return_index:
+ return ar, np.empty(0, np.bool), np.empty(0, np.bool)
+ elif return_inverse or return_index:
+ return ar, np.empty(0, np.bool)
+ else:
+ return ar
+
+ if return_inverse or return_index:
+ perm = ar.argsort()
+ aux = ar[perm]
+ flag = np.concatenate(([True], aux[1:] != aux[:-1]))
+ if return_inverse:
+ iflag = np.cumsum(flag) - 1
+ iperm = perm.argsort()
+ if return_index:
+ return aux[flag], perm[flag], iflag[iperm]
+ else:
+ return aux[flag], iflag[iperm]
+ else:
+ return aux[flag], perm[flag]
+
+ else:
+ ar.sort()
+ flag = np.concatenate(([True], ar[1:] != ar[:-1]))
+ return ar[flag]
+
+@deprecate_with_doc('')
+def intersect1d_nu(ar1, ar2):
+ """
+ This function is deprecated. Use intersect1d()
+ instead.
+ """
+ # Might be faster than unique1d( intersect1d( ar1, ar2 ) )?
+ aux = np.concatenate((unique1d(ar1), unique1d(ar2)))
+ aux.sort()
+ return aux[aux[1:] == aux[:-1]]
+
+@deprecate_with_doc('')
+def setmember1d(ar1, ar2):
+ """
+ This function is deprecated. Use in1d(assume_unique=True)
+ instead.
+ """
+ # We need this to be a stable sort, so always use 'mergesort' here. The
+ # values from the first array should always come before the values from the
+ # second array.
+ ar = np.concatenate( (ar1, ar2 ) )
+ order = ar.argsort(kind='mergesort')
+ sar = ar[order]
+ equal_adj = (sar[1:] == sar[:-1])
+ flag = np.concatenate( (equal_adj, [False] ) )
+
+ indx = order.argsort(kind='mergesort')[:len( ar1 )]
+ return flag[indx]