Re-base the date-time branch back to the trunk.

author: Travis Oliphant <oliphant@enthought.com> 2009-08-28 15:36:42 +0000
committer: Travis Oliphant <oliphant@enthought.com> 2009-08-28 15:36:42 +0000
commit: 2b01ee6b966b9ca298247e6717e3a5be16a92970 (patch)
tree: 6bbb8ee8eebdfe2ef3eb26f13994193b313c6fe7 /numpy/lib/arraysetops.py
parent: c2191bc97da8a0879cec8d3e9a7a93fe9e66fcd8 (diff)
parent: fddd4b9c3b8f18ba7cf386f766b70ec3328b1c69 (diff)
download: numpy-2b01ee6b966b9ca298247e6717e3a5be16a92970.tar.gz
1 files changed, 161 insertions, 144 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index c5e7822f2..b8ae9a9f3 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -3,40 +3,36 @@ Set operations for 1D numeric arrays based on sorting.
 
 :Contains:
   ediff1d,
-  unique1d,
+  unique,
   intersect1d,
-  intersect1d_nu,
   setxor1d,
-  setmember1d,
-  setmember1d_nu,
+  in1d,
   union1d,
   setdiff1d
 
+:Deprecated:
+  unique1d,
+  intersect1d_nu,
+  setmember1d
+
 :Notes:
 
-All functions work best with integer numerical arrays on input (e.g. indices).
-For floating point arrays, innacurate results may appear due to usual round-off
+For floating point arrays, inaccurate results may appear due to usual round-off
 and floating point comparison issues.
 
-Except unique1d, union1d and intersect1d_nu, all functions expect inputs with
-unique elements. Speed could be gained in some operations by an implementaion of
-sort(), that can provide directly the permutation vectors, avoiding thus calls
-to argsort().
+Speed could be gained in some operations by an implementation of
+sort(), that can provide directly the permutation vectors, avoiding
+thus calls to argsort().
 
-Run _test_unique1d_speed() to compare performance of numpy.unique1d() and
-numpy.unique() - it should be the same.
-
-To do: Optionally return indices analogously to unique1d for all functions.
-
-created:       01.11.2005
-last revision: 07.01.2007
+To do: Optionally return indices analogously to unique for all functions.
 
 :Author: Robert Cimrman
 """
 __all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d',
-           'setmember1d', 'setmember1d_nu', 'union1d', 'setdiff1d']
+           'setmember1d', 'union1d', 'setdiff1d', 'unique', 'in1d']
 
 import numpy as np
+from numpy.lib.utils import deprecate_with_doc
 
 def ediff1d(ary, to_end=None, to_begin=None):
     """
@@ -50,7 +46,7 @@ def ediff1d(ary, to_end=None, to_begin=None):
         If provided, this number will be tacked onto the end of the returned
         differences.
     to_begin : number, optional
-        If provided, this number will be taked onto the beginning of the
+        If provided, this number will be tacked onto the beginning of the
         returned differences.
 
     Returns
@@ -73,26 +69,26 @@ def ediff1d(ary, to_end=None, to_begin=None):
         arrays.append(to_end)
 
     if len(arrays) != 1:
-        # We'll save ourselves a copy of a potentially large array in the common
-        # case where neither to_begin or to_end was given.
+        # We'll save ourselves a copy of a potentially large array in
+        # the common case where neither to_begin or to_end was given.
         ed = np.hstack(arrays)
 
     return ed
 
-def unique1d(ar1, return_index=False, return_inverse=False):
+def unique(ar, return_index=False, return_inverse=False):
     """
     Find the unique elements of an array.
 
     Parameters
     ----------
-    ar1 : array_like
+    ar : array_like
         This array will be flattened if it is not already 1-D.
     return_index : bool, optional
         If True, also return the indices against `ar1` that result in the
         unique array.
     return_inverse : bool, optional
         If True, also return the indices against the unique array that
-        result in `ar1`.
+        result in `ar`.
 
     Returns
     -------
@@ -112,17 +108,17 @@ def unique1d(ar1, return_index=False, return_inverse=False):
 
     Examples
     --------
-    >>> np.unique1d([1, 1, 2, 2, 3, 3])
+    >>> np.unique([1, 1, 2, 2, 3, 3])
     array([1, 2, 3])
     >>> a = np.array([[1, 1], [2, 3]])
-    >>> np.unique1d(a)
+    >>> np.unique(a)
     array([1, 2, 3])
 
     Reconstruct the input from unique values:
 
-    >>> np.unique1d([1,2,6,4,2,3,2], return_index=True)
+    >>> np.unique([1,2,6,4,2,3,2], return_index=True)
     >>> x = [1,2,6,4,2,3,2]
-    >>> u, i = np.unique1d(x, return_inverse=True)
+    >>> u, i = np.unique(x, return_inverse=True)
     >>> u
     array([1, 2, 3, 4, 6])
     >>> i
@@ -131,14 +127,15 @@ def unique1d(ar1, return_index=False, return_inverse=False):
     [1, 2, 6, 4, 2, 3, 2]
 
     """
-    if return_index:
-        import warnings
-        warnings.warn("The order of the output arguments for "
-                      "`return_index` has changed.  Before, "
-                      "the output was (indices, unique_arr), but "
-                      "has now been reversed to be more consistent.")
+    try:
+        ar = ar.flatten()
+    except AttributeError:
+        if not return_inverse and not return_index:
+            items = sorted(set(ar))
+            return np.asarray(items)
+        else:
+            ar = np.asanyarray(ar).flatten()
 
-    ar = np.asanyarray(ar1).flatten()
     if ar.size == 0:
         if return_inverse and return_index:
             return ar, np.empty(0, np.bool), np.empty(0, np.bool)
@@ -166,44 +163,18 @@ def unique1d(ar1, return_index=False, return_inverse=False):
         flag = np.concatenate(([True], ar[1:] != ar[:-1]))
         return ar[flag]
 
-def intersect1d(ar1, ar2):
-    """
-    Intersection returning repeated or unique elements common to both arrays.
-
-    Parameters
-    ----------
-    ar1,ar2 : array_like
-        Input arrays.
-
-    Returns
-    -------
-    out : ndarray, shape(N,)
-        Sorted 1D array of common elements with repeating elements.
-
-    See Also
-    --------
-    intersect1d_nu : Returns only unique common elements.
-    numpy.lib.arraysetops : Module with a number of other functions for
-                            performing set operations on arrays.
-
-    Examples
-    --------
-    >>> np.intersect1d([1,3,3],[3,1,1])
-    array([1, 1, 3, 3])
-
-    """
-    aux = np.concatenate((ar1,ar2))
-    aux.sort()
-    return aux[aux[1:] == aux[:-1]]
 
-def intersect1d_nu(ar1, ar2):
+def intersect1d(ar1, ar2, assume_unique=False):
     """
     Intersection returning unique elements common to both arrays.
 
     Parameters
     ----------
-    ar1,ar2 : array_like
+    ar1, ar2 : array_like
         Input arrays.
+    assume_unique : bool
+        If True, the input arrays are both assumed to be unique, which
+        can speed up the calculation.  Default is False.
 
     Returns
     -------
@@ -212,34 +183,34 @@ def intersect1d_nu(ar1, ar2):
 
     See Also
     --------
-    intersect1d : Returns repeated or unique common elements.
     numpy.lib.arraysetops : Module with a number of other functions for
                             performing set operations on arrays.
 
     Examples
     --------
-    >>> np.intersect1d_nu([1,3,3],[3,1,1])
+    >>> np.intersect1d([1,3,3], [3,1,1])
     array([1, 3])
 
     """
-    # Might be faster than unique1d( intersect1d( ar1, ar2 ) )?
-    aux = np.concatenate((unique1d(ar1), unique1d(ar2)))
+    if not assume_unique:
+        # Might be faster than unique( intersect1d( ar1, ar2 ) )?
+        ar1 = unique(ar1)
+        ar2 = unique(ar2)
+    aux = np.concatenate( (ar1, ar2) )
     aux.sort()
     return aux[aux[1:] == aux[:-1]]
 
-def setxor1d(ar1, ar2):
+def setxor1d(ar1, ar2, assume_unique=False):
     """
-    Set exclusive-or of 1D arrays with unique elements.
-
-    Use unique1d() to generate arrays with only unique elements to use as
-    inputs to this function.
+    Set exclusive-or of two 1D arrays.
 
     Parameters
     ----------
-    ar1 : array_like
-        Input array.
-    ar2 : array_like
-        Input array.
+    ar1, ar2 : array_like
+        Input arrays.
+    assume_unique : bool
+        If True, the input arrays are both assumed to be unique, which
+        can speed up the calculation.  Default is False.
 
     Returns
     -------
@@ -252,7 +223,11 @@ def setxor1d(ar1, ar2):
                             performing set operations on arrays.
 
     """
-    aux = np.concatenate((ar1, ar2))
+    if not assume_unique:
+        ar1 = unique(ar1)
+        ar2 = unique(ar2)
+
+    aux = np.concatenate( (ar1, ar2) )
     if aux.size == 0:
         return aux
 
@@ -263,98 +238,68 @@ def setxor1d(ar1, ar2):
     flag2 = flag[1:] == flag[:-1]
     return aux[flag2]
 
-def setmember1d(ar1, ar2):
+def in1d(ar1, ar2, assume_unique=False):
     """
-    Return a boolean array set True where first element is in second array.
-
-    Boolean array is the shape of `ar1` containing True where the elements
-    of `ar1` are in `ar2` and False otherwise.
+    Test whether each element of an array is also present in a second array.
 
-    Use unique1d() to generate arrays with only unique elements to use as
-    inputs to this function.
+    Returns a boolean array the same length as `ar1` that is True
+    where an element of `ar1` is in `ar2` and False otherwise.
 
     Parameters
     ----------
-    ar1 : array_like
-        Input array.
-    ar2 : array_like
-        Input array.
+    ar1, ar2 : array_like
+        Input arrays.
+    assume_unique : bool
+        If True, the input arrays are both assumed to be unique, which
+        can speed up the calculation.  Default is False.
 
     Returns
     -------
     mask : ndarray, bool
         The values `ar1[mask]` are in `ar2`.
 
-
     See Also
     --------
-    setmember1d_nu : Works for arrays with non-unique elements.
     numpy.lib.arraysetops : Module with a number of other functions for
                             performing set operations on arrays.
 
+    Notes
+    -----
+    .. versionadded:: 1.4.0
+
     Examples
     --------
     >>> test = np.arange(5)
     >>> states = [0, 2]
-    >>> mask = np.setmember1d(test,states)
+    >>> mask = np.setmember1d(test, states)
     >>> mask
     array([ True, False,  True, False, False], dtype=bool)
     >>> test[mask]
     array([0, 2])
 
     """
-    # We need this to be a stable sort, so always use 'mergesort' here. The
-    # values from the first array should always come before the values from the
-    # second array.
-    ar = np.concatenate( (ar1, ar2 ) )
+    if not assume_unique:
+        ar1, rev_idx = np.unique(ar1, return_inverse=True)
+        ar2 = np.unique(ar2)
+
+    ar = np.concatenate( (ar1, ar2) )
+    # We need this to be a stable sort, so always use 'mergesort'
+    # here. The values from the first array should always come before
+    # the values from the second array.
     order = ar.argsort(kind='mergesort')
     sar = ar[order]
     equal_adj = (sar[1:] == sar[:-1])
     flag = np.concatenate( (equal_adj, [False] ) )
-
     indx = order.argsort(kind='mergesort')[:len( ar1 )]
-    return flag[indx]
-
-def setmember1d_nu(ar1, ar2):
-    """
-    Return a boolean array set True where first element is in second array.
-
-    Boolean array is the shape of `ar1` containing True where the elements
-    of `ar1` are in `ar2` and False otherwise.
-
-    Unlike setmember1d(), this version works also for arrays with duplicate
-    values.  It uses setmember1d() internally. For arrays with unique
-    entries it is slower than calling setmember1d() directly.
-
-    Parameters
-    ----------
-    ar1 : array_like
-        Input array.
-    ar2 : array_like
-        Input array.
 
-    Returns
-    -------
-    mask : ndarray, bool
-        The values `ar1[mask]` are in `ar2`.
-
-    See Also
-    --------
-    setmember1d : Faster for arrays with unique elements.
-    numpy.lib.arraysetops : Module with a number of other functions for
-                            performing set operations on arrays.
-
-    """
-    unique_ar1, rev_idx = np.unique1d(ar1, return_inverse=True)
-    mask = np.setmember1d(unique_ar1, np.unique1d(ar2))
-    return mask[rev_idx]
+    if assume_unique:
+        return flag[indx]
+    else:
+        return flag[indx][rev_idx]
 
 def union1d(ar1, ar2):
     """
-    Union of 1D arrays with unique elements.
-
-    Use unique1d() to generate arrays with only unique elements to use as
-    inputs to this function.
+    Union of two 1D arrays.
 
     Parameters
     ----------
@@ -374,14 +319,11 @@ def union1d(ar1, ar2):
                             performing set operations on arrays.
 
     """
-    return unique1d( np.concatenate( (ar1, ar2) ) )
+    return unique( np.concatenate( (ar1, ar2) ) )
 
-def setdiff1d(ar1, ar2):
+def setdiff1d(ar1, ar2, assume_unique=False):
     """
-    Set difference of 1D arrays with unique elements.
-
-    Use unique1d() to generate arrays with only unique elements to use as
-    inputs to this function.
+    Set difference of two 1D arrays.
 
     Parameters
     ----------
@@ -389,6 +331,9 @@ def setdiff1d(ar1, ar2):
         Input array.
     ar2 : array_like
         Input comparison array.
+    assume_unique : bool
+        If True, the input arrays are both assumed to be unique, which
+        can speed up the calculation.  Default is False.
 
     Returns
     -------
@@ -401,8 +346,80 @@ def setdiff1d(ar1, ar2):
                             performing set operations on arrays.
 
     """
-    aux = setmember1d(ar1,ar2)
+    if not assume_unique:
+        ar1 = unique(ar1)
+        ar2 = unique(ar2)
+    aux = in1d(ar1, ar2, assume_unique=True)
     if aux.size == 0:
         return aux
     else:
         return np.asarray(ar1)[aux == 0]
+
+@deprecate_with_doc('')
+def unique1d(ar1, return_index=False, return_inverse=False):
+    """
+    This function is deprecated. Use unique() instead.
+    """
+    if return_index:
+        import warnings
+        warnings.warn("The order of the output arguments for "
+                      "`return_index` has changed.  Before, "
+                      "the output was (indices, unique_arr), but "
+                      "has now been reversed to be more consistent.")
+
+    ar = np.asanyarray(ar1).flatten()
+    if ar.size == 0:
+        if return_inverse and return_index:
+            return ar, np.empty(0, np.bool), np.empty(0, np.bool)
+        elif return_inverse or return_index:
+            return ar, np.empty(0, np.bool)
+        else:
+            return ar
+
+    if return_inverse or return_index:
+        perm = ar.argsort()
+        aux = ar[perm]
+        flag = np.concatenate(([True], aux[1:] != aux[:-1]))
+        if return_inverse:
+            iflag = np.cumsum(flag) - 1
+            iperm = perm.argsort()
+            if return_index:
+                return aux[flag], perm[flag], iflag[iperm]
+            else:
+                return aux[flag], iflag[iperm]
+        else:
+            return aux[flag], perm[flag]
+
+    else:
+        ar.sort()
+        flag = np.concatenate(([True], ar[1:] != ar[:-1]))
+        return ar[flag]
+
+@deprecate_with_doc('')
+def intersect1d_nu(ar1, ar2):
+    """
+    This function is deprecated.  Use intersect1d()
+    instead.
+    """
+    # Might be faster than unique1d( intersect1d( ar1, ar2 ) )?
+    aux = np.concatenate((unique1d(ar1), unique1d(ar2)))
+    aux.sort()
+    return aux[aux[1:] == aux[:-1]]
+
+@deprecate_with_doc('')
+def setmember1d(ar1, ar2):
+    """
+    This function is deprecated.  Use in1d(assume_unique=True)
+    instead.
+    """
+    # We need this to be a stable sort, so always use 'mergesort' here. The
+    # values from the first array should always come before the values from the
+    # second array.
+    ar = np.concatenate( (ar1, ar2 ) )
+    order = ar.argsort(kind='mergesort')
+    sar = ar[order]
+    equal_adj = (sar[1:] == sar[:-1])
+    flag = np.concatenate( (equal_adj, [False] ) )
+
+    indx = order.argsort(kind='mergesort')[:len( ar1 )]
+    return flag[indx]
author	Travis Oliphant <oliphant@enthought.com>	2009-08-28 15:36:42 +0000
committer	Travis Oliphant <oliphant@enthought.com>	2009-08-28 15:36:42 +0000
commit	2b01ee6b966b9ca298247e6717e3a5be16a92970 (patch)
tree	6bbb8ee8eebdfe2ef3eb26f13994193b313c6fe7 /numpy/lib/arraysetops.py
parent	c2191bc97da8a0879cec8d3e9a7a93fe9e66fcd8 (diff)
parent	fddd4b9c3b8f18ba7cf386f766b70ec3328b1c69 (diff)
download	numpy-2b01ee6b966b9ca298247e6717e3a5be16a92970.tar.gz