2 files changed, 259 insertions, 116 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 836f4583f..fae3e3cbc 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -109,7 +109,8 @@ def ediff1d(ary, to_end=None, to_begin=None):
     return result
 
 
-def unique(ar, return_index=False, return_inverse=False, return_counts=False):
+def unique(ar, return_index=False, return_inverse=False,
+           return_counts=False, axis=None):
     """
     Find the unique elements of an array.
 
@@ -122,18 +123,27 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
     Parameters
     ----------
     ar : array_like
-        Input array. This will be flattened if it is not already 1-D.
+        Input array. Unless `axis` is specified, this will be flattened if it
+        is not already 1-D.
     return_index : bool, optional
-        If True, also return the indices of `ar` that result in the unique
-        array.
+        If True, also return the indices of `ar` (along the specified axis,
+        if provided, or in the flattened array) that result in the unique array.
     return_inverse : bool, optional
-        If True, also return the indices of the unique array that can be used
-        to reconstruct `ar`.
+        If True, also return the indices of the unique array (for the specified
+        axis, if provided) that can be used to reconstruct `ar`.
     return_counts : bool, optional
-        If True, also return the number of times each unique value comes up
+        If True, also return the number of times each unique item appears
         in `ar`.
-
         .. versionadded:: 1.9.0
+    axis : int or None, optional
+        The axis to operate on. If None, `ar` will be flattened beforehand.
+        Otherwise, duplicate items will be removed along the provided axis,
+        with all the other axes belonging to each of the unique elements.
+        Object arrays or structured arrays that contain objects are not
+        supported if the `axis` kwarg is used.
+
+        .. versionadded:: 1.13.0
+
 
     Returns
     -------
@@ -141,14 +151,13 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
         The sorted unique values.
     unique_indices : ndarray, optional
         The indices of the first occurrences of the unique values in the
-        (flattened) original array. Only provided if `return_index` is True.
+        original array. Only provided if `return_index` is True.
     unique_inverse : ndarray, optional
-        The indices to reconstruct the (flattened) original array from the
+        The indices to reconstruct the original array from the
         unique array. Only provided if `return_inverse` is True.
     unique_counts : ndarray, optional
         The number of times each of the unique values comes up in the
         original array. Only provided if `return_counts` is True.
-
         .. versionadded:: 1.9.0
 
     See Also
@@ -164,6 +173,12 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
     >>> np.unique(a)
     array([1, 2, 3])
 
+    Return the unique rows of a 2D array
+
+    >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
+    >>> np.unique(a, axis=0)
+    array([[1, 0, 0], [2, 3, 4]])
+
     Return the indices of the original array that give the unique values:
 
     >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
@@ -189,6 +204,53 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
     array([1, 2, 6, 4, 2, 3, 2])
 
     """
+    ar = np.asanyarray(ar)
+    if axis is None:
+        return _unique1d(ar, return_index, return_inverse, return_counts)
+    if not (-ar.ndim <= axis < ar.ndim):
+        raise ValueError('Invalid axis kwarg specified for unique')
+
+    ar = np.swapaxes(ar, axis, 0)
+    orig_shape, orig_dtype = ar.shape, ar.dtype
+    # Must reshape to a contiguous 2D array for this to work...
+    ar = ar.reshape(orig_shape[0], -1)
+    ar = np.ascontiguousarray(ar)
+
+    if ar.dtype.char in (np.typecodes['AllInteger'] +
+                         np.typecodes['Datetime'] + 'S'):
+        # Optimization: Creating a view of your data with a np.void data type of
+        # size the number of bytes in a full row. Handles any type where items
+        # have a unique binary representation, i.e. 0 is only 0, not +0 and -0.
+        dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1]))
+    else:
+        dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
+
+    try:
+        consolidated = ar.view(dtype)
+    except TypeError:
+        # There's no good way to do this for object arrays, etc...
+        msg = 'The axis argument to unique is not supported for dtype {dt}'
+        raise TypeError(msg.format(dt=ar.dtype))
+
+    def reshape_uniq(uniq):
+        uniq = uniq.view(orig_dtype)
+        uniq = uniq.reshape(-1, *orig_shape[1:])
+        uniq = np.swapaxes(uniq, 0, axis)
+        return uniq
+
+    output = _unique1d(consolidated, return_index,
+                       return_inverse, return_counts)
+    if not (return_index or return_inverse or return_counts):
+        return reshape_uniq(output)
+    else:
+        uniq = reshape_uniq(output[0])
+        return (uniq,) + output[1:]
+
+def _unique1d(ar, return_index=False, return_inverse=False,
+              return_counts=False):
+    """
+    Find the unique elements of an array, ignoring shape.
+    """
     ar = np.asanyarray(ar).flatten()
 
     optional_indices = return_index or return_inverse
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 75918fbee..8b142c264 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -5,7 +5,7 @@ from __future__ import division, absolute_import, print_function
 
 import numpy as np
 from numpy.testing import (
-    run_module_suite, TestCase, assert_array_equal, assert_equal
+    run_module_suite, TestCase, assert_array_equal, assert_equal, assert_raises
     )
 from numpy.lib.arraysetops import (
     ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d
@@ -14,107 +14,6 @@ from numpy.lib.arraysetops import (
 
 class TestSetOps(TestCase):
 
-    def test_unique(self):
-
-        def check_all(a, b, i1, i2, c, dt):
-            base_msg = 'check {0} failed for type {1}'
-
-            msg = base_msg.format('values', dt)
-            v = unique(a)
-            assert_array_equal(v, b, msg)
-
-            msg = base_msg.format('return_index', dt)
-            v, j = unique(a, 1, 0, 0)
-            assert_array_equal(v, b, msg)
-            assert_array_equal(j, i1, msg)
-
-            msg = base_msg.format('return_inverse', dt)
-            v, j = unique(a, 0, 1, 0)
-            assert_array_equal(v, b, msg)
-            assert_array_equal(j, i2, msg)
-
-            msg = base_msg.format('return_counts', dt)
-            v, j = unique(a, 0, 0, 1)
-            assert_array_equal(v, b, msg)
-            assert_array_equal(j, c, msg)
-
-            msg = base_msg.format('return_index and return_inverse', dt)
-            v, j1, j2 = unique(a, 1, 1, 0)
-            assert_array_equal(v, b, msg)
-            assert_array_equal(j1, i1, msg)
-            assert_array_equal(j2, i2, msg)
-
-            msg = base_msg.format('return_index and return_counts', dt)
-            v, j1, j2 = unique(a, 1, 0, 1)
-            assert_array_equal(v, b, msg)
-            assert_array_equal(j1, i1, msg)
-            assert_array_equal(j2, c, msg)
-
-            msg = base_msg.format('return_inverse and return_counts', dt)
-            v, j1, j2 = unique(a, 0, 1, 1)
-            assert_array_equal(v, b, msg)
-            assert_array_equal(j1, i2, msg)
-            assert_array_equal(j2, c, msg)
-
-            msg = base_msg.format(('return_index, return_inverse '
-                                   'and return_counts'), dt)
-            v, j1, j2, j3 = unique(a, 1, 1, 1)
-            assert_array_equal(v, b, msg)
-            assert_array_equal(j1, i1, msg)
-            assert_array_equal(j2, i2, msg)
-            assert_array_equal(j3, c, msg)
-
-        a = [5, 7, 1, 2, 1, 5, 7]*10
-        b = [1, 2, 5, 7]
-        i1 = [2, 3, 0, 1]
-        i2 = [2, 3, 0, 1, 0, 2, 3]*10
-        c = np.multiply([2, 1, 2, 2], 10)
-
-        # test for numeric arrays
-        types = []
-        types.extend(np.typecodes['AllInteger'])
-        types.extend(np.typecodes['AllFloat'])
-        types.append('datetime64[D]')
-        types.append('timedelta64[D]')
-        for dt in types:
-            aa = np.array(a, dt)
-            bb = np.array(b, dt)
-            check_all(aa, bb, i1, i2, c, dt)
-
-        # test for object arrays
-        dt = 'O'
-        aa = np.empty(len(a), dt)
-        aa[:] = a
-        bb = np.empty(len(b), dt)
-        bb[:] = b
-        check_all(aa, bb, i1, i2, c, dt)
-
-        # test for structured arrays
-        dt = [('', 'i'), ('', 'i')]
-        aa = np.array(list(zip(a, a)), dt)
-        bb = np.array(list(zip(b, b)), dt)
-        check_all(aa, bb, i1, i2, c, dt)
-
-        # test for ticket #2799
-        aa = [1. + 0.j, 1 - 1.j, 1]
-        assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])
-
-        # test for ticket #4785
-        a = [(1, 2), (1, 2), (2, 3)]
-        unq = [1, 2, 3]
-        inv = [0, 1, 0, 1, 1, 2]
-        a1 = unique(a)
-        assert_array_equal(a1, unq)
-        a2, a2_inv = unique(a, return_inverse=True)
-        assert_array_equal(a2, unq)
-        assert_array_equal(a2_inv, inv)
-
-        # test for chararrays with return_inverse (gh-5099)
-        a = np.chararray(5)
-        a[...] = ''
-        a2, a2_inv = np.unique(a, return_inverse=True)
-        assert_array_equal(a2_inv, np.zeros(5))
-
     def test_intersect1d(self):
         # unique inputs
         a = np.array([5, 7, 1, 2])
@@ -182,7 +81,7 @@ class TestSetOps(TestCase):
         # we use two different sizes for the b array here to test the
         # two different paths in in1d().
         for mult in (1, 10):
-            # One check without np.array, to make sure lists are handled correct
+            # One check without np.array to ensure lists are handled correctly
             a = [5, 7, 1, 2]
             b = [2, 4, 3, 1, 5] * mult
             ec = np.array([True, False, True, True])
@@ -201,8 +100,8 @@ class TestSetOps(TestCase):
 
             a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
             b = [2, 3, 4] * mult
-            ec = [False, True, False, True, True, True, True, True, True, False,
-                  True, False, False, False]
+            ec = [False, True, False, True, True, True, True, True, True,
+                  False, True, False, False, False]
             c = in1d(a, b)
             assert_array_equal(c, ec)
 
@@ -313,5 +212,187 @@ class TestSetOps(TestCase):
         assert_array_equal(c1, c2)
 
 
+class TestUnique(TestCase):
+
+    def test_unique_1d(self):
+
+        def check_all(a, b, i1, i2, c, dt):
+            base_msg = 'check {0} failed for type {1}'
+
+            msg = base_msg.format('values', dt)
+            v = unique(a)
+            assert_array_equal(v, b, msg)
+
+            msg = base_msg.format('return_index', dt)
+            v, j = unique(a, 1, 0, 0)
+            assert_array_equal(v, b, msg)
+            assert_array_equal(j, i1, msg)
+
+            msg = base_msg.format('return_inverse', dt)
+            v, j = unique(a, 0, 1, 0)
+            assert_array_equal(v, b, msg)
+            assert_array_equal(j, i2, msg)
+
+            msg = base_msg.format('return_counts', dt)
+            v, j = unique(a, 0, 0, 1)
+            assert_array_equal(v, b, msg)
+            assert_array_equal(j, c, msg)
+
+            msg = base_msg.format('return_index and return_inverse', dt)
+            v, j1, j2 = unique(a, 1, 1, 0)
+            assert_array_equal(v, b, msg)
+            assert_array_equal(j1, i1, msg)
+            assert_array_equal(j2, i2, msg)
+
+            msg = base_msg.format('return_index and return_counts', dt)
+            v, j1, j2 = unique(a, 1, 0, 1)
+            assert_array_equal(v, b, msg)
+            assert_array_equal(j1, i1, msg)
+            assert_array_equal(j2, c, msg)
+
+            msg = base_msg.format('return_inverse and return_counts', dt)
+            v, j1, j2 = unique(a, 0, 1, 1)
+            assert_array_equal(v, b, msg)
+            assert_array_equal(j1, i2, msg)
+            assert_array_equal(j2, c, msg)
+
+            msg = base_msg.format(('return_index, return_inverse '
+                                   'and return_counts'), dt)
+            v, j1, j2, j3 = unique(a, 1, 1, 1)
+            assert_array_equal(v, b, msg)
+            assert_array_equal(j1, i1, msg)
+            assert_array_equal(j2, i2, msg)
+            assert_array_equal(j3, c, msg)
+
+        a = [5, 7, 1, 2, 1, 5, 7]*10
+        b = [1, 2, 5, 7]
+        i1 = [2, 3, 0, 1]
+        i2 = [2, 3, 0, 1, 0, 2, 3]*10
+        c = np.multiply([2, 1, 2, 2], 10)
+
+        # test for numeric arrays
+        types = []
+        types.extend(np.typecodes['AllInteger'])
+        types.extend(np.typecodes['AllFloat'])
+        types.append('datetime64[D]')
+        types.append('timedelta64[D]')
+        for dt in types:
+            aa = np.array(a, dt)
+            bb = np.array(b, dt)
+            check_all(aa, bb, i1, i2, c, dt)
+
+        # test for object arrays
+        dt = 'O'
+        aa = np.empty(len(a), dt)
+        aa[:] = a
+        bb = np.empty(len(b), dt)
+        bb[:] = b
+        check_all(aa, bb, i1, i2, c, dt)
+
+        # test for structured arrays
+        dt = [('', 'i'), ('', 'i')]
+        aa = np.array(list(zip(a, a)), dt)
+        bb = np.array(list(zip(b, b)), dt)
+        check_all(aa, bb, i1, i2, c, dt)
+
+        # test for ticket #2799
+        aa = [1. + 0.j, 1 - 1.j, 1]
+        assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])
+
+        # test for ticket #4785
+        a = [(1, 2), (1, 2), (2, 3)]
+        unq = [1, 2, 3]
+        inv = [0, 1, 0, 1, 1, 2]
+        a1 = unique(a)
+        assert_array_equal(a1, unq)
+        a2, a2_inv = unique(a, return_inverse=True)
+        assert_array_equal(a2, unq)
+        assert_array_equal(a2_inv, inv)
+
+        # test for chararrays with return_inverse (gh-5099)
+        a = np.chararray(5)
+        a[...] = ''
+        a2, a2_inv = np.unique(a, return_inverse=True)
+        assert_array_equal(a2_inv, np.zeros(5))
+
+    def test_unique_axis_errors(self):
+        assert_raises(TypeError, self._run_axis_tests, object)
+        assert_raises(TypeError, self._run_axis_tests,
+                      [('a', int), ('b', object)])
+
+        assert_raises(ValueError, unique, np.arange(10), axis=2)
+        assert_raises(ValueError, unique, np.arange(10), axis=-2)
+
+    def test_unique_axis_list(self):
+        msg = "Unique failed on list of lists"
+        inp = [[0, 1, 0], [0, 1, 0]]
+        inp_arr = np.asarray(inp)
+        assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg)
+        assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)
+
+    def test_unique_axis(self):
+        types = []
+        types.extend(np.typecodes['AllInteger'])
+        types.extend(np.typecodes['AllFloat'])
+        types.append('datetime64[D]')
+        types.append('timedelta64[D]')
+        types.append([('a', int), ('b', int)])
+        types.append([('a', int), ('b', float)])
+
+        for dtype in types:
+            self._run_axis_tests(dtype)
+
+        msg = 'Non-bitwise-equal booleans test failed'
+        data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
+        result = np.array([[False, True], [True, True]], dtype=bool)
+        assert_array_equal(unique(data, axis=0), result, msg)
+
+        msg = 'Negative zero equality test failed'
+        data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
+        result = np.array([[-0.0, 0.0]])
+        assert_array_equal(unique(data, axis=0), result, msg)
+
+    def _run_axis_tests(self, dtype):
+        """Check `unique` along each axis on small arrays cast to `dtype`."""
+        data = np.array([[0, 1, 0, 0],
+                         [1, 0, 0, 0],
+                         [0, 1, 0, 0],
+                         [1, 0, 0, 0]]).astype(dtype)
+
+        msg = 'Unique with 2d array and axis=None failed'
+        result = np.array([0, 1])
+        assert_array_equal(unique(data), result.astype(dtype), msg)
+
+        msg = 'Unique with 2d array and axis=0 failed'
+        result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
+        assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)
+
+        msg = 'Unique with 2d array and axis=1 failed'
+        result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
+        assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)
+
+        msg = 'Unique with 3d array and axis=2 failed'
+        data3d = np.dstack([data] * 3)
+        assert_array_equal(unique(data3d, axis=2), data3d[..., :1], msg)
+
+        uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
+                                     return_inverse=True, return_counts=True)
+        msg = "Unique's return_index=True failed with axis=0"
+        assert_array_equal(data[idx], uniq, msg)
+        msg = "Unique's return_inverse=True failed with axis=0"
+        assert_array_equal(uniq[inv], data, msg)
+        msg = "Unique's return_counts=True failed with axis=0"
+        assert_array_equal(cnt, np.array([2, 2]), msg)
+
+        uniq, idx, inv, cnt = unique(data, axis=1, return_index=True,
+                                     return_inverse=True, return_counts=True)
+        msg = "Unique's return_index=True failed with axis=1"
+        assert_array_equal(data[:, idx], uniq, msg)
+        msg = "Unique's return_inverse=True failed with axis=1"
+        assert_array_equal(uniq[:, inv], data, msg)
+        msg = "Unique's return_counts=True failed with axis=1"
+        assert_array_equal(cnt, np.array([2, 1, 1]), msg)
+
+
 if __name__ == "__main__":
     run_module_suite()