diff options
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/arraypad.py | 154 | ||||
-rw-r--r-- | numpy/lib/arraysetops.py | 65 | ||||
-rw-r--r-- | numpy/lib/tests/test_arraysetops.py | 41 |
3 files changed, 165 insertions, 95 deletions
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py index 600301c56..e9ca9de4d 100644 --- a/numpy/lib/arraypad.py +++ b/numpy/lib/arraypad.py @@ -74,6 +74,35 @@ def _round_ifneeded(arr, dtype): arr.round(out=arr) +def _slice_at_axis(shape, sl, axis): + """ + Construct a slice tuple the length of shape, with sl at the specified axis + """ + slice_tup = (slice(None),) + return slice_tup * axis + (sl,) + slice_tup * (len(shape) - axis - 1) + + +def _slice_first(shape, n, axis): + """ Construct a slice tuple to take the first n elements along axis """ + return _slice_at_axis(shape, slice(0, n), axis=axis) + + +def _slice_last(shape, n, axis): + """ Construct a slice tuple to take the last n elements along axis """ + dim = shape[axis] # doing this explicitly makes n=0 work + return _slice_at_axis(shape, slice(dim - n, dim), axis=axis) + + +def _do_prepend(arr, pad_chunk, axis): + return np.concatenate( + (pad_chunk.astype(arr.dtype, copy=False), arr), axis=axis) + + +def _do_append(arr, pad_chunk, axis): + return np.concatenate( + (arr, pad_chunk.astype(arr.dtype, copy=False)), axis=axis) + + def _prepend_const(arr, pad_amt, val, axis=-1): """ Prepend constant `val` along `axis` of `arr`. @@ -100,8 +129,7 @@ def _prepend_const(arr, pad_amt, val, axis=-1): return arr padshape = tuple(x if i != axis else pad_amt for (i, x) in enumerate(arr.shape)) - return np.concatenate((np.full(padshape, val, dtype=arr.dtype), arr), - axis=axis) + return _do_prepend(arr, np.full(padshape, val, dtype=arr.dtype), axis) def _append_const(arr, pad_amt, val, axis=-1): @@ -130,8 +158,8 @@ def _append_const(arr, pad_amt, val, axis=-1): return arr padshape = tuple(x if i != axis else pad_amt for (i, x) in enumerate(arr.shape)) - return np.concatenate((arr, np.full(padshape, val, dtype=arr.dtype)), - axis=axis) + return _do_append(arr, np.full(padshape, val, dtype=arr.dtype), axis) + def _prepend_edge(arr, pad_amt, axis=-1): @@ -156,11 +184,9 @@ def _prepend_edge(arr, pad_amt, axis=-1): if pad_amt == 0: return arr - edge_slice = tuple(slice(None) if i != axis else slice(0, 1) - for (i, x) in enumerate(arr.shape)) + edge_slice = _slice_first(arr.shape, 1, axis=axis) edge_arr = arr[edge_slice] - return np.concatenate((edge_arr.repeat(pad_amt, axis=axis), arr), - axis=axis) + return _do_prepend(arr, edge_arr.repeat(pad_amt, axis=axis), axis) def _append_edge(arr, pad_amt, axis=-1): @@ -186,11 +212,9 @@ def _append_edge(arr, pad_amt, axis=-1): if pad_amt == 0: return arr - edge_slice = tuple(slice(None) if i != axis else slice(x - 1, x) - for (i, x) in enumerate(arr.shape)) + edge_slice = _slice_last(arr.shape, 1, axis=axis) edge_arr = arr[edge_slice] - return np.concatenate((arr, edge_arr.repeat(pad_amt, axis=axis)), - axis=axis) + return _do_append(arr, edge_arr.repeat(pad_amt, axis=axis), axis) def _prepend_ramp(arr, pad_amt, end, axis=-1): @@ -228,8 +252,7 @@ def _prepend_ramp(arr, pad_amt, end, axis=-1): reverse=True).astype(np.float64) # Appropriate slicing to extract n-dimensional edge along `axis` - edge_slice = tuple(slice(None) if i != axis else slice(0, 1) - for (i, x) in enumerate(arr.shape)) + edge_slice = _slice_first(arr.shape, 1, axis=axis) # Extract edge, and extend along `axis` edge_pad = arr[edge_slice].repeat(pad_amt, axis) @@ -241,7 +264,7 @@ def _prepend_ramp(arr, pad_amt, end, axis=-1): _round_ifneeded(ramp_arr, arr.dtype) # Ramp values will most likely be float, cast them to the same type as arr - return np.concatenate((ramp_arr.astype(arr.dtype), arr), axis=axis) + return _do_prepend(arr, ramp_arr, axis) def _append_ramp(arr, pad_amt, end, axis=-1): @@ -279,8 +302,7 @@ def _append_ramp(arr, pad_amt, end, axis=-1): reverse=False).astype(np.float64) # Slice a chunk from the edge to calculate stats on - edge_slice = tuple(slice(None) if i != axis else slice(x - 1, x) - for (i, x) in enumerate(arr.shape)) + edge_slice = _slice_last(arr.shape, 1, axis=axis) # Extract edge, and extend along `axis` edge_pad = arr[edge_slice].repeat(pad_amt, axis) @@ -292,7 +314,7 @@ def _append_ramp(arr, pad_amt, end, axis=-1): _round_ifneeded(ramp_arr, arr.dtype) # Ramp values will most likely be float, cast them to the same type as arr - return np.concatenate((arr, ramp_arr.astype(arr.dtype)), axis=axis) + return _do_append(arr, ramp_arr, axis) def _prepend_max(arr, pad_amt, num, axis=-1): @@ -332,15 +354,13 @@ def _prepend_max(arr, pad_amt, num, axis=-1): num = None # Slice a chunk from the edge to calculate stats on - max_slice = tuple(slice(None) if i != axis else slice(num) - for (i, x) in enumerate(arr.shape)) + max_slice = _slice_first(arr.shape, num, axis=axis) # Extract slice, calculate max max_chunk = arr[max_slice].max(axis=axis, keepdims=True) # Concatenate `arr` with `max_chunk`, extended along `axis` by `pad_amt` - return np.concatenate((max_chunk.repeat(pad_amt, axis=axis), arr), - axis=axis) + return _do_prepend(arr, max_chunk.repeat(pad_amt, axis=axis), axis) def _append_max(arr, pad_amt, num, axis=-1): @@ -379,11 +399,8 @@ def _append_max(arr, pad_amt, num, axis=-1): num = None # Slice a chunk from the edge to calculate stats on - end = arr.shape[axis] - 1 if num is not None: - max_slice = tuple( - slice(None) if i != axis else slice(end, end - num, -1) - for (i, x) in enumerate(arr.shape)) + max_slice = _slice_last(arr.shape, num, axis=axis) else: max_slice = tuple(slice(None) for x in arr.shape) @@ -391,8 +408,7 @@ def _append_max(arr, pad_amt, num, axis=-1): max_chunk = arr[max_slice].max(axis=axis, keepdims=True) # Concatenate `arr` with `max_chunk`, extended along `axis` by `pad_amt` - return np.concatenate((arr, max_chunk.repeat(pad_amt, axis=axis)), - axis=axis) + return _do_append(arr, max_chunk.repeat(pad_amt, axis=axis), axis) def _prepend_mean(arr, pad_amt, num, axis=-1): @@ -431,16 +447,14 @@ def _prepend_mean(arr, pad_amt, num, axis=-1): num = None # Slice a chunk from the edge to calculate stats on - mean_slice = tuple(slice(None) if i != axis else slice(num) - for (i, x) in enumerate(arr.shape)) + mean_slice = _slice_first(arr.shape, num, axis=axis) # Extract slice, calculate mean mean_chunk = arr[mean_slice].mean(axis, keepdims=True) _round_ifneeded(mean_chunk, arr.dtype) # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt` - return np.concatenate((mean_chunk.repeat(pad_amt, axis).astype(arr.dtype), - arr), axis=axis) + return _do_prepend(arr, mean_chunk.repeat(pad_amt, axis), axis=axis) def _append_mean(arr, pad_amt, num, axis=-1): @@ -479,11 +493,8 @@ def _append_mean(arr, pad_amt, num, axis=-1): num = None # Slice a chunk from the edge to calculate stats on - end = arr.shape[axis] - 1 if num is not None: - mean_slice = tuple( - slice(None) if i != axis else slice(end, end - num, -1) - for (i, x) in enumerate(arr.shape)) + mean_slice = _slice_last(arr.shape, num, axis=axis) else: mean_slice = tuple(slice(None) for x in arr.shape) @@ -492,8 +503,7 @@ def _append_mean(arr, pad_amt, num, axis=-1): _round_ifneeded(mean_chunk, arr.dtype) # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt` - return np.concatenate( - (arr, mean_chunk.repeat(pad_amt, axis).astype(arr.dtype)), axis=axis) + return _do_append(arr, mean_chunk.repeat(pad_amt, axis), axis=axis) def _prepend_med(arr, pad_amt, num, axis=-1): @@ -532,16 +542,14 @@ def _prepend_med(arr, pad_amt, num, axis=-1): num = None # Slice a chunk from the edge to calculate stats on - med_slice = tuple(slice(None) if i != axis else slice(num) - for (i, x) in enumerate(arr.shape)) + med_slice = _slice_first(arr.shape, num, axis=axis) # Extract slice, calculate median med_chunk = np.median(arr[med_slice], axis=axis, keepdims=True) _round_ifneeded(med_chunk, arr.dtype) # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt` - return np.concatenate( - (med_chunk.repeat(pad_amt, axis).astype(arr.dtype), arr), axis=axis) + return _do_prepend(arr, med_chunk.repeat(pad_amt, axis), axis=axis) def _append_med(arr, pad_amt, num, axis=-1): @@ -580,11 +588,8 @@ def _append_med(arr, pad_amt, num, axis=-1): num = None # Slice a chunk from the edge to calculate stats on - end = arr.shape[axis] - 1 if num is not None: - med_slice = tuple( - slice(None) if i != axis else slice(end, end - num, -1) - for (i, x) in enumerate(arr.shape)) + med_slice = _slice_last(arr.shape, num, axis=axis) else: med_slice = tuple(slice(None) for x in arr.shape) @@ -593,8 +598,7 @@ def _append_med(arr, pad_amt, num, axis=-1): _round_ifneeded(med_chunk, arr.dtype) # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt` - return np.concatenate( - (arr, med_chunk.repeat(pad_amt, axis).astype(arr.dtype)), axis=axis) + return _do_append(arr, med_chunk.repeat(pad_amt, axis), axis=axis) def _prepend_min(arr, pad_amt, num, axis=-1): @@ -634,15 +638,13 @@ def _prepend_min(arr, pad_amt, num, axis=-1): num = None # Slice a chunk from the edge to calculate stats on - min_slice = tuple(slice(None) if i != axis else slice(num) - for (i, x) in enumerate(arr.shape)) + min_slice = _slice_first(arr.shape, num, axis=axis) # Extract slice, calculate min min_chunk = arr[min_slice].min(axis=axis, keepdims=True) # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt` - return np.concatenate((min_chunk.repeat(pad_amt, axis=axis), arr), - axis=axis) + return _do_prepend(arr, min_chunk.repeat(pad_amt, axis), axis=axis) def _append_min(arr, pad_amt, num, axis=-1): @@ -681,11 +683,8 @@ def _append_min(arr, pad_amt, num, axis=-1): num = None # Slice a chunk from the edge to calculate stats on - end = arr.shape[axis] - 1 if num is not None: - min_slice = tuple( - slice(None) if i != axis else slice(end, end - num, -1) - for (i, x) in enumerate(arr.shape)) + min_slice = _slice_last(arr.shape, num, axis=axis) else: min_slice = tuple(slice(None) for x in arr.shape) @@ -693,8 +692,7 @@ def _append_min(arr, pad_amt, num, axis=-1): min_chunk = arr[min_slice].min(axis=axis, keepdims=True) # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt` - return np.concatenate((arr, min_chunk.repeat(pad_amt, axis=axis)), - axis=axis) + return _do_append(arr, min_chunk.repeat(pad_amt, axis), axis=axis) def _pad_ref(arr, pad_amt, method, axis=-1): @@ -737,15 +735,13 @@ def _pad_ref(arr, pad_amt, method, axis=-1): # Prepended region # Slice off a reverse indexed chunk from near edge to pad `arr` before - ref_slice = tuple(slice(None) if i != axis else slice(pad_amt[0], 0, -1) - for (i, x) in enumerate(arr.shape)) + ref_slice = _slice_at_axis(arr.shape, slice(pad_amt[0], 0, -1), axis=axis) ref_chunk1 = arr[ref_slice] # Memory/computationally more expensive, only do this if `method='odd'` if 'odd' in method and pad_amt[0] > 0: - edge_slice1 = tuple(slice(None) if i != axis else slice(0, 1) - for (i, x) in enumerate(arr.shape)) + edge_slice1 = _slice_first(arr.shape, 1, axis=axis) edge_chunk = arr[edge_slice1] ref_chunk1 = 2 * edge_chunk - ref_chunk1 del edge_chunk @@ -756,15 +752,12 @@ def _pad_ref(arr, pad_amt, method, axis=-1): # Slice off a reverse indexed chunk from far edge to pad `arr` after start = arr.shape[axis] - pad_amt[1] - 1 end = arr.shape[axis] - 1 - ref_slice = tuple(slice(None) if i != axis else slice(start, end) - for (i, x) in enumerate(arr.shape)) - rev_idx = tuple(slice(None) if i != axis else slice(None, None, -1) - for (i, x) in enumerate(arr.shape)) + ref_slice = _slice_at_axis(arr.shape, slice(start, end), axis=axis) + rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis) ref_chunk2 = arr[ref_slice][rev_idx] if 'odd' in method: - edge_slice2 = tuple(slice(None) if i != axis else slice(x - 1, x) - for (i, x) in enumerate(arr.shape)) + edge_slice2 = _slice_last(arr.shape, 1, axis=axis) edge_chunk = arr[edge_slice2] ref_chunk2 = 2 * edge_chunk - ref_chunk2 del edge_chunk @@ -813,16 +806,13 @@ def _pad_sym(arr, pad_amt, method, axis=-1): # Prepended region # Slice off a reverse indexed chunk from near edge to pad `arr` before - sym_slice = tuple(slice(None) if i != axis else slice(0, pad_amt[0]) - for (i, x) in enumerate(arr.shape)) - rev_idx = tuple(slice(None) if i != axis else slice(None, None, -1) - for (i, x) in enumerate(arr.shape)) + sym_slice = _slice_first(arr.shape, pad_amt[0], axis=axis) + rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis) sym_chunk1 = arr[sym_slice][rev_idx] # Memory/computationally more expensive, only do this if `method='odd'` if 'odd' in method and pad_amt[0] > 0: - edge_slice1 = tuple(slice(None) if i != axis else slice(0, 1) - for (i, x) in enumerate(arr.shape)) + edge_slice1 = _slice_first(arr.shape, 1, axis=axis) edge_chunk = arr[edge_slice1] sym_chunk1 = 2 * edge_chunk - sym_chunk1 del edge_chunk @@ -831,15 +821,11 @@ def _pad_sym(arr, pad_amt, method, axis=-1): # Appended region # Slice off a reverse indexed chunk from far edge to pad `arr` after - start = arr.shape[axis] - pad_amt[1] - end = arr.shape[axis] - sym_slice = tuple(slice(None) if i != axis else slice(start, end) - for (i, x) in enumerate(arr.shape)) + sym_slice = _slice_last(arr.shape, pad_amt[1], axis=axis) sym_chunk2 = arr[sym_slice][rev_idx] if 'odd' in method: - edge_slice2 = tuple(slice(None) if i != axis else slice(x - 1, x) - for (i, x) in enumerate(arr.shape)) + edge_slice2 = _slice_last(arr.shape, 1, axis=axis) edge_chunk = arr[edge_slice2] sym_chunk2 = 2 * edge_chunk - sym_chunk2 del edge_chunk @@ -885,18 +871,14 @@ def _pad_wrap(arr, pad_amt, axis=-1): # Prepended region # Slice off a reverse indexed chunk from near edge to pad `arr` before - start = arr.shape[axis] - pad_amt[0] - end = arr.shape[axis] - wrap_slice = tuple(slice(None) if i != axis else slice(start, end) - for (i, x) in enumerate(arr.shape)) + wrap_slice = _slice_last(arr.shape, pad_amt[0], axis=axis) wrap_chunk1 = arr[wrap_slice] ########################################################################## # Appended region # Slice off a reverse indexed chunk from far edge to pad `arr` after - wrap_slice = tuple(slice(None) if i != axis else slice(0, pad_amt[1]) - for (i, x) in enumerate(arr.shape)) + wrap_slice = _slice_first(arr.shape, pad_amt[1], axis=axis) wrap_chunk2 = arr[wrap_slice] # Concatenate `arr` with both chunks, extending along `axis` diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index e8eda297f..4d3f35183 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -298,7 +298,7 @@ def _unique1d(ar, return_index=False, return_inverse=False, return ret -def intersect1d(ar1, ar2, assume_unique=False): +def intersect1d(ar1, ar2, assume_unique=False, return_indices=False): """ Find the intersection of two arrays. @@ -307,15 +307,28 @@ def intersect1d(ar1, ar2, assume_unique=False): Parameters ---------- ar1, ar2 : array_like - Input arrays. + Input arrays. Will be flattened if not already 1D. assume_unique : bool If True, the input arrays are both assumed to be unique, which can speed up the calculation. Default is False. - + return_indices : bool + If True, the indices which correspond to the intersection of the + two arrays are returned. The first instance of a value is used + if there are multiple. Default is False. + + .. versionadded:: 1.15.0 + Returns ------- intersect1d : ndarray Sorted 1D array of common and unique elements. + comm1 : ndarray + The indices of the first occurrences of the common values in `ar1`. + Only provided if `return_indices` is True. + comm2 : ndarray + The indices of the first occurrences of the common values in `ar2`. + Only provided if `return_indices` is True. + See Also -------- @@ -332,14 +345,49 @@ def intersect1d(ar1, ar2, assume_unique=False): >>> from functools import reduce >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) array([3]) + + To return the indices of the values common to the input arrays + along with the intersected values: + >>> x = np.array([1, 1, 2, 3, 4]) + >>> y = np.array([2, 1, 4, 6]) + >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True) + >>> x_ind, y_ind + (array([0, 2, 4]), array([1, 0, 2])) + >>> xy, x[x_ind], y[y_ind] + (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4])) + """ if not assume_unique: - # Might be faster than unique( intersect1d( ar1, ar2 ) )? - ar1 = unique(ar1) - ar2 = unique(ar2) + if return_indices: + ar1, ind1 = unique(ar1, return_index=True) + ar2, ind2 = unique(ar2, return_index=True) + else: + ar1 = unique(ar1) + ar2 = unique(ar2) + else: + ar1 = ar1.ravel() + ar2 = ar2.ravel() + aux = np.concatenate((ar1, ar2)) - aux.sort() - return aux[:-1][aux[1:] == aux[:-1]] + if return_indices: + aux_sort_indices = np.argsort(aux, kind='mergesort') + aux = aux[aux_sort_indices] + else: + aux.sort() + + mask = aux[1:] == aux[:-1] + int1d = aux[:-1][mask] + + if return_indices: + ar1_indices = aux_sort_indices[:-1][mask] + ar2_indices = aux_sort_indices[1:][mask] - ar1.size + if not assume_unique: + ar1_indices = ind1[ar1_indices] + ar2_indices = ind2[ar2_indices] + + return int1d, ar1_indices, ar2_indices + else: + return int1d def setxor1d(ar1, ar2, assume_unique=False): """ @@ -660,3 +708,4 @@ def setdiff1d(ar1, ar2, assume_unique=False): ar1 = unique(ar1) ar2 = unique(ar2) return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)] + diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 984a3b15e..dace5ade8 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -32,7 +32,46 @@ class TestSetOps(object): assert_array_equal(c, ed) assert_array_equal([], intersect1d([], [])) - + + def test_intersect1d_indices(self): + # unique inputs + a = np.array([1, 2, 3, 4]) + b = np.array([2, 1, 4, 6]) + c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True) + ee = np.array([1, 2, 4]) + assert_array_equal(c, ee) + assert_array_equal(a[i1], ee) + assert_array_equal(b[i2], ee) + + # non-unique inputs + a = np.array([1, 2, 2, 3, 4, 3, 2]) + b = np.array([1, 8, 4, 2, 2, 3, 2, 3]) + c, i1, i2 = intersect1d(a, b, return_indices=True) + ef = np.array([1, 2, 3, 4]) + assert_array_equal(c, ef) + assert_array_equal(a[i1], ef) + assert_array_equal(b[i2], ef) + + # non1d, unique inputs + a = np.array([[2, 4, 5, 6], [7, 8, 1, 15]]) + b = np.array([[3, 2, 7, 6], [10, 12, 8, 9]]) + c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True) + ui1 = np.unravel_index(i1, a.shape) + ui2 = np.unravel_index(i2, b.shape) + ea = np.array([2, 6, 7, 8]) + assert_array_equal(ea, a[ui1]) + assert_array_equal(ea, b[ui2]) + + # non1d, not assumed to be uniqueinputs + a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]]) + b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]]) + c, i1, i2 = intersect1d(a, b, return_indices=True) + ui1 = np.unravel_index(i1, a.shape) + ui2 = np.unravel_index(i2, b.shape) + ea = np.array([2, 7, 8]) + assert_array_equal(ea, a[ui1]) + assert_array_equal(ea, b[ui2]) + def test_setxor1d(self): a = np.array([5, 7, 1, 2]) b = np.array([2, 4, 3, 1, 5]) |