Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/arraypad.py | 289
-rw-r--r-- | numpy/lib/arraysetops.py | 65
-rw-r--r-- | numpy/lib/format.py | 13
-rw-r--r-- | numpy/lib/function_base.py | 114
-rw-r--r-- | numpy/lib/histograms.py | 36
-rw-r--r-- | numpy/lib/index_tricks.py | 2
-rw-r--r-- | numpy/lib/nanfunctions.py | 110
-rw-r--r-- | numpy/lib/npyio.py | 30
-rw-r--r-- | numpy/lib/polynomial.py | 5
-rw-r--r-- | numpy/lib/scimath.py | 2
-rw-r--r-- | numpy/lib/shape_base.py | 227
-rw-r--r-- | numpy/lib/tests/test_arraypad.py | 13
-rw-r--r-- | numpy/lib/tests/test_arraysetops.py | 43
-rw-r--r-- | numpy/lib/tests/test_function_base.py | 47
-rw-r--r-- | numpy/lib/tests/test_histograms.py | 43
-rw-r--r-- | numpy/lib/tests/test_index_tricks.py | 42
-rw-r--r-- | numpy/lib/tests/test_io.py | 2
-rw-r--r-- | numpy/lib/tests/test_nanfunctions.py | 141
-rw-r--r-- | numpy/lib/tests/test_shape_base.py | 118
-rw-r--r-- | numpy/lib/twodim_base.py | 2
20 files changed, 950 insertions(+), 394 deletions(-)
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index daaa68d06..e9ca9de4d 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -74,6 +74,35 @@ def _round_ifneeded(arr, dtype):
         arr.round(out=arr)
 
 
+def _slice_at_axis(shape, sl, axis):
+    """
+    Construct a slice tuple the length of shape, with sl at the specified axis
+    """
+    slice_tup = (slice(None),)
+    return slice_tup * axis + (sl,) + slice_tup * (len(shape) - axis - 1)
+
+
+def _slice_first(shape, n, axis):
+    """ Construct a slice tuple to take the first n elements along axis """
+    return _slice_at_axis(shape, slice(0, n), axis=axis)
+
+
+def _slice_last(shape, n, axis):
+    """ Construct a slice tuple to take the last n elements along axis """
+    dim = shape[axis]  # doing this explicitly makes n=0 work
+    return _slice_at_axis(shape, slice(dim - n, dim), axis=axis)
+
+
+def _do_prepend(arr, pad_chunk, axis):
+    return np.concatenate(
+        (pad_chunk.astype(arr.dtype, copy=False), arr), axis=axis)
+
+
+def _do_append(arr, pad_chunk, axis):
+    return np.concatenate(
+        (arr, pad_chunk.astype(arr.dtype, copy=False)), axis=axis)
+
+
 def _prepend_const(arr, pad_amt, val, axis=-1):
     """
     Prepend constant `val` along `axis` of `arr`.
@@ -100,12 +129,7 @@ def _prepend_const(arr, pad_amt, val, axis=-1):
         return arr
     padshape = tuple(x if i != axis else pad_amt
                      for (i, x) in enumerate(arr.shape))
-    if val == 0:
-        return np.concatenate((np.zeros(padshape, dtype=arr.dtype), arr),
-                              axis=axis)
-    else:
-        return np.concatenate(((np.zeros(padshape) + val).astype(arr.dtype),
-                               arr), axis=axis)
+    return _do_prepend(arr, np.full(padshape, val, dtype=arr.dtype), axis)
 
 
 def _append_const(arr, pad_amt, val, axis=-1):
@@ -134,12 +158,8 @@ def _append_const(arr, pad_amt, val, axis=-1):
         return arr
     padshape = tuple(x if i != axis else pad_amt
                      for (i, x) in enumerate(arr.shape))
-    if val == 0:
-        return np.concatenate((arr, np.zeros(padshape, dtype=arr.dtype)),
-                              axis=axis)
-    else:
-        return np.concatenate(
-            (arr, (np.zeros(padshape) + val).astype(arr.dtype)), axis=axis)
+    return _do_append(arr, np.full(padshape, val, dtype=arr.dtype), axis)
+
 
 def _prepend_edge(arr, pad_amt, axis=-1):
@@ -164,15 +184,9 @@ def _prepend_edge(arr, pad_amt, axis=-1):
     if pad_amt == 0:
         return arr
 
-    edge_slice = tuple(slice(None) if i != axis else 0
-                       for (i, x) in enumerate(arr.shape))
-
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-    edge_arr = arr[edge_slice].reshape(pad_singleton)
-    return np.concatenate((edge_arr.repeat(pad_amt, axis=axis), arr),
-                          axis=axis)
+    edge_slice = _slice_first(arr.shape, 1, axis=axis)
+    edge_arr = arr[edge_slice]
+    return _do_prepend(arr, edge_arr.repeat(pad_amt, axis=axis), axis)
 
 
 def _append_edge(arr, pad_amt, axis=-1):
@@ -198,15 +212,9 @@ def _append_edge(arr, pad_amt, axis=-1):
     if pad_amt == 0:
         return arr
 
-    edge_slice = tuple(slice(None) if i != axis else arr.shape[axis] - 1
-                       for (i, x) in enumerate(arr.shape))
-
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-    edge_arr = arr[edge_slice].reshape(pad_singleton)
-    return np.concatenate((arr, edge_arr.repeat(pad_amt, axis=axis)),
-                          axis=axis)
+    edge_slice = _slice_last(arr.shape, 1, axis=axis)
+    edge_arr = arr[edge_slice]
+    return _do_append(arr, edge_arr.repeat(pad_amt, axis=axis), axis)
 
 
 def _prepend_ramp(arr, pad_amt, end, axis=-1):
@@ -244,15 +252,10 @@ def _prepend_ramp(arr, pad_amt, end, axis=-1):
                            reverse=True).astype(np.float64)
 
     # Appropriate slicing to extract n-dimensional edge along `axis`
-    edge_slice = tuple(slice(None) if i != axis else 0
-                       for (i, x) in enumerate(arr.shape))
+    edge_slice = _slice_first(arr.shape, 1, axis=axis)
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-
-    # Extract edge, reshape to original rank, and extend along `axis`
-    edge_pad = arr[edge_slice].reshape(pad_singleton).repeat(pad_amt, axis)
+    # Extract edge, and extend along `axis`
+    edge_pad = arr[edge_slice].repeat(pad_amt, axis)
 
     # Linear ramp
     slope = (end - edge_pad) / float(pad_amt)
@@ -261,7 +264,7 @@ def _prepend_ramp(arr, pad_amt, end, axis=-1):
     _round_ifneeded(ramp_arr, arr.dtype)
 
     # Ramp values will most likely be float, cast them to the same type as arr
-    return np.concatenate((ramp_arr.astype(arr.dtype), arr), axis=axis)
+    return _do_prepend(arr, ramp_arr, axis)
 
 
 def _append_ramp(arr, pad_amt, end, axis=-1):
@@ -299,15 +302,10 @@ def _append_ramp(arr, pad_amt, end, axis=-1):
                            reverse=False).astype(np.float64)
 
     # Slice a chunk from the edge to calculate stats on
-    edge_slice = tuple(slice(None) if i != axis else -1
-                       for (i, x) in enumerate(arr.shape))
-
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
+    edge_slice = _slice_last(arr.shape, 1, axis=axis)
 
-    # Extract edge, reshape to original rank, and extend along `axis`
-    edge_pad = arr[edge_slice].reshape(pad_singleton).repeat(pad_amt, axis)
+    # Extract edge, and extend along `axis`
+    edge_pad = arr[edge_slice].repeat(pad_amt, axis)
 
     # Linear ramp
     slope = (end - edge_pad) / float(pad_amt)
@@ -316,7 +314,7 @@ def _append_ramp(arr, pad_amt, end, axis=-1):
     _round_ifneeded(ramp_arr, arr.dtype)
 
     # Ramp values will most likely be float, cast them to the same type as arr
-    return np.concatenate((arr, ramp_arr.astype(arr.dtype)), axis=axis)
+    return _do_append(arr, ramp_arr, axis)
 
 
 def _prepend_max(arr, pad_amt, num, axis=-1):
@@ -356,19 +354,13 @@ def _prepend_max(arr, pad_amt, num, axis=-1):
         num = None
 
     # Slice a chunk from the edge to calculate stats on
-    max_slice = tuple(slice(None) if i != axis else slice(num)
-                      for (i, x) in enumerate(arr.shape))
+    max_slice = _slice_first(arr.shape, num, axis=axis)
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-
-    # Extract slice, calculate max, reshape to add singleton dimension back
-    max_chunk = arr[max_slice].max(axis=axis).reshape(pad_singleton)
+    # Extract slice, calculate max
+    max_chunk = arr[max_slice].max(axis=axis, keepdims=True)
 
     # Concatenate `arr` with `max_chunk`, extended along `axis` by `pad_amt`
-    return np.concatenate((max_chunk.repeat(pad_amt, axis=axis), arr),
-                          axis=axis)
+    return _do_prepend(arr, max_chunk.repeat(pad_amt, axis=axis), axis)
 
 
 def _append_max(arr, pad_amt, num, axis=-1):
@@ -407,24 +399,16 @@ def _append_max(arr, pad_amt, num, axis=-1):
         num = None
 
     # Slice a chunk from the edge to calculate stats on
-    end = arr.shape[axis] - 1
     if num is not None:
-        max_slice = tuple(
-            slice(None) if i != axis else slice(end, end - num, -1)
-            for (i, x) in enumerate(arr.shape))
+        max_slice = _slice_last(arr.shape, num, axis=axis)
     else:
         max_slice = tuple(slice(None) for x in arr.shape)
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-
-    # Extract slice, calculate max, reshape to add singleton dimension back
-    max_chunk = arr[max_slice].max(axis=axis).reshape(pad_singleton)
+    # Extract slice, calculate max
+    max_chunk = arr[max_slice].max(axis=axis, keepdims=True)
 
     # Concatenate `arr` with `max_chunk`, extended along `axis` by `pad_amt`
-    return np.concatenate((arr, max_chunk.repeat(pad_amt, axis=axis)),
-                          axis=axis)
+    return _do_append(arr, max_chunk.repeat(pad_amt, axis=axis), axis)
 
 
 def _prepend_mean(arr, pad_amt, num, axis=-1):
@@ -463,20 +447,14 @@ def _prepend_mean(arr, pad_amt, num, axis=-1):
         num = None
 
    # Slice a chunk from the edge to calculate stats on
-    mean_slice = tuple(slice(None) if i != axis else slice(num)
-                       for (i, x) in enumerate(arr.shape))
+    mean_slice = _slice_first(arr.shape, num, axis=axis)
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-
-    # Extract slice, calculate mean, reshape to add singleton dimension back
-    mean_chunk = arr[mean_slice].mean(axis).reshape(pad_singleton)
+    # Extract slice, calculate mean
+    mean_chunk = arr[mean_slice].mean(axis, keepdims=True)
     _round_ifneeded(mean_chunk, arr.dtype)
 
     # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt`
-    return np.concatenate((mean_chunk.repeat(pad_amt, axis).astype(arr.dtype),
-                           arr), axis=axis)
+    return _do_prepend(arr, mean_chunk.repeat(pad_amt, axis), axis=axis)
 
 
 def _append_mean(arr, pad_amt, num, axis=-1):
@@ -515,25 +493,17 @@ def _append_mean(arr, pad_amt, num, axis=-1):
         num = None
 
     # Slice a chunk from the edge to calculate stats on
-    end = arr.shape[axis] - 1
     if num is not None:
-        mean_slice = tuple(
-            slice(None) if i != axis else slice(end, end - num, -1)
-            for (i, x) in enumerate(arr.shape))
+        mean_slice = _slice_last(arr.shape, num, axis=axis)
     else:
         mean_slice = tuple(slice(None) for x in arr.shape)
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-
-    # Extract slice, calculate mean, reshape to add singleton dimension back
-    mean_chunk = arr[mean_slice].mean(axis=axis).reshape(pad_singleton)
+    # Extract slice, calculate mean
+    mean_chunk = arr[mean_slice].mean(axis=axis, keepdims=True)
     _round_ifneeded(mean_chunk, arr.dtype)
 
     # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt`
-    return np.concatenate(
-        (arr, mean_chunk.repeat(pad_amt, axis).astype(arr.dtype)), axis=axis)
+    return _do_append(arr, mean_chunk.repeat(pad_amt, axis), axis=axis)
 
 
 def _prepend_med(arr, pad_amt, num, axis=-1):
@@ -572,20 +542,14 @@ def _prepend_med(arr, pad_amt, num, axis=-1):
         num = None
 
     # Slice a chunk from the edge to calculate stats on
-    med_slice = tuple(slice(None) if i != axis else slice(num)
-                      for (i, x) in enumerate(arr.shape))
-
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
+    med_slice = _slice_first(arr.shape, num, axis=axis)
 
-    # Extract slice, calculate median, reshape to add singleton dimension back
-    med_chunk = np.median(arr[med_slice], axis=axis).reshape(pad_singleton)
+    # Extract slice, calculate median
+    med_chunk = np.median(arr[med_slice], axis=axis, keepdims=True)
     _round_ifneeded(med_chunk, arr.dtype)
 
     # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt`
-    return np.concatenate(
-        (med_chunk.repeat(pad_amt, axis).astype(arr.dtype), arr), axis=axis)
+    return _do_prepend(arr, med_chunk.repeat(pad_amt, axis), axis=axis)
 
 
 def _append_med(arr, pad_amt, num, axis=-1):
@@ -624,25 +588,17 @@ def _append_med(arr, pad_amt, num, axis=-1):
         num = None
 
     # Slice a chunk from the edge to calculate stats on
-    end = arr.shape[axis] - 1
     if num is not None:
-        med_slice = tuple(
-            slice(None) if i != axis else slice(end, end - num, -1)
-            for (i, x) in enumerate(arr.shape))
+        med_slice = _slice_last(arr.shape, num, axis=axis)
     else:
         med_slice = tuple(slice(None) for x in arr.shape)
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-
-    # Extract slice, calculate median, reshape to add singleton dimension back
-    med_chunk = np.median(arr[med_slice], axis=axis).reshape(pad_singleton)
+    # Extract slice, calculate median
+    med_chunk = np.median(arr[med_slice], axis=axis, keepdims=True)
     _round_ifneeded(med_chunk, arr.dtype)
 
     # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt`
-    return np.concatenate(
-        (arr, med_chunk.repeat(pad_amt, axis).astype(arr.dtype)), axis=axis)
+    return _do_append(arr, med_chunk.repeat(pad_amt, axis), axis=axis)
 
 
 def _prepend_min(arr, pad_amt, num, axis=-1):
@@ -682,19 +638,13 @@ def _prepend_min(arr, pad_amt, num, axis=-1):
         num = None
 
     # Slice a chunk from the edge to calculate stats on
-    min_slice = tuple(slice(None) if i != axis else slice(num)
-                      for (i, x) in enumerate(arr.shape))
-
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
+    min_slice = _slice_first(arr.shape, num, axis=axis)
 
-    # Extract slice, calculate min, reshape to add singleton dimension back
-    min_chunk = arr[min_slice].min(axis=axis).reshape(pad_singleton)
+    # Extract slice, calculate min
+    min_chunk = arr[min_slice].min(axis=axis, keepdims=True)
 
     # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt`
-    return np.concatenate((min_chunk.repeat(pad_amt, axis=axis), arr),
-                          axis=axis)
+    return _do_prepend(arr, min_chunk.repeat(pad_amt, axis), axis=axis)
 
 
 def _append_min(arr, pad_amt, num, axis=-1):
@@ -733,24 +683,16 @@ def _append_min(arr, pad_amt, num, axis=-1):
         num = None
 
     # Slice a chunk from the edge to calculate stats on
-    end = arr.shape[axis] - 1
     if num is not None:
-        min_slice = tuple(
-            slice(None) if i != axis else slice(end, end - num, -1)
-            for (i, x) in enumerate(arr.shape))
+        min_slice = _slice_last(arr.shape, num, axis=axis)
     else:
         min_slice = tuple(slice(None) for x in arr.shape)
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-
-    # Extract slice, calculate min, reshape to add singleton dimension back
-    min_chunk = arr[min_slice].min(axis=axis).reshape(pad_singleton)
+    # Extract slice, calculate min
+    min_chunk = arr[min_slice].min(axis=axis, keepdims=True)
 
     # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt`
-    return np.concatenate((arr, min_chunk.repeat(pad_amt, axis=axis)),
-                          axis=axis)
+    return _do_append(arr, min_chunk.repeat(pad_amt, axis), axis=axis)
 
 
 def _pad_ref(arr, pad_amt, method, axis=-1):
@@ -793,22 +735,14 @@ def _pad_ref(arr, pad_amt, method, axis=-1):
     # Prepended region
     # Slice off a reverse indexed chunk from near edge to pad `arr` before
-    ref_slice = tuple(slice(None) if i != axis else slice(pad_amt[0], 0, -1)
-                      for (i, x) in enumerate(arr.shape))
+    ref_slice = _slice_at_axis(arr.shape, slice(pad_amt[0], 0, -1), axis=axis)
 
     ref_chunk1 = arr[ref_slice]
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-    if pad_amt[0] == 1:
-        ref_chunk1 = ref_chunk1.reshape(pad_singleton)
-
     # Memory/computationally more expensive, only do this if `method='odd'`
     if 'odd' in method and pad_amt[0] > 0:
-        edge_slice1 = tuple(slice(None) if i != axis else 0
-                            for (i, x) in enumerate(arr.shape))
-        edge_chunk = arr[edge_slice1].reshape(pad_singleton)
+        edge_slice1 = _slice_first(arr.shape, 1, axis=axis)
+        edge_chunk = arr[edge_slice1]
         ref_chunk1 = 2 * edge_chunk - ref_chunk1
         del edge_chunk
 
@@ -818,19 +752,13 @@ def _pad_ref(arr, pad_amt, method, axis=-1):
     # Slice off a reverse indexed chunk from far edge to pad `arr` after
     start = arr.shape[axis] - pad_amt[1] - 1
     end = arr.shape[axis] - 1
-    ref_slice = tuple(slice(None) if i != axis else slice(start, end)
-                      for (i, x) in enumerate(arr.shape))
-    rev_idx = tuple(slice(None) if i != axis else slice(None, None, -1)
-                    for (i, x) in enumerate(arr.shape))
+    ref_slice = _slice_at_axis(arr.shape, slice(start, end), axis=axis)
+    rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis)
     ref_chunk2 = arr[ref_slice][rev_idx]
 
-    if pad_amt[1] == 1:
-        ref_chunk2 = ref_chunk2.reshape(pad_singleton)
-
     if 'odd' in method:
-        edge_slice2 = tuple(slice(None) if i != axis else -1
-                            for (i, x) in enumerate(arr.shape))
-        edge_chunk = arr[edge_slice2].reshape(pad_singleton)
+        edge_slice2 = _slice_last(arr.shape, 1, axis=axis)
+        edge_chunk = arr[edge_slice2]
         ref_chunk2 = 2 * edge_chunk - ref_chunk2
         del edge_chunk
 
@@ -878,23 +806,14 @@ def _pad_sym(arr, pad_amt, method, axis=-1):
     # Prepended region
     # Slice off a reverse indexed chunk from near edge to pad `arr` before
-    sym_slice = tuple(slice(None) if i != axis else slice(0, pad_amt[0])
-                      for (i, x) in enumerate(arr.shape))
-    rev_idx = tuple(slice(None) if i != axis else slice(None, None, -1)
-                    for (i, x) in enumerate(arr.shape))
+    sym_slice = _slice_first(arr.shape, pad_amt[0], axis=axis)
+    rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis)
     sym_chunk1 = arr[sym_slice][rev_idx]
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-    if pad_amt[0] == 1:
-        sym_chunk1 = sym_chunk1.reshape(pad_singleton)
-
     # Memory/computationally more expensive, only do this if `method='odd'`
     if 'odd' in method and pad_amt[0] > 0:
-        edge_slice1 = tuple(slice(None) if i != axis else 0
-                            for (i, x) in enumerate(arr.shape))
-        edge_chunk = arr[edge_slice1].reshape(pad_singleton)
+        edge_slice1 = _slice_first(arr.shape, 1, axis=axis)
+        edge_chunk = arr[edge_slice1]
         sym_chunk1 = 2 * edge_chunk - sym_chunk1
         del edge_chunk
 
@@ -902,19 +821,12 @@ def _pad_sym(arr, pad_amt, method, axis=-1):
     # Appended region
     # Slice off a reverse indexed chunk from far edge to pad `arr` after
-    start = arr.shape[axis] - pad_amt[1]
-    end = arr.shape[axis]
-    sym_slice = tuple(slice(None) if i != axis else slice(start, end)
-                      for (i, x) in enumerate(arr.shape))
+    sym_slice = _slice_last(arr.shape, pad_amt[1], axis=axis)
     sym_chunk2 = arr[sym_slice][rev_idx]
 
-    if pad_amt[1] == 1:
-        sym_chunk2 = sym_chunk2.reshape(pad_singleton)
-
     if 'odd' in method:
-        edge_slice2 = tuple(slice(None) if i != axis else -1
-                            for (i, x) in enumerate(arr.shape))
-        edge_chunk = arr[edge_slice2].reshape(pad_singleton)
+        edge_slice2 = _slice_last(arr.shape, 1, axis=axis)
+        edge_chunk = arr[edge_slice2]
         sym_chunk2 = 2 * edge_chunk - sym_chunk2
         del edge_chunk
 
@@ -959,29 +871,16 @@ def _pad_wrap(arr, pad_amt, axis=-1):
     # Prepended region
     # Slice off a reverse indexed chunk from near edge to pad `arr` before
-    start = arr.shape[axis] - pad_amt[0]
-    end = arr.shape[axis]
-    wrap_slice = tuple(slice(None) if i != axis else slice(start, end)
-                       for (i, x) in enumerate(arr.shape))
+    wrap_slice = _slice_last(arr.shape, pad_amt[0], axis=axis)
     wrap_chunk1 = arr[wrap_slice]
 
-    # Shape to restore singleton dimension after slicing
-    pad_singleton = tuple(x if i != axis else 1
-                          for (i, x) in enumerate(arr.shape))
-    if pad_amt[0] == 1:
-        wrap_chunk1 = wrap_chunk1.reshape(pad_singleton)
-
     ##########################################################################
     # Appended region
 
     # Slice off a reverse indexed chunk from far edge to pad `arr` after
-    wrap_slice = tuple(slice(None) if i != axis else slice(0, pad_amt[1])
-                       for (i, x) in enumerate(arr.shape))
+    wrap_slice = _slice_first(arr.shape, pad_amt[1], axis=axis)
     wrap_chunk2 = arr[wrap_slice]
 
-    if pad_amt[1] == 1:
-        wrap_chunk2 = wrap_chunk2.reshape(pad_singleton)
-
     # Concatenate `arr` with both chunks, extending along `axis`
     return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis)
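The arraypad refactor above retires the repeated `pad_singleton` reshape dance: because the `_slice_first`/`_slice_last` helpers index with `slice` objects rather than integers, the extracted edge keeps its singleton dimension automatically (and statistics use `keepdims=True` for the same reason). A minimal sketch of the equivalence; `slice_at_axis` below is a local copy that mirrors the private `_slice_at_axis` helper rather than importing it:

    import numpy as np

    def slice_at_axis(shape, sl, axis):
        # local mirror of the private _slice_at_axis helper
        slice_tup = (slice(None),)
        return slice_tup * axis + (sl,) + slice_tup * (len(shape) - axis - 1)

    arr = np.arange(24).reshape(2, 3, 4)

    # slicing with slice(0, 1) keeps the singleton dimension ...
    edge = arr[slice_at_axis(arr.shape, slice(0, 1), axis=1)]
    assert edge.shape == (2, 1, 4)

    # ... which is exactly what the old code rebuilt by hand
    old_edge = arr[:, 0, :].reshape(2, 1, 4)
    assert np.array_equal(edge, old_edge)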
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index e8eda297f..4d3f35183 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -298,7 +298,7 @@ def _unique1d(ar, return_index=False, return_inverse=False,
     return ret
 
 
-def intersect1d(ar1, ar2, assume_unique=False):
+def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
     """
     Find the intersection of two arrays.
 
@@ -307,15 +307,28 @@ def intersect1d(ar1, ar2, assume_unique=False):
     Parameters
     ----------
     ar1, ar2 : array_like
-        Input arrays.
+        Input arrays. Will be flattened if not already 1D.
     assume_unique : bool
         If True, the input arrays are both assumed to be unique, which
         can speed up the calculation.  Default is False.
-
+    return_indices : bool
+        If True, the indices which correspond to the intersection of the
+        two arrays are returned. The first instance of a value is used
+        if there are multiple. Default is False.
+
+        .. versionadded:: 1.15.0
+
     Returns
     -------
     intersect1d : ndarray
         Sorted 1D array of common and unique elements.
+    comm1 : ndarray
+        The indices of the first occurrences of the common values in `ar1`.
+        Only provided if `return_indices` is True.
+    comm2 : ndarray
+        The indices of the first occurrences of the common values in `ar2`.
+        Only provided if `return_indices` is True.
+
 
     See Also
     --------
@@ -332,14 +345,49 @@ def intersect1d(ar1, ar2, assume_unique=False):
     >>> from functools import reduce
     >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
     array([3])
+
+    To return the indices of the values common to the input arrays
+    along with the intersected values:
+    >>> x = np.array([1, 1, 2, 3, 4])
+    >>> y = np.array([2, 1, 4, 6])
+    >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
+    >>> x_ind, y_ind
+    (array([0, 2, 4]), array([1, 0, 2]))
+    >>> xy, x[x_ind], y[y_ind]
+    (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))
+
     """
     if not assume_unique:
-        # Might be faster than unique( intersect1d( ar1, ar2 ) )?
-        ar1 = unique(ar1)
-        ar2 = unique(ar2)
+        if return_indices:
+            ar1, ind1 = unique(ar1, return_index=True)
+            ar2, ind2 = unique(ar2, return_index=True)
+        else:
+            ar1 = unique(ar1)
+            ar2 = unique(ar2)
+    else:
+        ar1 = ar1.ravel()
+        ar2 = ar2.ravel()
+
     aux = np.concatenate((ar1, ar2))
-    aux.sort()
-    return aux[:-1][aux[1:] == aux[:-1]]
+    if return_indices:
+        aux_sort_indices = np.argsort(aux, kind='mergesort')
+        aux = aux[aux_sort_indices]
+    else:
+        aux.sort()
+
+    mask = aux[1:] == aux[:-1]
+    int1d = aux[:-1][mask]
+
+    if return_indices:
+        ar1_indices = aux_sort_indices[:-1][mask]
+        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
+        if not assume_unique:
+            ar1_indices = ind1[ar1_indices]
+            ar2_indices = ind2[ar2_indices]
+
+        return int1d, ar1_indices, ar2_indices
+    else:
+        return int1d
+
 
 def setxor1d(ar1, ar2, assume_unique=False):
     """
@@ -660,3 +708,4 @@ def setdiff1d(ar1, ar2, assume_unique=False):
         ar1 = unique(ar1)
     ar2 = unique(ar2)
     return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]
+
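A short usage sketch of the new `return_indices` flag. The stable `kind='mergesort'` in the implementation above is what guarantees the *first* occurrence of each common value is reported; the expected outputs in the comments assume exactly this input:

    import numpy as np

    a = np.array([1, 2, 2, 3, 4])
    b = np.array([4, 1, 1, 6])

    common, ia, ib = np.intersect1d(a, b, return_indices=True)
    print(common)   # [1 4]
    print(ia, ib)   # [0 4] [1 0] -- first occurrence in each input
    assert np.array_equal(a[ia], common)
    assert np.array_equal(b[ib], common)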
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 363bb2101..23eac7e7d 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -1,5 +1,10 @@
 """
-Define a simple format for saving numpy arrays to disk with the full
+Binary serialization
+
+NPY format
+==========
+
+A simple format for saving numpy arrays to disk with the full
 information about them.
 
 The ``.npy`` format is the standard binary file format in NumPy for
@@ -143,8 +148,10 @@ data HEADER_LEN."
 
 Notes
 -----
-The ``.npy`` format, including reasons for creating it and a comparison of
-alternatives, is described fully in the "npy-format" NEP.
+The ``.npy`` format, including motivation for creating it and a comparison of
+alternatives, is described in the `"npy-format" NEP
+<http://www.numpy.org/neps/nep-0001-npy-format.html>`_, however details have
+evolved with time and this document is more current.
 
 """
 from __future__ import division, absolute_import, print_function
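The rewritten docstring makes `numpy.lib.format` the authoritative reference for the `.npy` layout, superseding the NEP. A minimal round-trip sketch of the format in use (the file path is illustrative only):

    import numpy as np

    a = np.arange(6, dtype=np.int32).reshape(2, 3)
    np.save('/tmp/example.npy', a)       # writes the .npy header + raw data
    b = np.load('/tmp/example.npy')      # header restores shape and dtype
    assert np.array_equal(a, b) and b.dtype == a.dtype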
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 099b63c40..a6e3e07d3 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -54,7 +54,8 @@ __all__ = [
     'bincount', 'digitize', 'cov', 'corrcoef',
     'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett',
     'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring',
-    'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc'
+    'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc',
+    'quantile'
     ]
 
 
@@ -1632,9 +1633,9 @@ def disp(mesg, device=None, linefeed=True):
     Besides ``sys.stdout``, a file-like object can also be used as it has
     both required methods:
 
-    >>> from StringIO import StringIO
+    >>> from io import StringIO
     >>> buf = StringIO()
-    >>> np.disp('"Display" in a file', device=buf)
+    >>> np.disp(u'"Display" in a file', device=buf)
     >>> buf.getvalue()
     '"Display" in a file\\n'
 
@@ -3427,7 +3428,7 @@ def percentile(a, q, axis=None, out=None,
     interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
         This optional parameter specifies the interpolation method to
-        use when the desired quantile lies between two data points
+        use when the desired percentile lies between two data points
         ``i < j``:
 
         * 'linear': ``i + (j - i) * fraction``, where ``fraction``
@@ -3463,6 +3464,7 @@ def percentile(a, q, axis=None, out=None,
     mean
     median : equivalent to ``percentile(..., 50)``
     nanpercentile
+    quantile : equivalent to percentile, except with q in the range [0, 1].
 
     Notes
     -----
@@ -3539,6 +3541,110 @@ def percentile(a, q, axis=None, out=None,
         a, q, axis, out, overwrite_input, interpolation, keepdims)
 
 
+def quantile(a, q, axis=None, out=None,
+             overwrite_input=False, interpolation='linear', keepdims=False):
+    """
+    Compute the `q`th quantile of the data along the specified axis.
+
+    .. versionadded:: 1.15.0
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    q : array_like of float
+        Quantile or sequence of quantiles to compute, which must be between
+        0 and 1 inclusive.
+    axis : {int, tuple of int, None}, optional
+        Axis or axes along which the quantiles are computed. The
+        default is to compute the quantile(s) along a flattened
+        version of the array.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output,
+        but the type (of the output) will be cast if necessary.
+    overwrite_input : bool, optional
+        If True, then allow the input array `a` to be modified by intermediate
+        calculations, to save memory. In this case, the contents of the input
+        `a` after this function completes is undefined.
+    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+        This optional parameter specifies the interpolation method to
+        use when the desired quantile lies between two data points
+        ``i < j``:
+
+        * linear: ``i + (j - i) * fraction``, where ``fraction``
+          is the fractional part of the index surrounded by ``i``
+          and ``j``.
+        * lower: ``i``.
+        * higher: ``j``.
+        * nearest: ``i`` or ``j``, whichever is nearest.
+        * midpoint: ``(i + j) / 2``.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left in
+        the result as dimensions with size one. With this option, the
+        result will broadcast correctly against the original array `a`.
+
+    Returns
+    -------
+    quantile : scalar or ndarray
+        If `q` is a single quantile and `axis=None`, then the result
+        is a scalar. If multiple quantiles are given, first axis of
+        the result corresponds to the quantiles. The other axes are
+        the axes that remain after the reduction of `a`. If the input
+        contains integers or floats smaller than ``float64``, the output
+        data-type is ``float64``. Otherwise, the output data-type is the
+        same as that of the input. If `out` is specified, that array is
+        returned instead.
+
+    See Also
+    --------
+    mean
+    percentile : equivalent to quantile, but with q in the range [0, 100].
+    median : equivalent to ``quantile(..., 0.5)``
+    nanquantile
+
+    Notes
+    -----
+    Given a vector ``V`` of length ``N``, the ``q``-th quantile of
+    ``V`` is the value ``q`` of the way from the minimum to the
+    maximum in a sorted copy of ``V``. The values and distances of
+    the two nearest neighbors as well as the `interpolation` parameter
+    will determine the quantile if the normalized ranking does not
+    match the location of ``q`` exactly. This function is the same as
+    the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and the
+    same as the maximum if ``q=1.0``.
+
+    Examples
+    --------
+    >>> a = np.array([[10, 7, 4], [3, 2, 1]])
+    >>> a
+    array([[10,  7,  4],
+           [ 3,  2,  1]])
+    >>> np.quantile(a, 0.5)
+    3.5
+    >>> np.quantile(a, 0.5, axis=0)
+    array([ 6.5,  4.5,  2.5])
+    >>> np.quantile(a, 0.5, axis=1)
+    array([ 7.,  2.])
+    >>> np.quantile(a, 0.5, axis=1, keepdims=True)
+    array([[ 7.],
+           [ 2.]])
+    >>> m = np.quantile(a, 0.5, axis=0)
+    >>> out = np.zeros_like(m)
+    >>> np.quantile(a, 0.5, axis=0, out=out)
+    array([ 6.5,  4.5,  2.5])
+    >>> m
+    array([ 6.5,  4.5,  2.5])
+    >>> b = a.copy()
+    >>> np.quantile(b, 0.5, axis=1, overwrite_input=True)
+    array([ 7.,  2.])
+    >>> assert not np.all(a == b)
+    """
+    q = np.asanyarray(q)
+    if not _quantile_is_valid(q):
+        raise ValueError("Quantiles must be in the range [0, 1]")
+    return _quantile_unchecked(
+        a, q, axis, out, overwrite_input, interpolation, keepdims)
+
+
 def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
                         interpolation='linear', keepdims=False):
     """Assumes that q is in [0, 1], and is an ndarray"""
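Since `quantile` is documented as `percentile` with `q` rescaled from [0, 100] to [0, 1], a quick sanity sketch of the equivalence:

    import numpy as np

    a = np.array([[10, 7, 4], [3, 2, 1]])

    # same algorithm, q rescaled by a factor of 100
    for q in (0.0, 0.25, 0.5, 0.75, 1.0):
        assert np.quantile(a, q) == np.percentile(a, 100 * q)

    # both reduce to the median at the midpoint
    assert np.quantile(a, 0.5) == np.median(a)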
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index d2a398a0a..2922b3a86 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -877,12 +877,6 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
         # bins is an integer
         bins = D*[bins]
 
-    # avoid rounding issues for comparisons when dealing with inexact types
-    if np.issubdtype(sample.dtype, np.inexact):
-        edge_dt = sample.dtype
-    else:
-        edge_dt = float
-
     # normalize the range argument
     if range is None:
         range = (None,) * D
@@ -896,13 +890,12 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
             raise ValueError(
                 '`bins[{}]` must be positive, when an integer'.format(i))
             smin, smax = _get_outer_edges(sample[:,i], range[i])
-            edges[i] = np.linspace(smin, smax, bins[i] + 1, dtype=edge_dt)
+            edges[i] = np.linspace(smin, smax, bins[i] + 1)
         elif np.ndim(bins[i]) == 1:
-            edges[i] = np.asarray(bins[i], edge_dt)
-            # not just monotonic, due to the use of mindiff below
-            if np.any(edges[i][:-1] >= edges[i][1:]):
+            edges[i] = np.asarray(bins[i])
+            if np.any(edges[i][:-1] > edges[i][1:]):
                 raise ValueError(
-                    '`bins[{}]` must be strictly increasing, when an array'
+                    '`bins[{}]` must be monotonically increasing, when an array'
                     .format(i))
         else:
             raise ValueError(
@@ -911,13 +904,10 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
         nbin[i] = len(edges[i]) + 1  # includes an outlier on each end
         dedges[i] = np.diff(edges[i])
 
-    # Handle empty input.
-    if N == 0:
-        return np.zeros(nbin-2), edges
-
     # Compute the bin number each sample falls into.
     Ncount = tuple(
-        np.digitize(sample[:, i], edges[i])
+        # avoid np.digitize to work around gh-11022
+        np.searchsorted(edges[i], sample[:, i], side='right')
         for i in _range(D)
     )
 
@@ -925,16 +915,10 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
     # For the rightmost bin, we want values equal to the right edge to be
     # counted in the last bin, and not as an outlier.
     for i in _range(D):
-        # Rounding precision
-        mindiff = dedges[i].min()
-        if not np.isinf(mindiff):
-            decimal = int(-np.log10(mindiff)) + 6
-            # Find which points are on the rightmost edge.
-            not_smaller_than_edge = (sample[:, i] >= edges[i][-1])
-            on_edge = (np.around(sample[:, i], decimal) ==
-                       np.around(edges[i][-1], decimal))
-            # Shift these points one bin to the left.
-            Ncount[i][on_edge & not_smaller_than_edge] -= 1
+        # Find which points are on the rightmost edge.
+        on_edge = (sample[:, i] == edges[i][-1])
+        # Shift these points one bin to the left.
+        Ncount[i][on_edge] -= 1
 
     # Compute the sample indices in the flattened histogram matrix.
     # This raises an error if the array is too large.
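The rounding heuristic is gone: a sample is now folded into the last bin only when it compares exactly equal to the rightmost edge. A small sketch of the new bin computation, mirroring the code above rather than importing it:

    import numpy as np

    edges = np.array([0.0, 0.5, 1.0])
    sample = np.array([0.25, 0.5, 1.0])   # 1.0 sits exactly on the last edge

    # bin numbers as histogramdd now computes them (gh-11022 workaround)
    ncount = np.searchsorted(edges, sample, side='right')
    ncount[sample == edges[-1]] -= 1      # fold the right edge into the last bin
    print(ncount)                          # [1 2 2]

    hist, _ = np.histogramdd(sample[:, None], bins=(edges,))
    print(hist)                            # [1. 2.]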
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index 43fdc5627..d2139338e 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -201,7 +201,7 @@ class nd_grid(object):
                 slobj = [_nx.newaxis]*len(size)
                 for k in range(len(size)):
                     slobj[k] = slice(None, None)
-                    nn[k] = nn[k][slobj]
+                    nn[k] = nn[k][tuple(slobj)]
                     slobj[k] = _nx.newaxis
                 return nn
         except (IndexError, TypeError):
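The `tuple(slobj)` change avoids indexing an array with a list, which NumPy treats as deprecated fancy indexing rather than a multi-dimensional index. A minimal illustration:

    import numpy as np

    a = np.arange(4)
    idx = [np.newaxis, slice(None)]   # a list index like nd_grid builds

    b = a[tuple(idx)]                 # explicit tuple: supported
    assert b.shape == (1, 4)

    # a[idx] relies on deprecated list-as-tuple indexing and warns or
    # raises on newer NumPy, which is why the code now calls tuple(slobj)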
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index dddc0e5b8..abd2da1a2 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -16,6 +16,7 @@ Functions
 - `nanvar` -- variance of non-NaN values
 - `nanstd` -- standard deviation of non-NaN values
 - `nanmedian` -- median of non-NaN values
+- `nanquantile` -- qth quantile of non-NaN values
 - `nanpercentile` -- qth percentile of non-NaN values
 
 """
@@ -29,7 +30,7 @@ from numpy.lib import function_base
 __all__ = [
     'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
     'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
-    'nancumsum', 'nancumprod'
+    'nancumsum', 'nancumprod', 'nanquantile'
     ]
 
 
@@ -1057,7 +1058,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
         `a` after this function completes is undefined.
     interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
         This optional parameter specifies the interpolation method to
-        use when the desired quantile lies between two data points
+        use when the desired percentile lies between two data points
         ``i < j``:
 
         * 'linear': ``i + (j - i) * fraction``, where ``fraction``
@@ -1095,6 +1096,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
     nanmean
     nanmedian : equivalent to ``nanpercentile(..., 50)``
     percentile, median, mean
+    nanquantile : equivalent to nanpercentile, but with q in the range [0, 1].
 
     Notes
     -----
@@ -1144,6 +1146,110 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
         a, q, axis, out, overwrite_input, interpolation, keepdims)
 
 
+def nanquantile(a, q, axis=None, out=None, overwrite_input=False,
+                interpolation='linear', keepdims=np._NoValue):
+    """
+    Compute the qth quantile of the data along the specified axis,
+    while ignoring nan values.
+    Returns the qth quantile(s) of the array elements.
+
+    .. versionadded:: 1.15.0
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array, containing
+        nan values to be ignored
+    q : array_like of float
+        Quantile or sequence of quantiles to compute, which must be between
+        0 and 1 inclusive.
+    axis : {int, tuple of int, None}, optional
+        Axis or axes along which the quantiles are computed. The
+        default is to compute the quantile(s) along a flattened
+        version of the array.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output,
+        but the type (of the output) will be cast if necessary.
+    overwrite_input : bool, optional
+        If True, then allow the input array `a` to be modified by intermediate
+        calculations, to save memory. In this case, the contents of the input
+        `a` after this function completes is undefined.
+    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+        This optional parameter specifies the interpolation method to
+        use when the desired quantile lies between two data points
+        ``i < j``:
+
+        * linear: ``i + (j - i) * fraction``, where ``fraction``
+          is the fractional part of the index surrounded by ``i``
+          and ``j``.
+        * lower: ``i``.
+        * higher: ``j``.
+        * nearest: ``i`` or ``j``, whichever is nearest.
+        * midpoint: ``(i + j) / 2``.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left in
+        the result as dimensions with size one. With this option, the
+        result will broadcast correctly against the original array `a`.
+
+        If this is anything but the default value it will be passed
+        through (in the special case of an empty array) to the
+        `mean` function of the underlying array.  If the array is
+        a sub-class and `mean` does not have the kwarg `keepdims` this
+        will raise a RuntimeError.
+
+    Returns
+    -------
+    quantile : scalar or ndarray
+        If `q` is a single quantile and `axis=None`, then the result
+        is a scalar. If multiple quantiles are given, first axis of
+        the result corresponds to the quantiles. The other axes are
+        the axes that remain after the reduction of `a`. If the input
+        contains integers or floats smaller than ``float64``, the output
+        data-type is ``float64``. Otherwise, the output data-type is the
+        same as that of the input. If `out` is specified, that array is
+        returned instead.
+
+    See Also
+    --------
+    quantile
+    nanmean, nanmedian
+    nanmedian : equivalent to ``nanquantile(..., 0.5)``
+    nanpercentile : same as nanquantile, but with q in the range [0, 100].
+
+    Examples
+    --------
+    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
+    >>> a[0][1] = np.nan
+    >>> a
+    array([[ 10.,  nan,   4.],
+           [  3.,   2.,   1.]])
+    >>> np.quantile(a, 0.5)
+    nan
+    >>> np.nanquantile(a, 0.5)
+    3.5
+    >>> np.nanquantile(a, 0.5, axis=0)
+    array([ 6.5,  2. ,  2.5])
+    >>> np.nanquantile(a, 0.5, axis=1, keepdims=True)
+    array([[ 7.],
+           [ 2.]])
+    >>> m = np.nanquantile(a, 0.5, axis=0)
+    >>> out = np.zeros_like(m)
+    >>> np.nanquantile(a, 0.5, axis=0, out=out)
+    array([ 6.5,  2. ,  2.5])
+    >>> m
+    array([ 6.5,  2. ,  2.5])
+    >>> b = a.copy()
+    >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True)
+    array([ 7.,  2.])
+    >>> assert not np.all(a==b)
+    """
+    a = np.asanyarray(a)
+    q = np.asanyarray(q)
+    if not function_base._quantile_is_valid(q):
+        raise ValueError("Quantiles must be in the range [0, 1]")
+    return _nanquantile_unchecked(
+        a, q, axis, out, overwrite_input, interpolation, keepdims)
+
+
 def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
                            interpolation='linear', keepdims=np._NoValue):
     """Assumes that q is in [0, 1], and is an ndarray"""
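A short sketch contrasting `nanquantile` with plain `quantile` on data containing NaN, together with its documented equivalence to `nanpercentile`:

    import numpy as np

    a = np.array([[10., np.nan, 4.],
                  [ 3.,     2., 1.]])

    # plain quantile propagates the NaN; nanquantile ignores it
    print(np.quantile(a, 0.5))       # nan
    print(np.nanquantile(a, 0.5))    # 3.0 -- median of the 5 non-NaN values
    assert np.nanquantile(a, 0.5) == np.nanpercentile(a, 50)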
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 36589ce82..390927601 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -480,9 +480,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
 
     Notes
     -----
-    For a description of the ``.npy`` format, see the module docstring
-    of `numpy.lib.format` or the NumPy Enhancement Proposal
-    http://numpy.github.io/neps/npy-format.html
+    For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.
 
     Examples
     --------
@@ -566,9 +564,7 @@ def savez(file, *args, **kwds):
     The ``.npz`` file format is a zipped archive of files named after the
     variables they contain.  The archive is not compressed and each file
     in the archive contains one variable in ``.npy`` format. For a
-    description of the ``.npy`` format, see `numpy.lib.format` or the
-    NumPy Enhancement Proposal
-    http://numpy.github.io/neps/npy-format.html
+    description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.
 
     When opening the saved ``.npz`` file with `load` a `NpzFile` object is
     returned. This is a dictionary-like object which can be queried for
@@ -647,9 +643,9 @@ def savez_compressed(file, *args, **kwds):
     The ``.npz`` file format is a zipped archive of files named after the
     variables they contain.  The archive is compressed with
     ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable
-    in ``.npy`` format. For a description of the ``.npy`` format, see
-    `numpy.lib.format` or the NumPy Enhancement Proposal
-    http://numpy.github.io/neps/npy-format.html
+    in ``.npy`` format. For a description of the ``.npy`` format, see
+    :py:mod:`numpy.lib.format`.
+
 
     When opening the saved ``.npz`` file with `load` a `NpzFile` object is
     returned. This is a dictionary-like object which can be queried for
@@ -796,8 +792,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         the data-type.
     comments : str or sequence of str, optional
         The characters or list of characters used to indicate the start of a
-        comment. For backwards compatibility, byte strings will be decoded as
-        'latin1'. The default is '#'.
+        comment. None implies no comments. For backwards compatibility, byte
+        strings will be decoded as 'latin1'. The default is '#'.
     delimiter : str, optional
         The string used to separate values. For backwards compatibility, byte
         strings will be decoded as 'latin1'. The default is whitespace.
@@ -864,18 +860,18 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     Examples
     --------
     >>> from io import StringIO   # StringIO behaves like a file object
-    >>> c = StringIO("0 1\\n2 3")
+    >>> c = StringIO(u"0 1\\n2 3")
     >>> np.loadtxt(c)
     array([[ 0.,  1.],
            [ 2.,  3.]])
 
-    >>> d = StringIO("M 21 72\\nF 35 58")
+    >>> d = StringIO(u"M 21 72\\nF 35 58")
     >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
     ...                      'formats': ('S1', 'i4', 'f4')})
     array([('M', 21, 72.0), ('F', 35, 58.0)],
           dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])
 
-    >>> c = StringIO("1,0,2\\n3,0,4")
+    >>> c = StringIO(u"1,0,2\\n3,0,4")
     >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
     >>> x
     array([ 1.,  3.])
@@ -941,7 +937,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         if encoding is not None:
             fencoding = encoding
         # we must assume local encoding
-        # TOOD emit portability warning?
+        # TODO emit portability warning?
         elif fencoding is None:
             import locale
             fencoding = locale.getpreferredencoding()
@@ -1637,7 +1633,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
     Comma delimited file with mixed dtype
 
-    >>> s = StringIO("1,1.3,abcde")
+    >>> s = StringIO(u"1,1.3,abcde")
     >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
     ... ('mystring','S5')], delimiter=",")
     >>> data
@@ -1664,7 +1660,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
     An example with fixed-width columns
 
-    >>> s = StringIO("11.3abcde")
+    >>> s = StringIO(u"11.3abcde")
    >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
     ...     delimiter=[1,3,5])
     >>> data
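The newly documented `comments=None` behavior of `loadtxt` means `#` is read as ordinary data instead of starting a comment. A minimal sketch:

    import numpy as np
    from io import StringIO

    # default: everything after '#' is stripped, so only 'a' and 'c' remain
    print(np.loadtxt(StringIO(u"a#b\nc#d"), dtype=str))
    # ['a' 'c']

    # comments=None: the '#' is kept as part of the value
    print(np.loadtxt(StringIO(u"a#b\nc#d"), dtype=str, comments=None))
    # ['a#b' 'c#d']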
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 41b5e2f64..078608bbb 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -113,11 +113,6 @@ def poly(seq_of_zeros):
     >>> np.poly(P)
     array([ 1.        ,  0.        ,  0.16666667])
 
-    Or a square matrix object:
-
-    >>> np.poly(np.matrix(P))
-    array([ 1.        ,  0.        ,  0.16666667])
-
     Note how in all cases the leading coefficient is always 1.
 
     """
diff --git a/numpy/lib/scimath.py b/numpy/lib/scimath.py
index e07caf805..f1838fee6 100644
--- a/numpy/lib/scimath.py
+++ b/numpy/lib/scimath.py
@@ -555,7 +555,7 @@ def arctanh(x):
     --------
     >>> np.set_printoptions(precision=4)
 
-    >>> np.emath.arctanh(np.matrix(np.eye(2)))
+    >>> np.emath.arctanh(np.eye(2))
     array([[ Inf,   0.],
            [  0.,  Inf]])
     >>> np.emath.arctanh([1j])
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index 41ef28ef3..65104115a 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -16,10 +16,235 @@ from numpy.matrixlib.defmatrix import matrix  # this raises all the right alarm
 __all__ = [
     'column_stack', 'row_stack', 'dstack', 'array_split', 'split',
     'hsplit', 'vsplit', 'dsplit', 'apply_over_axes', 'expand_dims',
-    'apply_along_axis', 'kron', 'tile', 'get_array_wrap'
+    'apply_along_axis', 'kron', 'tile', 'get_array_wrap', 'take_along_axis',
+    'put_along_axis'
     ]
 
 
+def _make_along_axis_idx(arr_shape, indices, axis):
+    # compute dimensions to iterate over
+    if not _nx.issubdtype(indices.dtype, _nx.integer):
+        raise IndexError('`indices` must be an integer array')
+    if len(arr_shape) != indices.ndim:
+        raise ValueError(
+            "`indices` and `arr` must have the same number of dimensions")
+    shape_ones = (1,) * indices.ndim
+    dest_dims = list(range(axis)) + [None] + list(range(axis+1, indices.ndim))
+
+    # build a fancy index, consisting of orthogonal aranges, with the
+    # requested index inserted at the right location
+    fancy_index = []
+    for dim, n in zip(dest_dims, arr_shape):
+        if dim is None:
+            fancy_index.append(indices)
+        else:
+            ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:]
+            fancy_index.append(_nx.arange(n).reshape(ind_shape))
+
+    return tuple(fancy_index)
+
+
+def take_along_axis(arr, indices, axis):
+    """
+    Take values from the input array by matching 1d index and data slices.
+
+    This iterates over matching 1d slices oriented along the specified axis in
+    the index and data arrays, and uses the former to look up values in the
+    latter. These slices can be different lengths.
+
+    Functions returning an index along an axis, like `argsort` and
+    `argpartition`, produce suitable indices for this function.
+
+    .. versionadded:: 1.15.0
+
+    Parameters
+    ----------
+    arr: ndarray (Ni..., M, Nk...)
+        Source array
+    indices: ndarray (Ni..., J, Nk...)
+        Indices to take along each 1d slice of `arr`. This must match the
+        dimension of arr, but dimensions Ni and Nk only need to broadcast
+        against `arr`.
+    axis: int
+        The axis to take 1d slices along. If axis is None, the input array is
+        treated as if it had first been flattened to 1d, for consistency with
+        `sort` and `argsort`.
+
+    Returns
+    -------
+    out: ndarray (Ni..., J, Nk...)
+        The indexed result.
+
+    Notes
+    -----
+    This is equivalent to (but faster than) the following use of `ndindex` and
+    `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices::
+
+        Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:]
+        J = indices.shape[axis]  # Need not equal M
+        out = np.empty(Ni + (J,) + Nk)
+
+        for ii in ndindex(Ni):
+            for kk in ndindex(Nk):
+                a_1d       = a      [ii + s_[:,] + kk]
+                indices_1d = indices[ii + s_[:,] + kk]
+                out_1d     = out    [ii + s_[:,] + kk]
+                for j in range(J):
+                    out_1d[j] = a_1d[indices_1d[j]]
+
+    Equivalently, eliminating the inner loop, the last two lines would be::
+
+                out_1d[:] = a_1d[indices_1d]
+
+    See Also
+    --------
+    take : Take along an axis, using the same indices for every 1d slice
+    put_along_axis :
+        Put values into the destination array by matching 1d index and data slices
+
+    Examples
+    --------
+
+    For this sample array
+
+    >>> a = np.array([[10, 30, 20], [60, 40, 50]])
+
+    We can sort either by using sort directly, or argsort and this function
+
+    >>> np.sort(a, axis=1)
+    array([[10, 20, 30],
+           [40, 50, 60]])
+    >>> ai = np.argsort(a, axis=1); ai
+    array([[0, 2, 1],
+           [1, 2, 0]], dtype=int64)
+    >>> np.take_along_axis(a, ai, axis=1)
+    array([[10, 20, 30],
+           [40, 50, 60]])
+
+    The same works for max and min, if you expand the dimensions:
+
+    >>> np.expand_dims(np.max(a, axis=1), axis=1)
+    array([[30],
+           [60]])
+    >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1)
+    >>> ai
+    array([[1],
+           [0]], dtype=int64)
+    >>> np.take_along_axis(a, ai, axis=1)
+    array([[30],
+           [60]])
+
+    If we want to get the max and min at the same time, we can stack the
+    indices first
+
+    >>> ai_min = np.expand_dims(np.argmin(a, axis=1), axis=1)
+    >>> ai_max = np.expand_dims(np.argmax(a, axis=1), axis=1)
+    >>> ai = np.concatenate([ai_min, ai_max], axis=1)
+    >>> ai
+    array([[0, 1],
+           [1, 0]], dtype=int64)
+    >>> np.take_along_axis(a, ai, axis=1)
+    array([[10, 30],
+           [40, 60]])
+    """
+    # normalize inputs
+    if axis is None:
+        arr = arr.flat
+        arr_shape = (len(arr),)  # flatiter has no .shape
+        axis = 0
+    else:
+        axis = normalize_axis_index(axis, arr.ndim)
+        arr_shape = arr.shape
+
+    # use the fancy index
+    return arr[_make_along_axis_idx(arr_shape, indices, axis)]
+
+
+def put_along_axis(arr, indices, values, axis):
+    """
+    Put values into the destination array by matching 1d index and data slices.
+
+    This iterates over matching 1d slices oriented along the specified axis in
+    the index and data arrays, and uses the former to place values into the
+    latter. These slices can be different lengths.
+
+    Functions returning an index along an axis, like `argsort` and
+    `argpartition`, produce suitable indices for this function.
+
+    .. versionadded:: 1.15.0
+
+    Parameters
+    ----------
+    arr: ndarray (Ni..., M, Nk...)
+        Destination array.
+    indices: ndarray (Ni..., J, Nk...)
+        Indices to change along each 1d slice of `arr`. This must match the
+        dimension of arr, but dimensions in Ni and Nk may be 1 to broadcast
+        against `arr`.
+    values: array_like (Ni..., J, Nk...)
+        values to insert at those indices. Its shape and dimension are
+        broadcast to match that of `indices`.
+    axis: int
+        The axis to take 1d slices along. If axis is None, the destination
+        array is treated as if a flattened 1d view had been created of it.
+
+    Notes
+    -----
+    This is equivalent to (but faster than) the following use of `ndindex` and
+    `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices::
+
+        Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:]
+        J = indices.shape[axis]  # Need not equal M
+
+        for ii in ndindex(Ni):
+            for kk in ndindex(Nk):
+                a_1d       = a      [ii + s_[:,] + kk]
+                indices_1d = indices[ii + s_[:,] + kk]
+                values_1d  = values [ii + s_[:,] + kk]
+                for j in range(J):
+                    a_1d[indices_1d[j]] = values_1d[j]
+
+    Equivalently, eliminating the inner loop, the last two lines would be::
+
+                a_1d[indices_1d] = values_1d
+
+    See Also
+    --------
+    take_along_axis :
+        Take values from the input array by matching 1d index and data slices
+
+    Examples
+    --------
+
+    For this sample array
+
+    >>> a = np.array([[10, 30, 20], [60, 40, 50]])
+
+    We can replace the maximum values with:
+
+    >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1)
+    >>> ai
+    array([[1],
+           [0]], dtype=int64)
+    >>> np.put_along_axis(a, ai, 99, axis=1)
+    >>> a
+    array([[10, 99, 20],
+           [99, 40, 50]])
+
+    """
+    # normalize inputs
+    if axis is None:
+        arr = arr.flat
+        axis = 0
+        arr_shape = (len(arr),)  # flatiter has no .shape
+    else:
+        axis = normalize_axis_index(axis, arr.ndim)
+        arr_shape = arr.shape
+
+    # use the fancy index
+    arr[_make_along_axis_idx(arr_shape, indices, axis)] = values
+
+
 def apply_along_axis(func1d, axis, arr, *args, **kwargs):
     """
     Apply a function to 1-D slices along the given axis.
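`take_along_axis` is sugar over the orthogonal fancy index that `_make_along_axis_idx` builds: an `arange` for every axis except the one being indexed. A sketch of that equivalence for a 2-D array:

    import numpy as np

    a = np.array([[10, 30, 20],
                  [60, 40, 50]])
    ai = np.argsort(a, axis=1)

    # the hand-built fancy index: a broadcastable row arange plus the
    # per-row indices, which is what the helper constructs internally
    rows = np.arange(2).reshape(2, 1)
    assert np.array_equal(np.take_along_axis(a, ai, axis=1), a[rows, ai])
    assert np.array_equal(a[rows, ai], np.sort(a, axis=1))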
diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py
index 8be49ce67..8ba0370b0 100644
--- a/numpy/lib/tests/test_arraypad.py
+++ b/numpy/lib/tests/test_arraypad.py
@@ -489,6 +489,19 @@ class TestConstant(object):
         )
         assert_allclose(test, expected)
 
+    def test_check_large_integers(self):
+        uint64_max = 2 ** 64 - 1
+        arr = np.full(5, uint64_max, dtype=np.uint64)
+        test = np.pad(arr, 1, mode="constant", constant_values=arr.min())
+        expected = np.full(7, uint64_max, dtype=np.uint64)
+        assert_array_equal(test, expected)
+
+        int64_max = 2 ** 63 - 1
+        arr = np.full(5, int64_max, dtype=np.int64)
+        test = np.pad(arr, 1, mode="constant", constant_values=arr.min())
+        expected = np.full(7, int64_max, dtype=np.int64)
+        assert_array_equal(test, expected)
+
 
 class TestLinearRamp(object):
     def test_check_simple(self):
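The test above pins down why arraypad now uses `np.full(padshape, val, dtype=arr.dtype)`: the old `np.zeros(padshape) + val` route goes through a float64 intermediate, which cannot represent `2**64 - 1` exactly. A minimal sketch of the difference:

    import numpy as np

    uint64_max = 2**64 - 1

    # old path: float64 intermediate loses the low bits (and the cast
    # back to uint64 is then not value-preserving)
    old = (np.zeros(2) + uint64_max).astype(np.uint64)
    print(old[0] == uint64_max)     # False

    # new path: the value is written directly with the target dtype
    new = np.full(2, uint64_max, dtype=np.uint64)
    print(new[0] == uint64_max)     # True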
assert_array_equal(ea, b[ui2]) + + # non1d, not assumed to be uniqueinputs + a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]]) + b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]]) + c, i1, i2 = intersect1d(a, b, return_indices=True) + ui1 = np.unravel_index(i1, a.shape) + ui2 = np.unravel_index(i2, b.shape) + ea = np.array([2, 7, 8]) + assert_array_equal(ea, a[ui1]) + assert_array_equal(ea, b[ui2]) + def test_setxor1d(self): a = np.array([5, 7, 1, 2]) b = np.array([2, 4, 3, 1, 5]) @@ -74,8 +113,6 @@ class TestSetOps(object): assert_array_equal([1,7,8], ediff1d(two_elem, to_end=[7,8])) assert_array_equal([7,1], ediff1d(two_elem, to_begin=7)) assert_array_equal([5,6,1], ediff1d(two_elem, to_begin=[5,6])) - assert(isinstance(ediff1d(np.matrix(1)), np.matrix)) - assert(isinstance(ediff1d(np.matrix(1), to_begin=1), np.matrix)) def test_isin(self): # the tests for in1d cover most of isin's behavior diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 6653b5ba1..4103a9eb3 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -287,9 +287,6 @@ class TestAverage(object): assert_almost_equal(y5.mean(0), average(y5, 0)) assert_almost_equal(y5.mean(1), average(y5, 1)) - y6 = np.matrix(rand(5, 5)) - assert_array_equal(y6.mean(0), average(y6, 0)) - def test_weights(self): y = np.arange(10) w = np.arange(10) @@ -357,14 +354,6 @@ class TestAverage(object): assert_equal(type(np.average(a)), subclass) assert_equal(type(np.average(a, weights=w)), subclass) - # also test matrices - a = np.matrix([[1,2],[3,4]]) - w = np.matrix([[1,2],[3,4]]) - - r = np.average(a, axis=0, weights=w) - assert_equal(type(r), np.matrix) - assert_equal(r, [[2.5, 10.0/3]]) - def test_upcasting(self): types = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'), ('f4', 'f4', 'f4'), ('f4', 'f8', 'f8')] @@ -1525,9 +1514,9 @@ class TestDigitize(object): class TestUnwrap(object): def test_simple(self): - # check that unwrap removes jumps greather that 2*pi + # check that unwrap removes jumps greater that 2*pi assert_array_equal(unwrap([1, 1 + 2 * np.pi]), [1, 1]) - # check that unwrap maintans continuity + # check that unwrap maintains continuity assert_(np.all(diff(unwrap(rand(10) * 100)) < np.pi)) @@ -1623,16 +1612,6 @@ class TestTrapz(object): xm = np.ma.array(x, mask=mask) assert_almost_equal(trapz(y, xm), r) - def test_matrix(self): - # Test to make sure matrices give the same answer as ndarrays - x = np.linspace(0, 5) - y = x * x - r = trapz(y, x) - mx = np.matrix(x) - my = np.matrix(y) - mr = trapz(my, mx) - assert_almost_equal(mr, r) - class TestSinc(object): @@ -2749,6 +2728,28 @@ class TestPercentile(object): a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) +class TestQuantile(object): + # most of this is already tested by TestPercentile + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.quantile(x, 0), 0.) 
+ assert_equal(np.quantile(x, 1), 3.5) + assert_equal(np.quantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, because quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + class TestMedian(object): def test_basic(self): diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index 06daacbdc..e16ae12c2 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -253,7 +253,7 @@ class TestHistogram(object): one_nan = np.array([0, 1, np.nan]) all_nan = np.array([np.nan, np.nan]) - # the internal commparisons with NaN give warnings + # the internal comparisons with NaN give warnings sup = suppress_warnings() sup.filter(RuntimeWarning) with sup: @@ -613,8 +613,6 @@ class TestHistogramdd(object): assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5]) assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1]) assert_raises( - ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 2, 3]]) - assert_raises( ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]]) assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]])) @@ -646,7 +644,7 @@ class TestHistogramdd(object): bins = [[0., 0.5, 1.0]] hist, _ = histogramdd(x, bins=bins) assert_(hist[0] == 0.0) - assert_(hist[1] == 1.) + assert_(hist[1] == 0.0) x = [1.0001] bins = [[0., 0.5, 1.0]] hist, _ = histogramdd(x, bins=bins) @@ -660,3 +658,40 @@ class TestHistogramdd(object): range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]]) assert_raises(ValueError, histogramdd, vals, range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]]) + + def test_equal_edges(self): + """ Test that adjacent entries in an edge array can be equal """ + x = np.array([0, 1, 2]) + y = np.array([0, 1, 2]) + x_edges = np.array([0, 2, 2]) + y_edges = 1 + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + hist_expected = np.array([ + [2.], + [1.], # x == 2 falls in the final bin + ]) + assert_equal(hist, hist_expected) + + def test_edge_dtype(self): + """ Test that if an edge array is input, its type is preserved """ + x = np.array([0, 10, 20]) + y = x / 10 + x_edges = np.array([0, 5, 15, 20]) + y_edges = x_edges / 10 + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + assert_equal(edges[0].dtype, x_edges.dtype) + assert_equal(edges[1].dtype, y_edges.dtype) + + def test_large_integers(self): + big = 2**60 # Too large to represent with a full precision float + + x = np.array([0], np.int64) + x_edges = np.array([-1, +1], np.int64) + y = big + x + y_edges = big + x_edges + + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + assert_equal(hist[0, 0], 1) diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py index f934e952a..315251daa 100644 --- a/numpy/lib/tests/test_index_tricks.py +++ b/numpy/lib/tests/test_index_tricks.py @@ -6,7 +6,7 @@ from numpy.testing import ( assert_array_almost_equal, assert_raises, assert_raises_regex ) from numpy.lib.index_tricks import ( - mgrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from, + mgrid, ogrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from, index_exp, ndindex, r_, s_, ix_ ) @@ -156,6 +156,15 @@ class TestGrid(object): assert_array_almost_equal(d[1, :, 1] - d[1, :, 0], 0.2*np.ones(20, 'd'), 11) + 
+    def test_sparse(self):
+        grid_full = mgrid[-1:1:10j, -2:2:10j]
+        grid_sparse = ogrid[-1:1:10j, -2:2:10j]
+
+        # sparse grids can be made dense by broadcasting
+        grid_broadcast = np.broadcast_arrays(*grid_sparse)
+        for f, b in zip(grid_full, grid_broadcast):
+            assert_equal(f, b)
+
 
 class TestConcatenator(object):
     def test_1d(self):
@@ -184,37 +193,6 @@ class TestConcatenator(object):
         assert_array_equal(d[:5, :], b)
         assert_array_equal(d[5:, :], c)
 
-    def test_matrix(self):
-        a = [1, 2]
-        b = [3, 4]
-
-        ab_r = np.r_['r', a, b]
-        ab_c = np.r_['c', a, b]
-
-        assert_equal(type(ab_r), np.matrix)
-        assert_equal(type(ab_c), np.matrix)
-
-        assert_equal(np.array(ab_r), [[1,2,3,4]])
-        assert_equal(np.array(ab_c), [[1],[2],[3],[4]])
-
-        assert_raises(ValueError, lambda: np.r_['rc', a, b])
-
-    def test_matrix_scalar(self):
-        r = np.r_['r', [1, 2], 3]
-        assert_equal(type(r), np.matrix)
-        assert_equal(np.array(r), [[1,2,3]])
-
-    def test_matrix_builder(self):
-        a = np.array([1])
-        b = np.array([2])
-        c = np.array([3])
-        d = np.array([4])
-        actual = np.r_['a, b; c, d']
-        expected = np.bmat([[a, b], [c, d]])
-
-        assert_equal(actual, expected)
-        assert_equal(type(actual), type(expected))
-
     def test_0d(self):
         assert_equal(r_[0, np.array(1), 2], [0, 1, 2])
         assert_equal(r_[[0, 1, 2], np.array(3)], [0, 1, 2, 3])
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 0ce44f28b..f58c9e33d 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -937,7 +937,7 @@ class TestLoadTxt(LoadTxtBase):
         assert_equal(res, tgt)
 
     def test_complex_misformatted(self):
-        # test for backward compatability
+        # test for backward compatibility
         # some complex formats used to generate x+-yj
         a = np.zeros((2, 2), dtype=np.complex128)
         re = np.pi
diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py
index 1f403f7b8..504372faf 100644
--- a/numpy/lib/tests/test_nanfunctions.py
+++ b/numpy/lib/tests/test_nanfunctions.py
@@ -113,42 +113,46 @@ class TestNanFunctions_MinMax(object):
         for f in self.nanfuncs:
             assert_(f(0.) == 0.)
 
-    def test_matrices(self):
+    def test_subclass(self):
+        class MyNDArray(np.ndarray):
+            pass
+
         # Check that it works and that type and
         # shape are preserved
-        mat = np.matrix(np.eye(3))
+        mine = np.eye(3).view(MyNDArray)
         for f in self.nanfuncs:
-            res = f(mat, axis=0)
-            assert_(isinstance(res, np.matrix))
-            assert_(res.shape == (1, 3))
-            res = f(mat, axis=1)
-            assert_(isinstance(res, np.matrix))
-            assert_(res.shape == (3, 1))
-            res = f(mat)
-            assert_(np.isscalar(res))
+            res = f(mine, axis=0)
+            assert_(isinstance(res, MyNDArray))
+            assert_(res.shape == (3,))
+            res = f(mine, axis=1)
+            assert_(isinstance(res, MyNDArray))
+            assert_(res.shape == (3,))
+            res = f(mine)
+            assert_(res.shape == ())
+
         # check that rows of nan are dealt with for subclasses (#4628)
-        mat[1] = np.nan
+        mine[1] = np.nan
         for f in self.nanfuncs:
             with warnings.catch_warnings(record=True) as w:
                 warnings.simplefilter('always')
-                res = f(mat, axis=0)
-                assert_(isinstance(res, np.matrix))
+                res = f(mine, axis=0)
+                assert_(isinstance(res, MyNDArray))
                 assert_(not np.any(np.isnan(res)))
                 assert_(len(w) == 0)
 
             with warnings.catch_warnings(record=True) as w:
                 warnings.simplefilter('always')
-                res = f(mat, axis=1)
-                assert_(isinstance(res, np.matrix))
-                assert_(np.isnan(res[1, 0]) and not np.isnan(res[0, 0])
-                        and not np.isnan(res[2, 0]))
+                res = f(mine, axis=1)
+                assert_(isinstance(res, MyNDArray))
+                assert_(np.isnan(res[1]) and not np.isnan(res[0])
+                        and not np.isnan(res[2]))
                 assert_(len(w) == 1, 'no warning raised')
                 assert_(issubclass(w[0].category, RuntimeWarning))
 
             with warnings.catch_warnings(record=True) as w:
                 warnings.simplefilter('always')
-                res = f(mat)
-                assert_(np.isscalar(res))
+                res = f(mine)
+                assert_(res.shape == ())
                 assert_(res != np.nan)
                 assert_(len(w) == 0)
 
@@ -209,19 +213,22 @@ class TestNanFunctions_ArgminArgmax(object):
         for f in self.nanfuncs:
             assert_(f(0.) == 0.)
 
-    def test_matrices(self):
+    def test_subclass(self):
+        class MyNDArray(np.ndarray):
+            pass
+
         # Check that it works and that type and
         # shape are preserved
-        mat = np.matrix(np.eye(3))
+        mine = np.eye(3).view(MyNDArray)
         for f in self.nanfuncs:
-            res = f(mat, axis=0)
-            assert_(isinstance(res, np.matrix))
-            assert_(res.shape == (1, 3))
-            res = f(mat, axis=1)
-            assert_(isinstance(res, np.matrix))
-            assert_(res.shape == (3, 1))
-            res = f(mat)
-            assert_(np.isscalar(res))
+            res = f(mine, axis=0)
+            assert_(isinstance(res, MyNDArray))
+            assert_(res.shape == (3,))
+            res = f(mine, axis=1)
+            assert_(isinstance(res, MyNDArray))
+            assert_(res.shape == (3,))
+            res = f(mine)
+            assert_(res.shape == ())
 
 
 class TestNanFunctions_IntTypes(object):
@@ -381,19 +388,27 @@ class SharedNanFunctionsTestsMixin(object):
         for f in self.nanfuncs:
             assert_(f(0.) == 0.)
 
-    def test_matrices(self):
+    def test_subclass(self):
+        class MyNDArray(np.ndarray):
+            pass
+
         # Check that it works and that type and
         # shape are preserved
-        mat = np.matrix(np.eye(3))
+        array = np.eye(3)
+        mine = array.view(MyNDArray)
         for f in self.nanfuncs:
-            res = f(mat, axis=0)
-            assert_(isinstance(res, np.matrix))
-            assert_(res.shape == (1, 3))
-            res = f(mat, axis=1)
-            assert_(isinstance(res, np.matrix))
-            assert_(res.shape == (3, 1))
-            res = f(mat)
-            assert_(np.isscalar(res))
+            expected_shape = f(array, axis=0).shape
+            res = f(mine, axis=0)
+            assert_(isinstance(res, MyNDArray))
+            assert_(res.shape == expected_shape)
+            expected_shape = f(array, axis=1).shape
+            res = f(mine, axis=1)
+            assert_(isinstance(res, MyNDArray))
+            assert_(res.shape == expected_shape)
+            expected_shape = f(array).shape
+            res = f(mine)
+            assert_(isinstance(res, MyNDArray))
+            assert_(res.shape == expected_shape)
 
 
 class TestNanFunctions_SumProd(SharedNanFunctionsTestsMixin):
@@ -481,18 +496,6 @@ class TestNanFunctions_CumSumProd(SharedNanFunctionsTestsMixin):
             res = f(d, axis=axis)
             assert_equal(res.shape, (3, 5, 7, 11))
 
-    def test_matrices(self):
-        # Check that it works and that type and
-        # shape are preserved
-        mat = np.matrix(np.eye(3))
-        for f in self.nanfuncs:
-            for axis in np.arange(2):
-                res = f(mat, axis=axis)
-                assert_(isinstance(res, np.matrix))
-                assert_(res.shape == (3, 3))
-            res = f(mat)
-            assert_(res.shape == (1, 3*3))
-
     def test_result_values(self):
         for axis in (-2, -1, 0, 1, None):
             tgt = np.cumprod(_ndat_ones, axis=axis)
@@ -886,3 +889,39 @@ class TestNanFunctions_Percentile(object):
         megamat = np.ones((3, 4, 5, 6))
         assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape,
                      (2, 3, 6))
+
+
+class TestNanFunctions_Quantile(object):
+    # most of this is already tested by TestPercentile
+
+    def test_regression(self):
+        ar = np.arange(24).reshape(2, 3, 4).astype(float)
+        ar[0][1] = np.nan
+
+        assert_equal(np.nanquantile(ar, q=0.5), np.nanpercentile(ar, q=50))
+        assert_equal(np.nanquantile(ar, q=0.5, axis=0),
+                     np.nanpercentile(ar, q=50, axis=0))
+        assert_equal(np.nanquantile(ar, q=0.5, axis=1),
+                     np.nanpercentile(ar, q=50, axis=1))
+        assert_equal(np.nanquantile(ar, q=[0.5], axis=1),
+                     np.nanpercentile(ar, q=[50], axis=1))
+        assert_equal(np.nanquantile(ar, q=[0.25, 0.5, 0.75], axis=1),
+                     np.nanpercentile(ar, q=[25, 50, 75], axis=1))
+
+    def test_basic(self):
+        x = np.arange(8) * 0.5
+        assert_equal(np.nanquantile(x, 0), 0.)
+        assert_equal(np.nanquantile(x, 1), 3.5)
+        assert_equal(np.nanquantile(x, 0.5), 1.75)
+
+    def test_no_p_overwrite(self):
+        # this is worth retesting, because quantile does not make a copy
+        p0 = np.array([0, 0.75, 0.25, 0.5, 1.0])
+        p = p0.copy()
+        np.nanquantile(np.arange(100.), p, interpolation="midpoint")
+        assert_array_equal(p, p0)
+
+        p0 = p0.tolist()
+        p = p.tolist()
+        np.nanquantile(np.arange(100.), p, interpolation="midpoint")
+        assert_array_equal(p, p0)
diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py
index 080fd066d..c95894f94 100644
--- a/numpy/lib/tests/test_shape_base.py
+++ b/numpy/lib/tests/test_shape_base.py
@@ -2,16 +2,106 @@ from __future__ import division, absolute_import, print_function
 
 import numpy as np
 import warnings
+import functools
 
 from numpy.lib.shape_base import (
     apply_along_axis, apply_over_axes, array_split, split, hsplit, dsplit,
-    vsplit, dstack, column_stack, kron, tile, expand_dims,
+    vsplit, dstack, column_stack, kron, tile, expand_dims, take_along_axis,
+    put_along_axis
     )
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_raises, assert_warns
     )
 
 
+def _add_keepdims(func):
+    """ hack in keepdims behavior into a function taking an axis """
+    @functools.wraps(func)
+    def wrapped(a, axis, **kwargs):
+        res = func(a, axis=axis, **kwargs)
+        if axis is None:
+            axis = 0  # res is now a scalar, so we can insert this anywhere
+        return np.expand_dims(res, axis=axis)
+    return wrapped
+
+
+class TestTakeAlongAxis(object):
+    def test_argequivalent(self):
+        """ Test it translates from arg<func> to <func> """
+        from numpy.random import rand
+        a = rand(3, 4, 5)
+
+        funcs = [
+            (np.sort, np.argsort, dict()),
+            (_add_keepdims(np.min), _add_keepdims(np.argmin), dict()),
+            (_add_keepdims(np.max), _add_keepdims(np.argmax), dict()),
+            (np.partition, np.argpartition, dict(kth=2)),
+        ]
+
+        for func, argfunc, kwargs in funcs:
+            for axis in list(range(a.ndim)) + [None]:
+                a_func = func(a, axis=axis, **kwargs)
+                ai_func = argfunc(a, axis=axis, **kwargs)
+                assert_equal(a_func, take_along_axis(a, ai_func, axis=axis))
+
+    def test_invalid(self):
+        """ Test it errors when indices has too few dimensions """
+        a = np.ones((10, 10))
+        ai = np.ones((10, 2), dtype=np.intp)
+
+        # sanity check
+        take_along_axis(a, ai, axis=1)
+
+        # not enough indices
+        assert_raises(ValueError, take_along_axis, a, np.array(1), axis=1)
+        # bool arrays not allowed
+        assert_raises(IndexError, take_along_axis, a, ai.astype(bool), axis=1)
+        # float arrays not allowed
+        assert_raises(IndexError, take_along_axis, a, ai.astype(float), axis=1)
+        # invalid axis
+        assert_raises(np.AxisError, take_along_axis, a, ai, axis=10)
+
+    def test_empty(self):
+        """ Test everything is ok with empty results, even with inserted dims """
+        a = np.ones((3, 4, 5))
+        ai = np.ones((3, 0, 5), dtype=np.intp)
+
+        actual = take_along_axis(a, ai, axis=1)
+        assert_equal(actual.shape, ai.shape)
+
+    def test_broadcast(self):
+        """ Test that non-indexing dimensions are broadcast in both directions """
+        a = np.ones((3, 4, 1))
+        ai = np.ones((1, 2, 5), dtype=np.intp)
+        actual = take_along_axis(a, ai, axis=1)
+        assert_equal(actual.shape, (3, 2, 5))
+
+
+class TestPutAlongAxis(object):
+    def test_replace_max(self):
+        a_base = np.array([[10, 30, 20], [60, 40, 50]])
+
+        for axis in list(range(a_base.ndim)) + [None]:
+            # we mutate this in the loop
+            a = a_base.copy()
+
+            # replace the max with a small value
+            i_max = _add_keepdims(np.argmax)(a, axis=axis)
+            put_along_axis(a, i_max, -99, axis=axis)
+
+            # find the new minimum, which should be where the max was
+            i_min = _add_keepdims(np.argmin)(a, axis=axis)
+
+            assert_equal(i_min, i_max)
+
+    def test_broadcast(self):
+        """ Test that non-indexing dimensions are broadcast in both directions """
+        a = np.ones((3, 4, 1))
+        ai = np.arange(10, dtype=np.intp).reshape((1, 2, 5)) % 4
+        put_along_axis(a, ai, 20, axis=1)
+        assert_equal(take_along_axis(a, ai, axis=1), 20)
+
+
 class TestApplyAlongAxis(object):
     def test_simple(self):
         a = np.ones((20, 10), 'd')
@@ -29,19 +119,21 @@ class TestApplyAlongAxis(object):
                      [[27, 30, 33], [36, 39, 42], [45, 48, 51]])
 
     def test_preserve_subclass(self):
-        # this test is particularly malicious because matrix
-        # refuses to become 1d
         def double(row):
             return row * 2
-        m = np.matrix([[0, 1], [2, 3]])
-        expected = np.matrix([[0, 2], [4, 6]])
+
+        class MyNDArray(np.ndarray):
+            pass
+
+        m = np.array([[0, 1], [2, 3]]).view(MyNDArray)
+        expected = np.array([[0, 2], [4, 6]]).view(MyNDArray)
 
         result = apply_along_axis(double, 0, m)
-        assert_(isinstance(result, np.matrix))
+        assert_(isinstance(result, MyNDArray))
         assert_array_equal(result, expected)
 
         result = apply_along_axis(double, 1, m)
-        assert_(isinstance(result, np.matrix))
+        assert_(isinstance(result, MyNDArray))
         assert_array_equal(result, expected)
 
     def test_subclass(self):
@@ -79,7 +171,7 @@ class TestApplyAlongAxis(object):
 
     def test_axis_insertion(self, cls=np.ndarray):
         def f1to2(x):
-            """produces an assymmetric non-square matrix from x"""
+            """produces an asymmetric non-square matrix from x"""
            assert_equal(x.ndim, 1)
             return (x[::-1] * x[1:,None]).view(cls)
 
@@ -123,7 +215,7 @@ class TestApplyAlongAxis(object):
 
     def test_axis_insertion_ma(self):
        def f1to2(x):
-            """produces an assymmetric non-square matrix from x"""
+            """produces an asymmetric non-square matrix from x"""
             assert_equal(x.ndim, 1)
             res = x[::-1] * x[1:,None]
             return np.ma.masked_where(res%5==0, res)
@@ -492,16 +584,10 @@ class TestSqueeze(object):
 
 class TestKron(object):
     def test_return_type(self):
-        a = np.ones([2, 2])
-        m = np.asmatrix(a)
-        assert_equal(type(kron(a, a)), np.ndarray)
-        assert_equal(type(kron(m, m)), np.matrix)
-        assert_equal(type(kron(a, m)), np.matrix)
-        assert_equal(type(kron(m, a)), np.matrix)
-
         class myarray(np.ndarray):
             __array_priority__ = 0.0
+        a = np.ones([2, 2])
         ma = myarray(a.shape, a.dtype, a.data)
         assert_equal(type(kron(a, a)), np.ndarray)
         assert_equal(type(kron(ma, ma)), myarray)
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py
index 402c18850..cca316e9a 100644
--- a/numpy/lib/twodim_base.py
+++ b/numpy/lib/twodim_base.py
@@ -650,7 +650,7 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
             N = 1
 
     if N != 1 and N != 2:
-        xedges = yedges = asarray(bins, float)
+        xedges = yedges = asarray(bins)
         bins = [xedges, yedges]
     hist, edges = histogramdd([x, y], bins, range, normed, weights)
     return hist, edges[0], edges[1]