diff options
author | Sebastian Berg <sebastian@sipsolutions.net> | 2012-10-11 18:07:03 +0200 |
---|---|---|
committer | Sebastian Berg <sebastian@sipsolutions.net> | 2013-04-11 18:52:03 +0200 |
commit | 908e06c3c465434023649b0ca522836580c5cfdc (patch) | |
tree | 98ff9a320863f4fd7d861c2c217ecd0c314ef37f /numpy/lib/function_base.py | |
parent | 79126f1c6084c56348b71e1b91fb4b6bc9de86b2 (diff) | |
download | numpy-908e06c3c465434023649b0ca522836580c5cfdc.tar.gz |
ENH: larger fixes for np.delete and np.insert functions
There were several smaller to larger problems for these two functions,
that this addresses:
* delete did not handle out of bound values graciously (ignoring negative
ones)
* both were unnecessarily slow due to use of sets
* insert did not handle unsorted indices correctly
Further changes:
* Add FutureWarning for boolean obj, so it can be handled similar to a
boolean mask with indexing.
* Add FutureWarning to remove inconsistent special cases for 0-d arrays
(neither insertion nor deletion along an axis make sense for a scalar)
* Allow insertion of an array with more then one element along axis when
obj is a sequence with a single item. (i.e. array([1])).
* Reintroduce speed optimization for scalar in insert that existed in 1.6.
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r-- | numpy/lib/function_base.py | 200 |
1 files changed, 147 insertions, 53 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 1e5a822a7..04a39af44 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -28,7 +28,6 @@ from numpy.core import atleast_1d, atleast_2d from numpy.lib.twodim_base import diag from ._compiled_base import _insert, add_docstring from ._compiled_base import digitize, bincount, interp as compiled_interp -from .arraysetops import setdiff1d from .utils import deprecate from ._compiled_base import add_newdoc_ufunc import numpy as np @@ -3386,7 +3385,8 @@ def meshgrid(*xi, **kwargs): def delete(arr, obj, axis=None): """ - Return a new array with sub-arrays along an axis deleted. + Return a new array with sub-arrays along an axis deleted. For a one + dimensional array, this returns those entries not returned by `arr[obj,]`. Parameters ---------- @@ -3410,6 +3410,15 @@ def delete(arr, obj, axis=None): insert : Insert elements into an array. append : Append elements at the end of an array. + Notes + ----- + Often it is preferable to use a boolean mask. For example: + >>> mask = np.ones(len(arr), dtype=bool) + >>> mask[[0,2,4]] = False + >>> result = arr[mask,...] + Is equivalent to `np.delete(arr, [0,2,4], axis=0)`, but allows further + use of `mask`. + Examples -------- >>> arr = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]]) @@ -3436,7 +3445,6 @@ def delete(arr, obj, axis=None): except AttributeError: pass - arr = asarray(arr) ndim = arr.ndim if axis is None: @@ -3445,34 +3453,35 @@ def delete(arr, obj, axis=None): ndim = arr.ndim; axis = ndim-1; if ndim == 0: + warnings.warn("in the future the special handleing of scalars " + "will be removed from delete and raise an error", + FutureWarning) if wrap: return wrap(arr) else: return arr.copy() + slobj = [slice(None)]*ndim N = arr.shape[axis] newshape = list(arr.shape) - if isinstance(obj, (int, long, integer)): - if (obj < 0): obj += N - if (obj < 0 or obj >=N): - raise ValueError( - "invalid entry") - newshape[axis]-=1; - new = empty(newshape, arr.dtype, arr.flags.fnc) - slobj[axis] = slice(None, obj) - new[slobj] = arr[slobj] - slobj[axis] = slice(obj,None) - slobj2 = [slice(None)]*ndim - slobj2[axis] = slice(obj+1,None) - new[slobj] = arr[slobj2] - elif isinstance(obj, slice): + + if isinstance(obj, slice): start, stop, step = obj.indices(N) - numtodel = len(list(range(start, stop, step))) + xr = xrange(start, stop, step) + numtodel = len(xr) + if numtodel <= 0: if wrap: return wrap(new) else: return arr.copy() + + # Invert if step is negative: + if step < 0: + step = -step + start = xr[-1] + stop = xr[0] + 1 + newshape[axis] -= numtodel new = empty(newshape, arr.dtype, arr.flags.fnc) # copy initial chunk @@ -3493,24 +3502,52 @@ def delete(arr, obj, axis=None): if step == 1: pass else: # use array indexing. - obj = arange(start, stop, step, dtype=intp) - all = arange(start, stop, dtype=intp) - obj = setdiff1d(all, obj) + keep = ones(stop-start, dtype=bool) + keep[:stop-start:step] = False slobj[axis] = slice(start, stop-numtodel) slobj2 = [slice(None)]*ndim - slobj2[axis] = obj + slobj2[axis] = slice(start, stop) + arr = arr[slobj2] + slobj2[axis] = keep new[slobj] = arr[slobj2] - else: # default behavior - obj = array(obj, dtype=intp, copy=0, ndmin=1) - all = arange(N, dtype=intp) - obj = setdiff1d(all, obj) - slobj[axis] = obj + if wrap: + return wrap(new) + else: + return new + + obj = np.asarray(obj) + if obj.dtype == bool: + warnings.warn("in the future insert will treat boolean arrays " + "and array-likes as boolean index instead " + "of casting it to integer", FutureWarning) + obj = obj.astype(np.intp) + # After deprecation, obj can be used directly for keep below. + if obj.size == 1: + obj = obj.item() + if (obj < -N or obj >=N): + raise IndexError("index %i is out of bounds for axis " + "%i with size %i" % (obj, axis, N)) + if (obj < 0): obj += N + newshape[axis]-=1; + new = empty(newshape, arr.dtype, arr.flags.fnc) + slobj[axis] = slice(None, obj) + new[slobj] = arr[slobj] + slobj[axis] = slice(obj,None) + slobj2 = [slice(None)]*ndim + slobj2[axis] = slice(obj+1,None) + new[slobj] = arr[slobj2] + else: + keep = ones(N, dtype=bool) + keep[obj] = False + slobj[axis] = keep new = arr[slobj] + if wrap: return wrap(new) else: return new + def insert(arr, obj, values, axis=None): """ Insert values along the given axis before the given indices. @@ -3522,9 +3559,16 @@ def insert(arr, obj, values, axis=None): obj : int, slice or sequence of ints Object that defines the index or indices before which `values` is inserted. + + .. versionadded:: 1.8.0 + + Support for multiple insertions when `obj` is a single scalar or a + sequence with one element (similar to calling insert multiple times). values : array_like Values to insert into `arr`. If the type of `values` is different - from that of `arr`, `values` is converted to the type of `arr`. + from that of `arr`, `values` is converted to the type of `arr`. + `values` should be shaped so that ``arr[...,obj,...] = values`` + is legal. axis : int, optional Axis along which to insert `values`. If `axis` is None then `arr` is flattened first. @@ -3539,8 +3583,15 @@ def insert(arr, obj, values, axis=None): See Also -------- append : Append elements at the end of an array. + concatenate : Join a sequence of arrays together. delete : Delete elements from an array. + Notes + ----- + Note that for higher dimensional inserts `obj=0` behaves very different + from `obj=[0]` just like `arr[:,0,:] = values` is different from + `arr[:,[0],:] = values`. + Examples -------- >>> a = np.array([[1, 1], [2, 2], [3, 3]]) @@ -3555,6 +3606,15 @@ def insert(arr, obj, values, axis=None): [2, 5, 2], [3, 5, 3]]) + Difference between sequence and scalars: + >>> np.insert(a, [1], [[1],[2],[3]], axis=1) + array([[1, 1, 1], + [2, 2, 2], + [3, 3, 3]]) + >>> np.array_equal(np.insert(a, 1, [1, 2, 3], axis=1), + ... np.insert(a, [1], [[1],[2],[3]], axis=1)) + True + >>> b = a.flatten() >>> b array([1, 1, 2, 2, 3, 3]) @@ -3589,6 +3649,9 @@ def insert(arr, obj, values, axis=None): ndim = arr.ndim axis = ndim-1 if (ndim == 0): + warnings.warn("in the future the special handleing of scalars " + "will be removed from insert and raise an error", + FutureWarning) arr = arr.copy() arr[...] = values if wrap: @@ -3599,38 +3662,69 @@ def insert(arr, obj, values, axis=None): N = arr.shape[axis] newshape = list(arr.shape) - if isinstance(obj, (int, long, integer)): - if (obj < 0): obj += N - if obj < 0 or obj > N: - raise ValueError( - "index (%d) out of range (0<=index<=%d) "\ - "in dimension %d" % (obj, N, axis)) - if isscalar(values): - obj = [obj] + if isinstance(obj, slice): + # turn it into a range object + indices = arange(*obj.indices(N),**{'dtype':intp}) + else: + obj = np.asarray(obj) + if obj.dtype == bool: + warnings.warn("in the future insert will treat boolean arrays " + "and array-likes as boolean index instead " + "of casting it to integer", FutureWarning) + indices = obj.astype(np.intp) + # Code after warning period: + #if obj.ndim != 1: + # raise ValueError('insertion boolean array must ' + # 'be one dimensional') + #indices = np.flatnonzero(obj) else: - values = asarray(values) - if ndim > values.ndim: - obj = [obj] - else: - obj = [obj] * len(values) + indices = array(obj, dtype=intp) + if indices.ndim > 1: + raise ValueError("`obj` index array to insert must be " + "one dimensional or scalar") + + if indices.size == 1: + index = indices.item() + if index < -N or index > N: + raise IndexError("index %i is out of bounds for axis " + "%i with size %i" % (obj, axis, N)) + if (index < 0): index += N + + values = array(values, copy=False, ndmin=arr.ndim) + if indices.ndim == 0: + # broadcasting is very different here, since a[:,0,:] = ... behaves + # very different from a[:,[0],:] = ...! This changes values so that + # it works likes the second case. (here a[:,0:1,:]) + values = np.rollaxis(values, 0, axis+1) + numnew = values.shape[axis] + newshape[axis] += numnew + new = empty(newshape, arr.dtype, arr.flags.fnc) + slobj[axis] = slice(None, index) + new[slobj] = arr[slobj] + slobj[axis] = slice(index, index+numnew) + new[slobj] = values + slobj[axis] = slice(index+numnew, None) + slobj2 = [slice(None)] * ndim + slobj2[axis] = slice(index, None) + new[slobj] = arr[slobj2] + if wrap: + return wrap(new) + return new - elif isinstance(obj, slice): - # turn it into a range object - obj = arange(*obj.indices(N),**{'dtype':intp}) + indices[indices < 0] += N - # get two sets of indices - # one is the indices which will hold the new stuff - # two is the indices where arr will be copied over + numnew = len(indices) + order = indices.argsort(kind='mergesort') # stable sort + indices[order] += np.arange(numnew) - obj = asarray(obj, dtype=intp) - numnew = len(obj) - index1 = obj + arange(numnew) - index2 = setdiff1d(arange(numnew+N),index1) newshape[axis] += numnew + old_mask = ones(newshape[axis], dtype=bool) + old_mask[indices] = False + new = empty(newshape, arr.dtype, arr.flags.fnc) slobj2 = [slice(None)]*ndim - slobj[axis] = index1 - slobj2[axis] = index2 + slobj[axis] = indices + slobj2[axis] = old_mask new[slobj] = values new[slobj2] = arr |