summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release/1.15.0-notes.rst17
-rw-r--r--doc/source/reference/routines.indexing.rst2
-rw-r--r--numpy/core/fromnumeric.py8
-rw-r--r--numpy/lib/shape_base.py227
-rw-r--r--numpy/lib/tests/test_shape_base.py92
5 files changed, 343 insertions, 3 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst
index 82e50edac..a7219e959 100644
--- a/doc/release/1.15.0-notes.rst
+++ b/doc/release/1.15.0-notes.rst
@@ -351,5 +351,22 @@ inner-product example, ``keepdims=True, axes=[-2, -2, -2]`` would act on the
one-but-last dimension of the input arguments, and leave a size 1 dimension in
that place in the output.
+New ``np.take_along_axis`` and ``np.put_along_axis`` functions
+--------------------------------------------------------------
+When used on multidimensional arrays, ``argsort``, ``argmin``, ``argmax``, and
+``argpartition`` return arrays that are difficult to use as indices.
+``take_along_axis`` provides an easy way to use these indices to lookup values
+within an array, so that::
+
+ np.take_along_axis(a, np.argsort(a, axis=axis), axis=axis)
+
+is the same as::
+
+ np.sort(a, axis=axis)
+
+``np.put_along_axis`` acts as the dual operation for writing to these indices
+within an array.
+
+
Changes
=======
diff --git a/doc/source/reference/routines.indexing.rst b/doc/source/reference/routines.indexing.rst
index 4d2458d2f..aeec1a1bb 100644
--- a/doc/source/reference/routines.indexing.rst
+++ b/doc/source/reference/routines.indexing.rst
@@ -36,6 +36,7 @@ Indexing-like operations
:toctree: generated/
take
+ take_along_axis
choose
compress
diag
@@ -50,6 +51,7 @@ Inserting data into arrays
place
put
+ put_along_axis
putmask
fill_diagonal
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 0db5663f9..d1aae0aa0 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -140,6 +140,7 @@ def take(a, indices, axis=None, out=None, mode='raise'):
--------
compress : Take elements using a boolean mask
ndarray.take : equivalent method
+ take_along_axis : Take elements by matching the array and the index arrays
Notes
-----
@@ -478,6 +479,7 @@ def put(a, ind, v, mode='raise'):
See Also
--------
putmask, place
+ put_along_axis : Put elements by matching the array and the index arrays
Examples
--------
@@ -723,7 +725,9 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None):
-------
index_array : ndarray, int
Array of indices that partition `a` along the specified axis.
- In other words, ``a[index_array]`` yields a partitioned `a`.
+ If `a` is one-dimensional, ``a[index_array]`` yields a partitioned `a`.
+ More generally, ``np.take_along_axis(a, index_array, axis=a)`` always
+ yields the partitioned `a`, irrespective of dimensionality.
See Also
--------
@@ -904,6 +908,8 @@ def argsort(a, axis=-1, kind='quicksort', order=None):
index_array : ndarray, int
Array of indices that sort `a` along the specified axis.
If `a` is one-dimensional, ``a[index_array]`` yields a sorted `a`.
+ More generally, ``np.take_along_axis(a, index_array, axis=a)`` always
+ yields the sorted `a`, irrespective of dimensionality.
See Also
--------
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index 41ef28ef3..65104115a 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -16,10 +16,235 @@ from numpy.matrixlib.defmatrix import matrix # this raises all the right alarm
__all__ = [
'column_stack', 'row_stack', 'dstack', 'array_split', 'split',
'hsplit', 'vsplit', 'dsplit', 'apply_over_axes', 'expand_dims',
- 'apply_along_axis', 'kron', 'tile', 'get_array_wrap'
+ 'apply_along_axis', 'kron', 'tile', 'get_array_wrap', 'take_along_axis',
+ 'put_along_axis'
]
+def _make_along_axis_idx(arr_shape, indices, axis):
+ # compute dimensions to iterate over
+ if not _nx.issubdtype(indices.dtype, _nx.integer):
+ raise IndexError('`indices` must be an integer array')
+ if len(arr_shape) != indices.ndim:
+ raise ValueError(
+ "`indices` and `arr` must have the same number of dimensions")
+ shape_ones = (1,) * indices.ndim
+ dest_dims = list(range(axis)) + [None] + list(range(axis+1, indices.ndim))
+
+ # build a fancy index, consisting of orthogonal aranges, with the
+ # requested index inserted at the right location
+ fancy_index = []
+ for dim, n in zip(dest_dims, arr_shape):
+ if dim is None:
+ fancy_index.append(indices)
+ else:
+ ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:]
+ fancy_index.append(_nx.arange(n).reshape(ind_shape))
+
+ return tuple(fancy_index)
+
+
+def take_along_axis(arr, indices, axis):
+ """
+ Take values from the input array by matching 1d index and data slices.
+
+ This iterates over matching 1d slices oriented along the specified axis in
+ the index and data arrays, and uses the former to look up values in the
+ latter. These slices can be different lengths.
+
+ Functions returning an index along an axis, like `argsort` and
+ `argpartition`, produce suitable indices for this function.
+
+ .. versionadded:: 1.15.0
+
+ Parameters
+ ----------
+ arr: ndarray (Ni..., M, Nk...)
+ Source array
+ indices: ndarray (Ni..., J, Nk...)
+ Indices to take along each 1d slice of `arr`. This must match the
+ dimension of arr, but dimensions Ni and Nj only need to broadcast
+ against `arr`.
+ axis: int
+ The axis to take 1d slices along. If axis is None, the input array is
+ treated as if it had first been flattened to 1d, for consistency with
+ `sort` and `argsort`.
+
+ Returns
+ -------
+ out: ndarray (Ni..., J, Nk...)
+ The indexed result.
+
+ Notes
+ -----
+ This is equivalent to (but faster than) the following use of `ndindex` and
+ `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices::
+
+ Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:]
+ J = indices.shape[axis] # Need not equal M
+ out = np.empty(Nk + (J,) + Nk)
+
+ for ii in ndindex(Ni):
+ for kk in ndindex(Nk):
+ a_1d = a [ii + s_[:,] + kk]
+ indices_1d = indices[ii + s_[:,] + kk]
+ out_1d = out [ii + s_[:,] + kk]
+ for j in range(J):
+ out_1d[j] = a_1d[indices_1d[j]]
+
+ Equivalently, eliminating the inner loop, the last two lines would be::
+
+ out_1d[:] = a_1d[indices_1d]
+
+ See Also
+ --------
+ take : Take along an axis, using the same indices for every 1d slice
+ put_along_axis :
+ Put values into the destination array by matching 1d index and data slices
+
+ Examples
+ --------
+
+ For this sample array
+
+ >>> a = np.array([[10, 30, 20], [60, 40, 50]])
+
+ We can sort either by using sort directly, or argsort and this function
+
+ >>> np.sort(a, axis=1)
+ array([[10, 20, 30],
+ [40, 50, 60]])
+ >>> ai = np.argsort(a, axis=1); ai
+ array([[0, 2, 1],
+ [1, 2, 0]], dtype=int64)
+ >>> np.take_along_axis(a, ai, axis=1)
+ array([[10, 20, 30],
+ [40, 50, 60]])
+
+ The same works for max and min, if you expand the dimensions:
+
+ >>> np.expand_dims(np.max(a, axis=1), axis=1)
+ array([[30],
+ [60]])
+ >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1)
+ >>> ai
+ array([[1],
+ [0], dtype=int64)
+ >>> np.take_along_axis(a, ai, axis=1)
+ array([[30],
+ [60]])
+
+ If we want to get the max and min at the same time, we can stack the
+ indices first
+
+ >>> ai_min = np.expand_dims(np.argmin(a, axis=1), axis=1)
+ >>> ai_max = np.expand_dims(np.argmax(a, axis=1), axis=1)
+ >>> ai = np.concatenate([ai_min, ai_max], axis=axis)
+ >> ai
+ array([[0, 1],
+ [1, 0]], dtype=int64)
+ >>> np.take_along_axis(a, ai, axis=1)
+ array([[10, 30],
+ [40, 60]])
+ """
+ # normalize inputs
+ if axis is None:
+ arr = arr.flat
+ arr_shape = (len(arr),) # flatiter has no .shape
+ axis = 0
+ else:
+ axis = normalize_axis_index(axis, arr.ndim)
+ arr_shape = arr.shape
+
+ # use the fancy index
+ return arr[_make_along_axis_idx(arr_shape, indices, axis)]
+
+
+def put_along_axis(arr, indices, values, axis):
+ """
+ Put values into the destination array by matching 1d index and data slices.
+
+ This iterates over matching 1d slices oriented along the specified axis in
+ the index and data arrays, and uses the former to place values into the
+ latter. These slices can be different lengths.
+
+ Functions returning an index along an axis, like `argsort` and
+ `argpartition`, produce suitable indices for this function.
+
+ .. versionadded:: 1.15.0
+
+ Parameters
+ ----------
+ arr: ndarray (Ni..., M, Nk...)
+ Destination array.
+ indices: ndarray (Ni..., J, Nk...)
+ Indices to change along each 1d slice of `arr`. This must match the
+ dimension of arr, but dimensions in Ni and Nj may be 1 to broadcast
+ against `arr`.
+ values: array_like (Ni..., J, Nk...)
+ values to insert at those indices. Its shape and dimension are
+ broadcast to match that of `indices`.
+ axis: int
+ The axis to take 1d slices along. If axis is None, the destination
+ array is treated as if a flattened 1d view had been created of it.
+
+ Notes
+ -----
+ This is equivalent to (but faster than) the following use of `ndindex` and
+ `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices::
+
+ Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:]
+ J = indices.shape[axis] # Need not equal M
+
+ for ii in ndindex(Ni):
+ for kk in ndindex(Nk):
+ a_1d = a [ii + s_[:,] + kk]
+ indices_1d = indices[ii + s_[:,] + kk]
+ values_1d = values [ii + s_[:,] + kk]
+ for j in range(J):
+ a_1d[indices_1d[j]] = values_1d[j]
+
+ Equivalently, eliminating the inner loop, the last two lines would be::
+
+ a_1d[indices_1d] = values_1d
+
+ See Also
+ --------
+ take_along_axis :
+ Take values from the input array by matching 1d index and data slices
+
+ Examples
+ --------
+
+ For this sample array
+
+ >>> a = np.array([[10, 30, 20], [60, 40, 50]])
+
+ We can replace the maximum values with:
+
+ >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1)
+ >>> ai
+ array([[1],
+ [0]], dtype=int64)
+ >>> np.put_along_axis(a, ai, 99, axis=1)
+ >>> a
+ array([[10, 99, 20],
+ [99, 40, 50]])
+
+ """
+ # normalize inputs
+ if axis is None:
+ arr = arr.flat
+ axis = 0
+ arr_shape = (len(arr),) # flatiter has no .shape
+ else:
+ axis = normalize_axis_index(axis, arr.ndim)
+ arr_shape = arr.shape
+
+ # use the fancy index
+ arr[_make_along_axis_idx(arr_shape, indices, axis)] = values
+
+
def apply_along_axis(func1d, axis, arr, *args, **kwargs):
"""
Apply a function to 1-D slices along the given axis.
diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py
index a35d90b70..c95894f94 100644
--- a/numpy/lib/tests/test_shape_base.py
+++ b/numpy/lib/tests/test_shape_base.py
@@ -2,16 +2,106 @@ from __future__ import division, absolute_import, print_function
import numpy as np
import warnings
+import functools
from numpy.lib.shape_base import (
apply_along_axis, apply_over_axes, array_split, split, hsplit, dsplit,
- vsplit, dstack, column_stack, kron, tile, expand_dims,
+ vsplit, dstack, column_stack, kron, tile, expand_dims, take_along_axis,
+ put_along_axis
)
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_raises, assert_warns
)
+def _add_keepdims(func):
+ """ hack in keepdims behavior into a function taking an axis """
+ @functools.wraps(func)
+ def wrapped(a, axis, **kwargs):
+ res = func(a, axis=axis, **kwargs)
+ if axis is None:
+ axis = 0 # res is now a scalar, so we can insert this anywhere
+ return np.expand_dims(res, axis=axis)
+ return wrapped
+
+
+class TestTakeAlongAxis(object):
+ def test_argequivalent(self):
+ """ Test it translates from arg<func> to <func> """
+ from numpy.random import rand
+ a = rand(3, 4, 5)
+
+ funcs = [
+ (np.sort, np.argsort, dict()),
+ (_add_keepdims(np.min), _add_keepdims(np.argmin), dict()),
+ (_add_keepdims(np.max), _add_keepdims(np.argmax), dict()),
+ (np.partition, np.argpartition, dict(kth=2)),
+ ]
+
+ for func, argfunc, kwargs in funcs:
+ for axis in list(range(a.ndim)) + [None]:
+ a_func = func(a, axis=axis, **kwargs)
+ ai_func = argfunc(a, axis=axis, **kwargs)
+ assert_equal(a_func, take_along_axis(a, ai_func, axis=axis))
+
+ def test_invalid(self):
+ """ Test it errors when indices has too few dimensions """
+ a = np.ones((10, 10))
+ ai = np.ones((10, 2), dtype=np.intp)
+
+ # sanity check
+ take_along_axis(a, ai, axis=1)
+
+ # not enough indices
+ assert_raises(ValueError, take_along_axis, a, np.array(1), axis=1)
+ # bool arrays not allowed
+ assert_raises(IndexError, take_along_axis, a, ai.astype(bool), axis=1)
+ # float arrays not allowed
+ assert_raises(IndexError, take_along_axis, a, ai.astype(float), axis=1)
+ # invalid axis
+ assert_raises(np.AxisError, take_along_axis, a, ai, axis=10)
+
+ def test_empty(self):
+ """ Test everything is ok with empty results, even with inserted dims """
+ a = np.ones((3, 4, 5))
+ ai = np.ones((3, 0, 5), dtype=np.intp)
+
+ actual = take_along_axis(a, ai, axis=1)
+ assert_equal(actual.shape, ai.shape)
+
+ def test_broadcast(self):
+ """ Test that non-indexing dimensions are broadcast in both directions """
+ a = np.ones((3, 4, 1))
+ ai = np.ones((1, 2, 5), dtype=np.intp)
+ actual = take_along_axis(a, ai, axis=1)
+ assert_equal(actual.shape, (3, 2, 5))
+
+
+class TestPutAlongAxis(object):
+ def test_replace_max(self):
+ a_base = np.array([[10, 30, 20], [60, 40, 50]])
+
+ for axis in list(range(a_base.ndim)) + [None]:
+ # we mutate this in the loop
+ a = a_base.copy()
+
+ # replace the max with a small value
+ i_max = _add_keepdims(np.argmax)(a, axis=axis)
+ put_along_axis(a, i_max, -99, axis=axis)
+
+ # find the new minimum, which should max
+ i_min = _add_keepdims(np.argmin)(a, axis=axis)
+
+ assert_equal(i_min, i_max)
+
+ def test_broadcast(self):
+ """ Test that non-indexing dimensions are broadcast in both directions """
+ a = np.ones((3, 4, 1))
+ ai = np.arange(10, dtype=np.intp).reshape((1, 2, 5)) % 4
+ put_along_axis(a, ai, 20, axis=1)
+ assert_equal(take_along_axis(a, ai, axis=1), 20)
+
+
class TestApplyAlongAxis(object):
def test_simple(self):
a = np.ones((20, 10), 'd')