summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarten van Kerkwijk <mhvk@astro.utoronto.ca>2018-05-28 20:42:44 -0400
committerGitHub <noreply@github.com>2018-05-28 20:42:44 -0400
commit23cb605d7f230ee2b00c0e7167256cf434731b16 (patch)
tree75660f7d14dc2bdf48ec9f062864c74efee6b80d
parent40b55b6a3003b2f4045be9958af234c538a051f3 (diff)
parent7a3c50ab427cd9c4f3125a8e72d31f9141bd558a (diff)
downloadnumpy-23cb605d7f230ee2b00c0e7167256cf434731b16.tar.gz
Merge pull request #11105 from eric-wieser/take_along_axis-strict
ENH: Add (put|take)_along_axis
-rw-r--r--doc/release/1.15.0-notes.rst17
-rw-r--r--doc/source/reference/routines.indexing.rst2
-rw-r--r--numpy/core/fromnumeric.py8
-rw-r--r--numpy/lib/shape_base.py227
-rw-r--r--numpy/lib/tests/test_shape_base.py92
-rw-r--r--numpy/ma/core.py10
-rw-r--r--numpy/ma/extras.py36
7 files changed, 356 insertions, 36 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst
index f8bb389aa..5f72149e9 100644
--- a/doc/release/1.15.0-notes.rst
+++ b/doc/release/1.15.0-notes.rst
@@ -375,5 +375,22 @@ inner-product example, ``keepdims=True, axes=[-2, -2, -2]`` would act on the
one-but-last dimension of the input arguments, and leave a size 1 dimension in
that place in the output.
+New ``np.take_along_axis`` and ``np.put_along_axis`` functions
+--------------------------------------------------------------
+When used on multidimensional arrays, ``argsort``, ``argmin``, ``argmax``, and
+``argpartition`` return arrays that are difficult to use as indices.
+``take_along_axis`` provides an easy way to use these indices to lookup values
+within an array, so that::
+
+ np.take_along_axis(a, np.argsort(a, axis=axis), axis=axis)
+
+is the same as::
+
+ np.sort(a, axis=axis)
+
+``np.put_along_axis`` acts as the dual operation for writing to these indices
+within an array.
+
+
Changes
=======
diff --git a/doc/source/reference/routines.indexing.rst b/doc/source/reference/routines.indexing.rst
index 4d2458d2f..aeec1a1bb 100644
--- a/doc/source/reference/routines.indexing.rst
+++ b/doc/source/reference/routines.indexing.rst
@@ -36,6 +36,7 @@ Indexing-like operations
:toctree: generated/
take
+ take_along_axis
choose
compress
diag
@@ -50,6 +51,7 @@ Inserting data into arrays
place
put
+ put_along_axis
putmask
fill_diagonal
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 0db5663f9..d1aae0aa0 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -140,6 +140,7 @@ def take(a, indices, axis=None, out=None, mode='raise'):
--------
compress : Take elements using a boolean mask
ndarray.take : equivalent method
+ take_along_axis : Take elements by matching the array and the index arrays
Notes
-----
@@ -478,6 +479,7 @@ def put(a, ind, v, mode='raise'):
See Also
--------
putmask, place
+ put_along_axis : Put elements by matching the array and the index arrays
Examples
--------
@@ -723,7 +725,9 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None):
-------
index_array : ndarray, int
Array of indices that partition `a` along the specified axis.
- In other words, ``a[index_array]`` yields a partitioned `a`.
+ If `a` is one-dimensional, ``a[index_array]`` yields a partitioned `a`.
+ More generally, ``np.take_along_axis(a, index_array, axis=a)`` always
+ yields the partitioned `a`, irrespective of dimensionality.
See Also
--------
@@ -904,6 +908,8 @@ def argsort(a, axis=-1, kind='quicksort', order=None):
index_array : ndarray, int
Array of indices that sort `a` along the specified axis.
If `a` is one-dimensional, ``a[index_array]`` yields a sorted `a`.
+ More generally, ``np.take_along_axis(a, index_array, axis=a)`` always
+ yields the sorted `a`, irrespective of dimensionality.
See Also
--------
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index 41ef28ef3..65104115a 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -16,10 +16,235 @@ from numpy.matrixlib.defmatrix import matrix # this raises all the right alarm
__all__ = [
'column_stack', 'row_stack', 'dstack', 'array_split', 'split',
'hsplit', 'vsplit', 'dsplit', 'apply_over_axes', 'expand_dims',
- 'apply_along_axis', 'kron', 'tile', 'get_array_wrap'
+ 'apply_along_axis', 'kron', 'tile', 'get_array_wrap', 'take_along_axis',
+ 'put_along_axis'
]
+def _make_along_axis_idx(arr_shape, indices, axis):
+ # compute dimensions to iterate over
+ if not _nx.issubdtype(indices.dtype, _nx.integer):
+ raise IndexError('`indices` must be an integer array')
+ if len(arr_shape) != indices.ndim:
+ raise ValueError(
+ "`indices` and `arr` must have the same number of dimensions")
+ shape_ones = (1,) * indices.ndim
+ dest_dims = list(range(axis)) + [None] + list(range(axis+1, indices.ndim))
+
+ # build a fancy index, consisting of orthogonal aranges, with the
+ # requested index inserted at the right location
+ fancy_index = []
+ for dim, n in zip(dest_dims, arr_shape):
+ if dim is None:
+ fancy_index.append(indices)
+ else:
+ ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:]
+ fancy_index.append(_nx.arange(n).reshape(ind_shape))
+
+ return tuple(fancy_index)
+
+
+def take_along_axis(arr, indices, axis):
+ """
+ Take values from the input array by matching 1d index and data slices.
+
+ This iterates over matching 1d slices oriented along the specified axis in
+ the index and data arrays, and uses the former to look up values in the
+ latter. These slices can be different lengths.
+
+ Functions returning an index along an axis, like `argsort` and
+ `argpartition`, produce suitable indices for this function.
+
+ .. versionadded:: 1.15.0
+
+ Parameters
+ ----------
+ arr: ndarray (Ni..., M, Nk...)
+ Source array
+ indices: ndarray (Ni..., J, Nk...)
+ Indices to take along each 1d slice of `arr`. This must match the
+ dimension of arr, but dimensions Ni and Nj only need to broadcast
+ against `arr`.
+ axis: int
+ The axis to take 1d slices along. If axis is None, the input array is
+ treated as if it had first been flattened to 1d, for consistency with
+ `sort` and `argsort`.
+
+ Returns
+ -------
+ out: ndarray (Ni..., J, Nk...)
+ The indexed result.
+
+ Notes
+ -----
+ This is equivalent to (but faster than) the following use of `ndindex` and
+ `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices::
+
+ Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:]
+ J = indices.shape[axis] # Need not equal M
+ out = np.empty(Nk + (J,) + Nk)
+
+ for ii in ndindex(Ni):
+ for kk in ndindex(Nk):
+ a_1d = a [ii + s_[:,] + kk]
+ indices_1d = indices[ii + s_[:,] + kk]
+ out_1d = out [ii + s_[:,] + kk]
+ for j in range(J):
+ out_1d[j] = a_1d[indices_1d[j]]
+
+ Equivalently, eliminating the inner loop, the last two lines would be::
+
+ out_1d[:] = a_1d[indices_1d]
+
+ See Also
+ --------
+ take : Take along an axis, using the same indices for every 1d slice
+ put_along_axis :
+ Put values into the destination array by matching 1d index and data slices
+
+ Examples
+ --------
+
+ For this sample array
+
+ >>> a = np.array([[10, 30, 20], [60, 40, 50]])
+
+ We can sort either by using sort directly, or argsort and this function
+
+ >>> np.sort(a, axis=1)
+ array([[10, 20, 30],
+ [40, 50, 60]])
+ >>> ai = np.argsort(a, axis=1); ai
+ array([[0, 2, 1],
+ [1, 2, 0]], dtype=int64)
+ >>> np.take_along_axis(a, ai, axis=1)
+ array([[10, 20, 30],
+ [40, 50, 60]])
+
+ The same works for max and min, if you expand the dimensions:
+
+ >>> np.expand_dims(np.max(a, axis=1), axis=1)
+ array([[30],
+ [60]])
+ >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1)
+ >>> ai
+ array([[1],
+ [0], dtype=int64)
+ >>> np.take_along_axis(a, ai, axis=1)
+ array([[30],
+ [60]])
+
+ If we want to get the max and min at the same time, we can stack the
+ indices first
+
+ >>> ai_min = np.expand_dims(np.argmin(a, axis=1), axis=1)
+ >>> ai_max = np.expand_dims(np.argmax(a, axis=1), axis=1)
+ >>> ai = np.concatenate([ai_min, ai_max], axis=axis)
+ >> ai
+ array([[0, 1],
+ [1, 0]], dtype=int64)
+ >>> np.take_along_axis(a, ai, axis=1)
+ array([[10, 30],
+ [40, 60]])
+ """
+ # normalize inputs
+ if axis is None:
+ arr = arr.flat
+ arr_shape = (len(arr),) # flatiter has no .shape
+ axis = 0
+ else:
+ axis = normalize_axis_index(axis, arr.ndim)
+ arr_shape = arr.shape
+
+ # use the fancy index
+ return arr[_make_along_axis_idx(arr_shape, indices, axis)]
+
+
+def put_along_axis(arr, indices, values, axis):
+ """
+ Put values into the destination array by matching 1d index and data slices.
+
+ This iterates over matching 1d slices oriented along the specified axis in
+ the index and data arrays, and uses the former to place values into the
+ latter. These slices can be different lengths.
+
+ Functions returning an index along an axis, like `argsort` and
+ `argpartition`, produce suitable indices for this function.
+
+ .. versionadded:: 1.15.0
+
+ Parameters
+ ----------
+ arr: ndarray (Ni..., M, Nk...)
+ Destination array.
+ indices: ndarray (Ni..., J, Nk...)
+ Indices to change along each 1d slice of `arr`. This must match the
+ dimension of arr, but dimensions in Ni and Nj may be 1 to broadcast
+ against `arr`.
+ values: array_like (Ni..., J, Nk...)
+ values to insert at those indices. Its shape and dimension are
+ broadcast to match that of `indices`.
+ axis: int
+ The axis to take 1d slices along. If axis is None, the destination
+ array is treated as if a flattened 1d view had been created of it.
+
+ Notes
+ -----
+ This is equivalent to (but faster than) the following use of `ndindex` and
+ `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices::
+
+ Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:]
+ J = indices.shape[axis] # Need not equal M
+
+ for ii in ndindex(Ni):
+ for kk in ndindex(Nk):
+ a_1d = a [ii + s_[:,] + kk]
+ indices_1d = indices[ii + s_[:,] + kk]
+ values_1d = values [ii + s_[:,] + kk]
+ for j in range(J):
+ a_1d[indices_1d[j]] = values_1d[j]
+
+ Equivalently, eliminating the inner loop, the last two lines would be::
+
+ a_1d[indices_1d] = values_1d
+
+ See Also
+ --------
+ take_along_axis :
+ Take values from the input array by matching 1d index and data slices
+
+ Examples
+ --------
+
+ For this sample array
+
+ >>> a = np.array([[10, 30, 20], [60, 40, 50]])
+
+ We can replace the maximum values with:
+
+ >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1)
+ >>> ai
+ array([[1],
+ [0]], dtype=int64)
+ >>> np.put_along_axis(a, ai, 99, axis=1)
+ >>> a
+ array([[10, 99, 20],
+ [99, 40, 50]])
+
+ """
+ # normalize inputs
+ if axis is None:
+ arr = arr.flat
+ axis = 0
+ arr_shape = (len(arr),) # flatiter has no .shape
+ else:
+ axis = normalize_axis_index(axis, arr.ndim)
+ arr_shape = arr.shape
+
+ # use the fancy index
+ arr[_make_along_axis_idx(arr_shape, indices, axis)] = values
+
+
def apply_along_axis(func1d, axis, arr, *args, **kwargs):
"""
Apply a function to 1-D slices along the given axis.
diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py
index a35d90b70..c95894f94 100644
--- a/numpy/lib/tests/test_shape_base.py
+++ b/numpy/lib/tests/test_shape_base.py
@@ -2,16 +2,106 @@ from __future__ import division, absolute_import, print_function
import numpy as np
import warnings
+import functools
from numpy.lib.shape_base import (
apply_along_axis, apply_over_axes, array_split, split, hsplit, dsplit,
- vsplit, dstack, column_stack, kron, tile, expand_dims,
+ vsplit, dstack, column_stack, kron, tile, expand_dims, take_along_axis,
+ put_along_axis
)
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_raises, assert_warns
)
+def _add_keepdims(func):
+ """ hack in keepdims behavior into a function taking an axis """
+ @functools.wraps(func)
+ def wrapped(a, axis, **kwargs):
+ res = func(a, axis=axis, **kwargs)
+ if axis is None:
+ axis = 0 # res is now a scalar, so we can insert this anywhere
+ return np.expand_dims(res, axis=axis)
+ return wrapped
+
+
+class TestTakeAlongAxis(object):
+ def test_argequivalent(self):
+ """ Test it translates from arg<func> to <func> """
+ from numpy.random import rand
+ a = rand(3, 4, 5)
+
+ funcs = [
+ (np.sort, np.argsort, dict()),
+ (_add_keepdims(np.min), _add_keepdims(np.argmin), dict()),
+ (_add_keepdims(np.max), _add_keepdims(np.argmax), dict()),
+ (np.partition, np.argpartition, dict(kth=2)),
+ ]
+
+ for func, argfunc, kwargs in funcs:
+ for axis in list(range(a.ndim)) + [None]:
+ a_func = func(a, axis=axis, **kwargs)
+ ai_func = argfunc(a, axis=axis, **kwargs)
+ assert_equal(a_func, take_along_axis(a, ai_func, axis=axis))
+
+ def test_invalid(self):
+ """ Test it errors when indices has too few dimensions """
+ a = np.ones((10, 10))
+ ai = np.ones((10, 2), dtype=np.intp)
+
+ # sanity check
+ take_along_axis(a, ai, axis=1)
+
+ # not enough indices
+ assert_raises(ValueError, take_along_axis, a, np.array(1), axis=1)
+ # bool arrays not allowed
+ assert_raises(IndexError, take_along_axis, a, ai.astype(bool), axis=1)
+ # float arrays not allowed
+ assert_raises(IndexError, take_along_axis, a, ai.astype(float), axis=1)
+ # invalid axis
+ assert_raises(np.AxisError, take_along_axis, a, ai, axis=10)
+
+ def test_empty(self):
+ """ Test everything is ok with empty results, even with inserted dims """
+ a = np.ones((3, 4, 5))
+ ai = np.ones((3, 0, 5), dtype=np.intp)
+
+ actual = take_along_axis(a, ai, axis=1)
+ assert_equal(actual.shape, ai.shape)
+
+ def test_broadcast(self):
+ """ Test that non-indexing dimensions are broadcast in both directions """
+ a = np.ones((3, 4, 1))
+ ai = np.ones((1, 2, 5), dtype=np.intp)
+ actual = take_along_axis(a, ai, axis=1)
+ assert_equal(actual.shape, (3, 2, 5))
+
+
+class TestPutAlongAxis(object):
+ def test_replace_max(self):
+ a_base = np.array([[10, 30, 20], [60, 40, 50]])
+
+ for axis in list(range(a_base.ndim)) + [None]:
+ # we mutate this in the loop
+ a = a_base.copy()
+
+ # replace the max with a small value
+ i_max = _add_keepdims(np.argmax)(a, axis=axis)
+ put_along_axis(a, i_max, -99, axis=axis)
+
+ # find the new minimum, which should max
+ i_min = _add_keepdims(np.argmin)(a, axis=axis)
+
+ assert_equal(i_min, i_max)
+
+ def test_broadcast(self):
+ """ Test that non-indexing dimensions are broadcast in both directions """
+ a = np.ones((3, 4, 1))
+ ai = np.arange(10, dtype=np.intp).reshape((1, 2, 5)) % 4
+ put_along_axis(a, ai, 20, axis=1)
+ assert_equal(take_along_axis(a, ai, axis=1), 20)
+
+
class TestApplyAlongAxis(object):
def test_simple(self):
a = np.ones((20, 10), 'd')
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 17682d13f..fdffc7360 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -5524,15 +5524,7 @@ class MaskedArray(ndarray):
sidx = self.argsort(axis=axis, kind=kind, order=order,
fill_value=fill_value, endwith=endwith)
- # save memory for 1d arrays
- if self.ndim == 1:
- idx = sidx
- else:
- idx = list(np.ix_(*[np.arange(x) for x in self.shape]))
- idx[axis] = sidx
- idx = tuple(idx)
-
- self[...] = self[idx]
+ self[...] = np.take_along_axis(self, sidx, axis=axis)
def min(self, axis=None, out=None, fill_value=None, keepdims=np._NoValue):
"""
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index da35217d1..3be4d3625 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -747,19 +747,17 @@ def _median(a, axis=None, out=None, overwrite_input=False):
return np.ma.minimum_fill_value(asorted)
return s
- counts = count(asorted, axis=axis)
+ counts = count(asorted, axis=axis, keepdims=True)
h = counts // 2
- # create indexing mesh grid for all but reduced axis
- axes_grid = [np.arange(x) for i, x in enumerate(asorted.shape)
- if i != axis]
- ind = np.meshgrid(*axes_grid, sparse=True, indexing='ij')
+ # duplicate high if odd number of elements so mean does nothing
+ odd = counts % 2 == 1
+ l = np.where(odd, h, h-1)
- # insert indices of low and high median
- ind.insert(axis, h - 1)
- low = asorted[tuple(ind)]
- ind[axis] = np.minimum(h, asorted.shape[axis] - 1)
- high = asorted[tuple(ind)]
+ lh = np.concatenate([l,h], axis=axis)
+
+ # get low and high median
+ low_high = np.take_along_axis(asorted, lh, axis=axis)
def replace_masked(s):
# Replace masked entries with minimum_full_value unless it all values
@@ -767,30 +765,20 @@ def _median(a, axis=None, out=None, overwrite_input=False):
# larger than the fill value is undefined and a valid value placed
# elsewhere, e.g. [4, --, inf].
if np.ma.is_masked(s):
- rep = (~np.all(asorted.mask, axis=axis)) & s.mask
+ rep = (~np.all(asorted.mask, axis=axis, keepdims=True)) & s.mask
s.data[rep] = np.ma.minimum_fill_value(asorted)
s.mask[rep] = False
- replace_masked(low)
- replace_masked(high)
-
- # duplicate high if odd number of elements so mean does nothing
- odd = counts % 2 == 1
- np.copyto(low, high, where=odd)
- # not necessary for scalar True/False masks
- try:
- np.copyto(low.mask, high.mask, where=odd)
- except Exception:
- pass
+ replace_masked(low_high)
if np.issubdtype(asorted.dtype, np.inexact):
# avoid inf / x = masked
- s = np.ma.sum([low, high], axis=0, out=out)
+ s = np.ma.sum(low_high, axis=axis, out=out)
np.true_divide(s.data, 2., casting='unsafe', out=s.data)
s = np.lib.utils._median_nancheck(asorted, s, axis, out)
else:
- s = np.ma.mean([low, high], axis=0, out=out)
+ s = np.ma.mean(low_high, axis=axis, out=out)
return s