summaryrefslogtreecommitdiff
path: root/numpy/lib
diff options
context:
space:
mode:
authorLars Grüter <lagru@users.noreply.github.com>2018-11-15 23:23:26 +0100
committerMatti Picus <matti.picus@gmail.com>2018-11-15 14:23:26 -0800
commita4b96ad7649281de2c3a41292fcbab4c77c0743d (patch)
treedf39b13e69f51d79acd5a7e2647153a4c468c5ff /numpy/lib
parent7ada0c13a3e0d003670f421e8533cbb5388f705c (diff)
downloadnumpy-a4b96ad7649281de2c3a41292fcbab4c77c0743d.tar.gz
MAINT: Rewrite shape normalization in pad function (#11966)
Diffstat (limited to 'numpy/lib')
-rw-r--r--numpy/lib/arraypad.py153
-rw-r--r--numpy/lib/tests/test_arraypad.py86
2 files changed, 145 insertions, 94 deletions
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index d27a3918f..4f6371058 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -886,105 +886,71 @@ def _pad_wrap(arr, pad_amt, axis=-1):
return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis)
-def _normalize_shape(ndarray, shape, cast_to_int=True):
+def _as_pairs(x, ndim, as_index=False):
"""
- Private function which does some checks and normalizes the possibly
- much simpler representations of 'pad_width', 'stat_length',
- 'constant_values', 'end_values'.
+ Broadcast `x` to an array with the shape (`ndim`, 2).
- Parameters
- ----------
- narray : ndarray
- Input ndarray
- shape : {sequence, array_like, float, int}, optional
- The width of padding (pad_width), the number of elements on the
- edge of the narray used for statistics (stat_length), the constant
- value(s) to use when filling padded regions (constant_values), or the
- endpoint target(s) for linear ramps (end_values).
- ((before_1, after_1), ... (before_N, after_N)) unique number of
- elements for each axis where `N` is rank of `narray`.
- ((before, after),) yields same before and after constants for each
- axis.
- (constant,) or val is a shortcut for before = after = constant for
- all axes.
- cast_to_int : bool, optional
- Controls if values in ``shape`` will be rounded and cast to int
- before being returned.
-
- Returns
- -------
- normalized_shape : tuple of tuples
- val => ((val, val), (val, val), ...)
- [[val1, val2], [val3, val4], ...] => ((val1, val2), (val3, val4), ...)
- ((val1, val2), (val3, val4), ...) => no change
- [[val1, val2], ] => ((val1, val2), (val1, val2), ...)
- ((val1, val2), ) => ((val1, val2), (val1, val2), ...)
- [[val , ], ] => ((val, val), (val, val), ...)
- ((val , ), ) => ((val, val), (val, val), ...)
-
- """
- ndims = ndarray.ndim
-
- # Shortcut shape=None
- if shape is None:
- return ((None, None), ) * ndims
-
- # Convert any input `info` to a NumPy array
- shape_arr = np.asarray(shape)
-
- try:
- shape_arr = np.broadcast_to(shape_arr, (ndims, 2))
- except ValueError:
- fmt = "Unable to create correctly shaped tuple from %s"
- raise ValueError(fmt % (shape,))
-
- # Cast if necessary
- if cast_to_int is True:
- shape_arr = np.round(shape_arr).astype(int)
-
- # Convert list of lists to tuple of tuples
- return tuple(tuple(axis) for axis in shape_arr.tolist())
-
-
-def _validate_lengths(narray, number_elements):
- """
- Private function which does some checks and reformats pad_width and
- stat_length using _normalize_shape.
+ A helper function for `pad` that prepares and validates arguments like
+ `pad_width` for iteration in pairs.
Parameters
----------
- narray : ndarray
- Input ndarray
- number_elements : {sequence, int}, optional
- The width of padding (pad_width) or the number of elements on the edge
- of the narray used for statistics (stat_length).
- ((before_1, after_1), ... (before_N, after_N)) unique number of
- elements for each axis.
- ((before, after),) yields same before and after constants for each
- axis.
- (constant,) or int is a shortcut for before = after = constant for all
- axes.
+ x : {None, scalar, array-like}
+ The object to broadcast to the shape (`ndim`, 2).
+ ndim : int
+ Number of pairs the broadcasted `x` will have.
+ as_index : bool, optional
+ If `x` is not None, try to round each element of `x` to an integer
+ (dtype `np.intp`) and ensure every element is non-negative.
Returns
-------
- _validate_lengths : tuple of tuples
- int => ((int, int), (int, int), ...)
- [[int1, int2], [int3, int4], ...] => ((int1, int2), (int3, int4), ...)
- ((int1, int2), (int3, int4), ...) => no change
- [[int1, int2], ] => ((int1, int2), (int1, int2), ...)
- ((int1, int2), ) => ((int1, int2), (int1, int2), ...)
- [[int , ], ] => ((int, int), (int, int), ...)
- ((int , ), ) => ((int, int), (int, int), ...)
-
+ pairs : nested iterables, shape (`ndim`, 2)
+ The broadcasted version of `x`.
+
+ Raises
+ ------
+ ValueError
+ If `as_index` is True and `x` contains negative elements.
+ Or if `x` is not broadcastable to the shape (`ndim`, 2).
"""
- normshp = _normalize_shape(narray, number_elements)
- for i in normshp:
- chk = [1 if x is None else x for x in i]
- chk = [1 if x >= 0 else -1 for x in chk]
- if (chk[0] < 0) or (chk[1] < 0):
- fmt = "%s cannot contain negative values."
- raise ValueError(fmt % (number_elements,))
- return normshp
+ if x is None:
+ # Pass through None as a special case, otherwise np.round(x) fails
+ # with an AttributeError
+ return ((None, None),) * ndim
+
+ x = np.array(x)
+ if as_index:
+ x = np.round(x).astype(np.intp, copy=False)
+
+ if x.ndim < 3:
+ # Optimization: Possibly use faster paths for cases where `x` has
+ # only 1 or 2 elements. `np.broadcast_to` could handle these as well
+ # but is currently slower
+
+ if x.size == 1:
+ # x was supplied as a single value
+ x = x.ravel() # Ensure x[0] works for x.ndim == 0, 1, 2
+ if as_index and x < 0:
+ raise ValueError("index can't contain negative values")
+ return ((x[0], x[0]),) * ndim
+
+ if x.size == 2 and x.shape != (2, 1):
+ # x was supplied with a single value for each side,
+ # except for the case where each dimension has a single value
+ # that should be broadcast to a pair,
+ # e.g. [[1], [2]] -> [[1, 1], [2, 2]] not [[1, 2], [1, 2]]
+ x = x.ravel() # Ensure x[0], x[1] works
+ if as_index and (x[0] < 0 or x[1] < 0):
+ raise ValueError("index can't contain negative values")
+ return ((x[0], x[1]),) * ndim
+
+ if as_index and x.min() < 0:
+ raise ValueError("index can't contain negative values")
+
+ # Converting the array with `tolist` seems to improve performance
+ # when iterating and indexing the result (see usage in `pad`)
+ return np.broadcast_to(x, (ndim, 2)).tolist()
###############################################################################
@@ -1203,7 +1169,7 @@ def pad(array, pad_width, mode, **kwargs):
raise TypeError('`pad_width` must be of integral type.')
narray = np.array(array)
- pad_width = _validate_lengths(narray, pad_width)
+ pad_width = _as_pairs(pad_width, narray.ndim, as_index=True)
allowedkwargs = {
'constant': ['constant_values'],
@@ -1239,10 +1205,9 @@ def pad(array, pad_width, mode, **kwargs):
# Need to only normalize particular keywords.
for i in kwargs:
if i == 'stat_length':
- kwargs[i] = _validate_lengths(narray, kwargs[i])
+ kwargs[i] = _as_pairs(kwargs[i], narray.ndim, as_index=True)
if i in ['end_values', 'constant_values']:
- kwargs[i] = _normalize_shape(narray, kwargs[i],
- cast_to_int=False)
+ kwargs[i] = _as_pairs(kwargs[i], narray.ndim)
else:
# Drop back to old, slower np.apply_along_axis mode for user-supplied
# vector function
diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py
index e62fccaa0..20f6e4a1b 100644
--- a/numpy/lib/tests/test_arraypad.py
+++ b/numpy/lib/tests/test_arraypad.py
@@ -9,6 +9,91 @@ import numpy as np
from numpy.testing import (assert_array_equal, assert_raises, assert_allclose,
assert_equal)
from numpy.lib import pad
+from numpy.lib.arraypad import _as_pairs
+
+
+class TestAsPairs(object):
+
+ def test_single_value(self):
+ """Test casting for a single value."""
+ expected = np.array([[3, 3]] * 10)
+ for x in (3, [3], [[3]]):
+ result = _as_pairs(x, 10)
+ assert_equal(result, expected)
+ # Test with dtype=object
+ obj = object()
+ assert_equal(
+ _as_pairs(obj, 10),
+ np.array([[obj, obj]] * 10)
+ )
+
+ def test_two_values(self):
+ """Test proper casting for two different values."""
+ # Broadcasting in the first dimension with numbers
+ expected = np.array([[3, 4]] * 10)
+ for x in ([3, 4], [[3, 4]]):
+ result = _as_pairs(x, 10)
+ assert_equal(result, expected)
+ # and with dtype=object
+ obj = object()
+ assert_equal(
+ _as_pairs(["a", obj], 10),
+ np.array([["a", obj]] * 10)
+ )
+
+ # Broadcasting in the second / last dimension with numbers
+ assert_equal(
+ _as_pairs([[3], [4]], 2),
+ np.array([[3, 3], [4, 4]])
+ )
+ # and with dtype=object
+ assert_equal(
+ _as_pairs([["a"], [obj]], 2),
+ np.array([["a", "a"], [obj, obj]])
+ )
+
+ def test_with_none(self):
+ expected = ((None, None), (None, None), (None, None))
+ assert_equal(
+ _as_pairs(None, 3, as_index=False),
+ expected
+ )
+ assert_equal(
+ _as_pairs(None, 3, as_index=True),
+ expected
+ )
+
+ def test_pass_through(self):
+ """Test if `x` already matching desired output are passed through."""
+ expected = np.arange(12).reshape((6, 2))
+ assert_equal(
+ _as_pairs(expected, 6),
+ expected
+ )
+
+ def test_as_index(self):
+ """Test results if `as_index=True`."""
+ assert_equal(
+ _as_pairs([2.6, 3.3], 10, as_index=True),
+ np.array([[3, 3]] * 10, dtype=np.intp)
+ )
+ assert_equal(
+ _as_pairs([2.6, 4.49], 10, as_index=True),
+ np.array([[3, 4]] * 10, dtype=np.intp)
+ )
+ for x in (-3, [-3], [[-3]], [-3, 4], [3, -4], [[-3, 4]], [[4, -3]],
+ [[1, 2]] * 9 + [[1, -2]]):
+ with pytest.raises(ValueError, match="negative values"):
+ _as_pairs(x, 10, as_index=True)
+
+ def test_exceptions(self):
+ """Ensure faulty usage is discovered."""
+ with pytest.raises(ValueError, match="more dimensions than allowed"):
+ _as_pairs([[[3]]], 10)
+ with pytest.raises(ValueError, match="could not be broadcast"):
+ _as_pairs([[1, 2], [3, 4]], 3)
+ with pytest.raises(ValueError, match="could not be broadcast"):
+ _as_pairs(np.ones((2, 3)), 3)
class TestConditionalShortcuts(object):
@@ -535,6 +620,7 @@ class TestConstant(object):
assert_array_equal(arr, expected)
+
class TestLinearRamp(object):
def test_check_simple(self):
a = np.arange(100).astype('f')