MAINT: Rewrite shape normalization in pad function (#11966)

author: Lars Grüter <lagru@users.noreply.github.com> 2018-11-15 23:23:26 +0100
committer: Matti Picus <matti.picus@gmail.com> 2018-11-15 14:23:26 -0800
commit: a4b96ad7649281de2c3a41292fcbab4c77c0743d (patch)
tree: df39b13e69f51d79acd5a7e2647153a4c468c5ff /numpy/lib
parent: 7ada0c13a3e0d003670f421e8533cbb5388f705c (diff)
download: numpy-a4b96ad7649281de2c3a41292fcbab4c77c0743d.tar.gz
2 files changed, 145 insertions, 94 deletions
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index d27a3918f..4f6371058 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -886,105 +886,71 @@ def _pad_wrap(arr, pad_amt, axis=-1):
     return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis)
 
 
-def _normalize_shape(ndarray, shape, cast_to_int=True):
+def _as_pairs(x, ndim, as_index=False):
     """
-    Private function which does some checks and normalizes the possibly
-    much simpler representations of 'pad_width', 'stat_length',
-    'constant_values', 'end_values'.
+    Broadcast `x` to an array with the shape (`ndim`, 2).
 
-    Parameters
-    ----------
-    narray : ndarray
-        Input ndarray
-    shape : {sequence, array_like, float, int}, optional
-        The width of padding (pad_width), the number of elements on the
-        edge of the narray used for statistics (stat_length), the constant
-        value(s) to use when filling padded regions (constant_values), or the
-        endpoint target(s) for linear ramps (end_values).
-        ((before_1, after_1), ... (before_N, after_N)) unique number of
-        elements for each axis where `N` is rank of `narray`.
-        ((before, after),) yields same before and after constants for each
-        axis.
-        (constant,) or val is a shortcut for before = after = constant for
-        all axes.
-    cast_to_int : bool, optional
-        Controls if values in ``shape`` will be rounded and cast to int
-        before being returned.
-
-    Returns
-    -------
-    normalized_shape : tuple of tuples
-        val                               => ((val, val), (val, val), ...)
-        [[val1, val2], [val3, val4], ...] => ((val1, val2), (val3, val4), ...)
-        ((val1, val2), (val3, val4), ...) => no change
-        [[val1, val2], ]                  => ((val1, val2), (val1, val2), ...)
-        ((val1, val2), )                  => ((val1, val2), (val1, val2), ...)
-        [[val ,     ], ]                  => ((val, val), (val, val), ...)
-        ((val ,     ), )                  => ((val, val), (val, val), ...)
-
-    """
-    ndims = ndarray.ndim
-
-    # Shortcut shape=None
-    if shape is None:
-        return ((None, None), ) * ndims
-
-    # Convert any input `info` to a NumPy array
-    shape_arr = np.asarray(shape)
-
-    try:
-        shape_arr = np.broadcast_to(shape_arr, (ndims, 2))
-    except ValueError:
-        fmt = "Unable to create correctly shaped tuple from %s"
-        raise ValueError(fmt % (shape,))
-
-    # Cast if necessary
-    if cast_to_int is True:
-        shape_arr = np.round(shape_arr).astype(int)
-
-    # Convert list of lists to tuple of tuples
-    return tuple(tuple(axis) for axis in shape_arr.tolist())
-
-
-def _validate_lengths(narray, number_elements):
-    """
-    Private function which does some checks and reformats pad_width and
-    stat_length using _normalize_shape.
+    A helper function for `pad` that prepares and validates arguments like
+    `pad_width` for iteration in pairs.
 
     Parameters
     ----------
-    narray : ndarray
-        Input ndarray
-    number_elements : {sequence, int}, optional
-        The width of padding (pad_width) or the number of elements on the edge
-        of the narray used for statistics (stat_length).
-        ((before_1, after_1), ... (before_N, after_N)) unique number of
-        elements for each axis.
-        ((before, after),) yields same before and after constants for each
-        axis.
-        (constant,) or int is a shortcut for before = after = constant for all
-        axes.
+    x : {None, scalar, array-like}
+        The object to broadcast to the shape (`ndim`, 2).
+    ndim : int
+        Number of pairs the broadcasted `x` will have.
+    as_index : bool, optional
+        If `x` is not None, try to round each element of `x` to an integer
+        (dtype `np.intp`) and ensure every element is positive.
 
     Returns
     -------
-    _validate_lengths : tuple of tuples
-        int                               => ((int, int), (int, int), ...)
-        [[int1, int2], [int3, int4], ...] => ((int1, int2), (int3, int4), ...)
-        ((int1, int2), (int3, int4), ...) => no change
-        [[int1, int2], ]                  => ((int1, int2), (int1, int2), ...)
-        ((int1, int2), )                  => ((int1, int2), (int1, int2), ...)
-        [[int ,     ], ]                  => ((int, int), (int, int), ...)
-        ((int ,     ), )                  => ((int, int), (int, int), ...)
-
+    pairs : nested iterables, shape (`ndim`, 2)
+        The broadcasted version of `x`.
+
+    Raises
+    ------
+    ValueError
+        If `as_index` is True and `x` contains negative elements.
+        Or if `x` is not broadcastable to the shape (`ndim`, 2).
     """
-    normshp = _normalize_shape(narray, number_elements)
-    for i in normshp:
-        chk = [1 if x is None else x for x in i]
-        chk = [1 if x >= 0 else -1 for x in chk]
-        if (chk[0] < 0) or (chk[1] < 0):
-            fmt = "%s cannot contain negative values."
-            raise ValueError(fmt % (number_elements,))
-    return normshp
+    if x is None:
+        # Pass through None as a special case, otherwise np.round(x) fails
+        # with an AttributeError
+        return ((None, None),) * ndim
+
+    x = np.array(x)
+    if as_index:
+        x = np.round(x).astype(np.intp, copy=False)
+
+    if x.ndim < 3:
+        # Optimization: Possibly use faster paths for cases where `x` has
+        # only 1 or 2 elements. `np.broadcast_to` could handle these as well
+        # but is currently slower
+
+        if x.size == 1:
+            # x was supplied as a single value
+            x = x.ravel()  # Ensure x[0] works for x.ndim == 0, 1, 2
+            if as_index and x < 0:
+                raise ValueError("index can't contain negative values")
+            return ((x[0], x[0]),) * ndim
+
+        if x.size == 2 and x.shape != (2, 1):
+            # x was supplied with a single value for each side
+            # but except case when each dimension has a single value
+            # which should be broadcasted to a pair,
+            # e.g. [[1], [2]] -> [[1, 1], [2, 2]] not [[1, 2], [1, 2]]
+            x = x.ravel()  # Ensure x[0], x[1] works
+            if as_index and (x[0] < 0 or x[1] < 0):
+                raise ValueError("index can't contain negative values")
+            return ((x[0], x[1]),) * ndim
+
+    if as_index and x.min() < 0:
+        raise ValueError("index can't contain negative values")
+
+    # Converting the array with `tolist` seems to improve performance
+    # when iterating and indexing the result (see usage in `pad`)
+    return np.broadcast_to(x, (ndim, 2)).tolist()
 
 
 ###############################################################################
@@ -1203,7 +1169,7 @@ def pad(array, pad_width, mode, **kwargs):
         raise TypeError('`pad_width` must be of integral type.')
 
     narray = np.array(array)
-    pad_width = _validate_lengths(narray, pad_width)
+    pad_width = _as_pairs(pad_width, narray.ndim, as_index=True)
 
     allowedkwargs = {
         'constant': ['constant_values'],
@@ -1239,10 +1205,9 @@ def pad(array, pad_width, mode, **kwargs):
         # Need to only normalize particular keywords.
         for i in kwargs:
             if i == 'stat_length':
-                kwargs[i] = _validate_lengths(narray, kwargs[i])
+                kwargs[i] = _as_pairs(kwargs[i], narray.ndim, as_index=True)
             if i in ['end_values', 'constant_values']:
-                kwargs[i] = _normalize_shape(narray, kwargs[i],
-                                             cast_to_int=False)
+                kwargs[i] = _as_pairs(kwargs[i], narray.ndim)
     else:
         # Drop back to old, slower np.apply_along_axis mode for user-supplied
         # vector function
diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py
index e62fccaa0..20f6e4a1b 100644
--- a/numpy/lib/tests/test_arraypad.py
+++ b/numpy/lib/tests/test_arraypad.py
@@ -9,6 +9,91 @@ import numpy as np
 from numpy.testing import (assert_array_equal, assert_raises, assert_allclose,
                            assert_equal)
 from numpy.lib import pad
+from numpy.lib.arraypad import _as_pairs
+
+
+class TestAsPairs(object):
+
+    def test_single_value(self):
+        """Test casting for a single value."""
+        expected = np.array([[3, 3]] * 10)
+        for x in (3, [3], [[3]]):
+            result = _as_pairs(x, 10)
+            assert_equal(result, expected)
+        # Test with dtype=object
+        obj = object()
+        assert_equal(
+            _as_pairs(obj, 10),
+            np.array([[obj, obj]] * 10)
+        )
+
+    def test_two_values(self):
+        """Test proper casting for two different values."""
+        # Broadcasting in the first dimension with numbers
+        expected = np.array([[3, 4]] * 10)
+        for x in ([3, 4], [[3, 4]]):
+            result = _as_pairs(x, 10)
+            assert_equal(result, expected)
+        # and with dtype=object
+        obj = object()
+        assert_equal(
+            _as_pairs(["a", obj], 10),
+            np.array([["a", obj]] * 10)
+        )
+
+        # Broadcasting in the second / last dimension with numbers
+        assert_equal(
+            _as_pairs([[3], [4]], 2),
+            np.array([[3, 3], [4, 4]])
+        )
+        # and with dtype=object
+        assert_equal(
+            _as_pairs([["a"], [obj]], 2),
+            np.array([["a", "a"], [obj, obj]])
+        )
+
+    def test_with_none(self):
+        expected = ((None, None), (None, None), (None, None))
+        assert_equal(
+            _as_pairs(None, 3, as_index=False),
+            expected
+        )
+        assert_equal(
+            _as_pairs(None, 3, as_index=True),
+            expected
+        )
+
+    def test_pass_through(self):
+        """Test if `x` already matching desired output are passed through."""
+        expected = np.arange(12).reshape((6, 2))
+        assert_equal(
+            _as_pairs(expected, 6),
+            expected
+        )
+
+    def test_as_index(self):
+        """Test results if `as_index=True`."""
+        assert_equal(
+            _as_pairs([2.6, 3.3], 10, as_index=True),
+            np.array([[3, 3]] * 10, dtype=np.intp)
+        )
+        assert_equal(
+            _as_pairs([2.6, 4.49], 10, as_index=True),
+            np.array([[3, 4]] * 10, dtype=np.intp)
+        )
+        for x in (-3, [-3], [[-3]], [-3, 4], [3, -4], [[-3, 4]], [[4, -3]],
+                  [[1, 2]] * 9 + [[1, -2]]):
+            with pytest.raises(ValueError, match="negative values"):
+                _as_pairs(x, 10, as_index=True)
+
+    def test_exceptions(self):
+        """Ensure faulty usage is discovered."""
+        with pytest.raises(ValueError, match="more dimensions than allowed"):
+            _as_pairs([[[3]]], 10)
+        with pytest.raises(ValueError, match="could not be broadcast"):
+            _as_pairs([[1, 2], [3, 4]], 3)
+        with pytest.raises(ValueError, match="could not be broadcast"):
+            _as_pairs(np.ones((2, 3)), 3)
 
 
 class TestConditionalShortcuts(object):
@@ -535,6 +620,7 @@ class TestConstant(object):
 
         assert_array_equal(arr, expected)
 
+
 class TestLinearRamp(object):
     def test_check_simple(self):
         a = np.arange(100).astype('f')
author	Lars Grüter <lagru@users.noreply.github.com>	2018-11-15 23:23:26 +0100
committer	Matti Picus <matti.picus@gmail.com>	2018-11-15 14:23:26 -0800
commit	a4b96ad7649281de2c3a41292fcbab4c77c0743d (patch)
tree	df39b13e69f51d79acd5a7e2647153a4c468c5ff /numpy/lib
parent	7ada0c13a3e0d003670f421e8533cbb5388f705c (diff)
download	numpy-a4b96ad7649281de2c3a41292fcbab4c77c0743d.tar.gz