ENH: gradient support for unevenly spaced data

This somehow reverts #7618 and solves #6847, #7548 by implementing support for unevenly spaced data. Now the behaviour is similar to that of Matlab/Octave function. As argument it can take: 1. A single scalar to specify a sample distance for all dimensions. 2. N scalars to specify a constant sample distance for each dimension. i.e. `dx`, `dy`, `dz`, ... 3. N arrays to specify the coordinates of the values along each dimension of F. The length of the array must match the size of the corresponding dimension 4. Any combination of N scalars/arrays with the meaning of 2. and 3.
author: Alessandro Pietro Bardelli <apbard@users.noreply.github.com> 2017-01-04 00:58:44 +0100
committer: Alessandro Pietro Bardelli <apbard@users.noreply.github.com> 2017-02-22 10:50:53 +0100
commit: 9520de90837d0afaac3d1612047f4b952563b3d5 (patch)
tree: 623938c6d79bda2affa5e1794f8512155d2d4241 /numpy/lib/function_base.py
parent: f6a07571df745f01eaccf4b05b8476da6f0b5833 (diff)
download: numpy-9520de90837d0afaac3d1612047f4b952563b3d5.tar.gz
1 files changed, 235 insertions, 84 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 4d1ffbccc..fc49a6fd7 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -128,7 +128,8 @@ def rot90(m, k=1, axes=(0,1)):
         return flip(flip(m, axes[0]), axes[1])
 
     axes_list = arange(0, m.ndim)
-    axes_list[axes[0]], axes_list[axes[1]] = axes_list[axes[1]], axes_list[axes[0]]
+    (axes_list[axes[0]], axes_list[axes[1]]) = (axes_list[axes[1]],
+                                                axes_list[axes[0]])
 
     if k == 1:
         return transpose(flip(m,axes[1]), axes_list)
@@ -332,7 +333,7 @@ def _hist_bin_doane(x):
 
     Improved version of Sturges' formula which works better for
     non-normal data. See
-    http://stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning
+    stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning
 
     Parameters
     ----------
@@ -638,7 +639,7 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
     >>> rng = np.random.RandomState(10)  # deterministic random data
     >>> a = np.hstack((rng.normal(size=1000),
     ...                rng.normal(loc=5, scale=2, size=1000)))
-    >>> plt.hist(a, bins='auto')  # plt.hist passes its arguments to np.histogram
+    >>> plt.hist(a, bins='auto')  # arguments are passed to np.histogram
     >>> plt.title("Histogram with 'auto' bins")
     >>> plt.show()
 
@@ -764,15 +765,19 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
             decrement = tmp_a_data < bin_edges[indices]
             indices[decrement] -= 1
             # The last bin includes the right edge. The other bins do not.
-            increment = (tmp_a_data >= bin_edges[indices + 1]) & (indices != bins - 1)
+            increment = ((tmp_a_data >= bin_edges[indices + 1])
+                         & (indices != bins - 1))
             indices[increment] += 1
 
             # We now compute the histogram using bincount
             if ntype.kind == 'c':
-                n.real += np.bincount(indices, weights=tmp_w.real, minlength=bins)
-                n.imag += np.bincount(indices, weights=tmp_w.imag, minlength=bins)
+                n.real += np.bincount(indices, weights=tmp_w.real,
+                                      minlength=bins)
+                n.imag += np.bincount(indices, weights=tmp_w.imag,
+                                      minlength=bins)
             else:
-                n += np.bincount(indices, weights=tmp_w, minlength=bins).astype(ntype)
+                n += np.bincount(indices, weights=tmp_w,
+                                 minlength=bins).astype(ntype)
 
         # Rename the bin edges for return.
         bins = bin_edges
@@ -1499,19 +1504,29 @@ def gradient(f, *varargs, **kwargs):
     Return the gradient of an N-dimensional array.
 
     The gradient is computed using second order accurate central differences
-    in the interior and either first differences or second order accurate
-    one-sides (forward or backwards) differences at the boundaries. The
-    returned gradient hence has the same shape as the input array.
+    in the interior points and either first or second order accurate one-sides
+    (forward or backwards) differences at the boundaries.
+    The returned gradient hence has the same shape as the input array.
 
     Parameters
     ----------
     f : array_like
         An N-dimensional array containing samples of a scalar function.
-    varargs : scalar or list of scalar, optional
-        N scalars specifying the sample distances for each dimension,
-        i.e. `dx`, `dy`, `dz`, ... Default distance: 1.
-        single scalar specifies sample distance for all dimensions.
-        if `axis` is given, the number of varargs must equal the number of axes.
+    varargs : list of scalar or array, optional
+        Spacing between f values. Default unitary spacing for all dimensions.
+        Spacing can be specified using:
+        
+        1. single scalar to specify a sample distance for all dimensions.
+        2. N scalars to specify a constant sample distance for each dimension.
+           i.e. `dx`, `dy`, `dz`, ...
+        3. N arrays to specify the coordinates of the values along each 
+           dimension of F. The length of the array must match the size of 
+           the corresponding dimension
+        4. Any combination of N scalars/arrays with the meaning of 2. and 3.
+        
+        If `axis` is given, the number of varargs must equal the number of axes.
+        Default: 1.
+        
     edge_order : {1, 2}, optional
         Gradient is calculated using N-th order accurate differences
         at the boundaries. Default: 1.
@@ -1520,8 +1535,9 @@ def gradient(f, *varargs, **kwargs):
 
     axis : None or int or tuple of ints, optional
         Gradient is calculated only along the given axis or axes
-        The default (axis = None) is to calculate the gradient for all the axes of the input array.
-        axis may be negative, in which case it counts from the last to the first axis.
+        The default (axis = None) is to calculate the gradient for all the axes 
+        of the input array. axis may be negative, in which case it counts from 
+        the last to the first axis.
 
         .. versionadded:: 1.11.0
 
@@ -1529,17 +1545,31 @@ def gradient(f, *varargs, **kwargs):
     -------
     gradient : ndarray or list of ndarray
         A set of ndarrays (or a single ndarray if there is only one dimension)
-        correposnding to the derivatives of f with respect to each dimension.
+        corresponding to the derivatives of f with respect to each dimension.
         Each derivative has the same shape as f.
-
+        
     Examples
     --------
-    >>> x = np.array([1, 2, 4, 7, 11, 16], dtype=np.float)
-    >>> np.gradient(x)
+    >>> f = np.array([1, 2, 4, 7, 11, 16], dtype=np.float)
+    >>> np.gradient(f)
     array([ 1. ,  1.5,  2.5,  3.5,  4.5,  5. ])
-    >>> np.gradient(x, 2)
+    >>> np.gradient(f, 2)
     array([ 0.5 ,  0.75,  1.25,  1.75,  2.25,  2.5 ])
+    
+    Spacing can be also specified with an array that represents the coordinates
+    of the values F along the dimensions.
+    For instance a uniform spacing:
+
+    >>> x = np.arange(f.size)
+    >>> np.gradient(f, x)
+    array([ 1. ,  1.5,  2.5,  3.5,  4.5,  5. ])
 
+    Or a non uniform one: 
+    
+    >>> x = np.array([0., 1., 1.5, 3.5, 4., 6.], dtype=np.float)
+    >>> np.gradient(f, x)
+    array([ 1. ,  3. ,  3.5,  6.7,  6.9,  2.5])
+    
     For two dimensional arrays, the return will be two arrays ordered by
     axis. In this example the first array stands for the gradient in
     rows and the second one in columns direction:
@@ -1549,15 +1579,99 @@ def gradient(f, *varargs, **kwargs):
             [ 2.,  2., -1.]]), array([[ 1. ,  2.5,  4. ],
             [ 1. ,  1. ,  1. ]])]
 
+    In this example the spacing is also specified: 
+    uniform for axis=0 and non uniform for axis=1
+
+    >>> dx = 2.
+    >>> y = [1., 1.5, 3.5]
+    >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=np.float), dx, y)
+    [array([[ 1. ,  1. , -0.5],
+            [ 1. ,  1. , -0.5]]), array([[ 2. ,  2. ,  2. ],
+            [ 2. ,  1.7,  0.5]])]
+
+    It is possible to specify how boundaries are treated using `edge_order`
+    
     >>> x = np.array([0, 1, 2, 3, 4])
-    >>> y = x**2
-    >>> np.gradient(y, edge_order=2)
+    >>> f = x**2
+    >>> np.gradient(f, edge_order=1)
+    array([ 1.,  2.,  4.,  6.,  7.])
+    >>> np.gradient(f, edge_order=2)
     array([-0.,  2.,  4.,  6.,  8.])
 
-    The axis keyword can be used to specify a subset of axes of which the gradient is calculated
+    The `axis` keyword can be used to specify a subset of axes of which the 
+    gradient is calculated
+    
     >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=np.float), axis=0)
     array([[ 2.,  2., -1.],
            [ 2.,  2., -1.]])
+
+    Notes
+    -----
+    Assuming that :math:`f\\in C^{3}` (i.e., :math:`f` has at least 3 continous 
+    derivatives) and let be :math:`h_{*}` a non homogeneous stepsize, the 
+    spacing the finite difference coefficients are computed by minimising 
+    the consistency error :math:`\\eta_{i}`:
+    
+    .. math:: 
+        
+        \\eta_{i} = f_{i}^{\\left(1\\right)} - 
+                    \\left[ \\alpha f\\left(x_{i}\\right) + 
+                            \\beta f\\left(x_{i} + h_{d}\\right) +
+                            \\gamma f\\left(x_{i}-h_{s}\\right)
+                    \\right]
+    
+    By substituting :math:`f(x_{i} + h_{d})` and :math:`f(x_{i} - h_{s})` 
+    with their Taylor series expansion, this translates into solving 
+    the following the linear system:
+
+    .. math:: 
+
+        \\left\\{ 
+            \\begin{array}{r}
+                \\alpha+\\beta+\\gamma=0 \\\\
+                -\\beta h_{d}+\\gamma h_{s}=1 \\\\
+                \\beta h_{d}^{2}+\\gamma h_{s}^{2}=0
+            \\end{array}
+        \\right.
+
+    The resulting approximation of :math:`f_{i}^{(1)}` is the following:
+
+    .. math:: 
+    
+        \\hat f_{i}^{(1)} = 
+            \\frac{ 
+                h_{s}^{2}f\\left(x_{i} + h_{d}\\right) 
+                + \\left(h_{d}^{2} - h_{s}^{2}\\right)f\\left(x_{i}\\right) 
+                - h_{d}^{2}f\\left(x_{i}-h_{s}\\right)}
+                { h_{s}h_{d}\\left(h_{d} + h_{s}\\right)}
+            + \mathcal{O}\\left(\\frac{h_{d}h_{s}^{2} 
+                                + h_{s}h_{d}^{2}}{h_{d} 
+                                + h_{s}}\\right)
+
+    It is worth noting that if :math:`h_{s}=h_{d}` 
+    (i.e., data are evenly spaced) 
+    we find the standard second order approximation:
+    
+    .. math:: 
+        
+        \\hat f_{i}^{(1)}=
+            \\frac{f\\left(x_{i+1}\\right) - f\\left(x_{i-1}\\right)}{2h} 
+            + \mathcal{O}\\left(h^{2}\\right)
+
+    With a similar procedure the forward/backward approximations used for 
+    boundaries can be derived.
+    
+    References
+    ----------
+    .. [1]  Quarteroni A., Sacco R., Saleri F. (2007) Numerical Mathematics 
+            (Texts in Applied Mathematics). New York: Springer.
+    .. [2]  Durran D. R. (1999) Numerical Methods for Wave Equations 
+            in Geophysical Fluid Dynamics. New York: Springer.
+    .. [3]  Fornberg B. (1988) Generation of Finite Difference Formulas on 
+            Arbitrarily Spaced Grids, 
+            Mathematics of Computation 51, no. 184 : 699-706. 
+            `PDF <http://www.ams.org/journals/mcom/1988-51-184/
+            S0025-5718-1988-0935077-0/S0025-5718-1988-0935077-0.pdf>`_.
     """
     f = np.asanyarray(f)
     N = len(f.shape)  # number of dimensions
@@ -1576,22 +1690,32 @@ def gradient(f, *varargs, **kwargs):
     if max(axes) >= N or min(axes) < 0:
         raise ValueError("'axis' entry is out of bounds")
 
-    if len(set(axes)) != len(axes):
+    len_axes = len(axes)
+    if len(set(axes)) != len_axes:
         raise ValueError("duplicate value in 'axis'")
 
     n = len(varargs)
     if n == 0:
-        dx = [1.0]*N
-    elif n == 1:
-        dx = [varargs[0]]*N
-    elif n == len(axes):
+        dx = [1.0] * len_axes
+    elif n == len_axes or (n == 1 and np.isscalar(varargs[0])):
         dx = list(varargs)
+        for i, distances in enumerate(dx):
+            if np.isscalar(distances):
+                continue
+            if len(distances) != f.shape[axes[i]]:
+                raise ValueError("distances must be either scalars or match "
+                                 "the length of the corresponding dimension")
+            diffx = np.diff(dx[i])
+            # if distances are constant reduce to the scalar case
+            # since it brings a consistent speedup 
+            if (diffx == diffx[0]).all(): 
+                diffx = diffx[0]
+            dx[i] = diffx
+        if len(dx) == 1:
+            dx *= len_axes
     else:
-        raise SyntaxError(
-            "invalid number of arguments")
-    if any([not np.isscalar(dxi) for dxi in dx]):
-        raise ValueError("distances must be scalars")
-
+        raise TypeError("invalid number of arguments")
+    
     edge_order = kwargs.pop('edge_order', 1)
     if kwargs:
         raise TypeError('"{}" are not valid keyword arguments.'.format(
@@ -1631,62 +1755,88 @@ def gradient(f, *varargs, **kwargs):
         y = f
 
     for i, axis in enumerate(axes):
-
-        if y.shape[axis] < 2:
+        if y.shape[axis] < edge_order + 1:
             raise ValueError(
                 "Shape of array too small to calculate a numerical gradient, "
-                "at least two elements are required.")
-
-        # Numerical differentiation: 1st order edges, 2nd order interior
-        if y.shape[axis] == 2 or edge_order == 1:
-            # Use first order differences for time data
-            out = np.empty_like(y, dtype=otype)
-
-            slice1[axis] = slice(1, -1)
-            slice2[axis] = slice(2, None)
-            slice3[axis] = slice(None, -2)
-            # 1D equivalent -- out[1:-1] = (y[2:] - y[:-2])/2.0
-            out[slice1] = (y[slice2] - y[slice3])/2.0
-
+                "at least (edge_order + 1) elements are required.")
+        # result allocation
+        out = np.empty_like(y, dtype=otype)
+
+        uniform_spacing = np.isscalar(dx[i]) 
+
+        # Numerical differentiation: 2nd order interior
+        slice1[axis] = slice(1, -1)
+        slice2[axis] = slice(None, -2)
+        slice3[axis] = slice(1, -1)
+        slice4[axis] = slice(2, None)
+        
+        if uniform_spacing:
+            out[slice1] = (f[slice4] - f[slice2]) / (2. * dx[i])
+        else:
+            dx1 = dx[i][0:-1]
+            dx2 = dx[i][1:]
+            a = -(dx2)/(dx1 * (dx1 + dx2))
+            b = (dx2 - dx1) / (dx1 * dx2)
+            c = dx1 / (dx2 * (dx1 + dx2))
+            # fix the shape for broadcasting
+            shape = np.ones(N, dtype=int)
+            shape[axis] = -1
+            a.shape = b.shape = c.shape = shape
+            # 1D equivalent -- out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:]
+            out[slice1] = a * f[slice2] + b * f[slice3] + c * f[slice4]
+        
+        # Numerical differentiation: 1st order edges
+        if edge_order == 1:
             slice1[axis] = 0
             slice2[axis] = 1
             slice3[axis] = 0
-            # 1D equivalent -- out[0] = (y[1] - y[0])
-            out[slice1] = (y[slice2] - y[slice3])
-
+            dx_0 = dx[i] if uniform_spacing else dx[i][0]
+            # 1D equivalent -- out[0] = (y[1] - y[0]) / (x[1] - x[0])
+            out[slice1] = (y[slice2] - y[slice3]) / dx_0
+            
             slice1[axis] = -1
             slice2[axis] = -1
             slice3[axis] = -2
-            # 1D equivalent -- out[-1] = (y[-1] - y[-2])
-            out[slice1] = (y[slice2] - y[slice3])
-
-        # Numerical differentiation: 2st order edges, 2nd order interior
+            dx_n = dx[i] if uniform_spacing else dx[i][-1]
+            # 1D equivalent -- out[-1] = (y[-1] - y[-2]) / (x[-1] - x[-2])
+            out[slice1] = (y[slice2] - y[slice3]) / dx_n
+            
+        # Numerical differentiation: 2nd order edges
         else:
-            # Use second order differences where possible
-            out = np.empty_like(y, dtype=otype)
-
-            slice1[axis] = slice(1, -1)
-            slice2[axis] = slice(2, None)
-            slice3[axis] = slice(None, -2)
-            # 1D equivalent -- out[1:-1] = (y[2:] - y[:-2])/2.0
-            out[slice1] = (y[slice2] - y[slice3])/2.0
-
             slice1[axis] = 0
             slice2[axis] = 0
             slice3[axis] = 1
             slice4[axis] = 2
-            # 1D equivalent -- out[0] = -(3*y[0] - 4*y[1] + y[2]) / 2.0
-            out[slice1] = -(3.0*y[slice2] - 4.0*y[slice3] + y[slice4])/2.0
-
+            if uniform_spacing:
+                a = -1.5 / dx[i]
+                b = 2. / dx[i]
+                c = -0.5 / dx[i]
+            else:
+                dx1 = dx[i][0]
+                dx2 = dx[i][1]
+                a = -(2. * dx1 + dx2)/(dx1 * (dx1 + dx2))
+                b = (dx1 + dx2) / (dx1 * dx2)
+                c = - dx1 / (dx2 * (dx1 + dx2))
+            # 1D equivalent -- out[0] = a * y[0] + b * y[1] + c * y[2]
+            out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4]
+            
             slice1[axis] = -1
-            slice2[axis] = -1
+            slice2[axis] = -3
             slice3[axis] = -2
-            slice4[axis] = -3
-            # 1D equivalent -- out[-1] = (3*y[-1] - 4*y[-2] + y[-3])
-            out[slice1] = (3.0*y[slice2] - 4.0*y[slice3] + y[slice4])/2.0
-
-        # divide by step size
-        out /= dx[i]
+            slice4[axis] = -1
+            if uniform_spacing:
+                a = 0.5 / dx[i]
+                b = -2. / dx[i]
+                c = 1.5 / dx[i]
+            else:
+                dx1 = dx[i][-2]
+                dx2 = dx[i][-1]
+                a = (dx2) / (dx1 * (dx1 + dx2))
+                b = - (dx2 + dx1) / (dx1 * dx2)
+                c = (2. * dx2 + dx1) / (dx2 * (dx1 + dx2))
+            # 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1]
+            out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4]        
+        
         outvals.append(out)
 
         # reset the slice object in this dimension to ":"
@@ -1695,7 +1845,7 @@ def gradient(f, *varargs, **kwargs):
         slice3[axis] = slice(None)
         slice4[axis] = slice(None)
 
-    if len(axes) == 1:
+    if len_axes == 1:
         return outvals[0]
     else:
         return outvals
@@ -2746,9 +2896,9 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
         contain observations.
     bias : bool, optional
         Default normalization (False) is by ``(N - 1)``, where ``N`` is the
-        number of observations given (unbiased estimate). If `bias` is True, then
-        normalization is by ``N``. These values can be overridden by using the
-        keyword ``ddof`` in numpy versions >= 1.5.
+        number of observations given (unbiased estimate). If `bias` is True, 
+        then normalization is by ``N``. These values can be overridden by using 
+        the keyword ``ddof`` in numpy versions >= 1.5.
     ddof : int, optional
         If not ``None`` the default value implied by `bias` is overridden.
         Note that ``ddof=1`` will return the unbiased estimate, even if both
@@ -2912,7 +3062,8 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
         fact = w_sum - ddof*sum(w*aweights)/w_sum
 
     if fact <= 0:
-        warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning, stacklevel=2)
+        warnings.warn("Degrees of freedom <= 0 for slice", 
+                      RuntimeWarning, stacklevel=2)
         fact = 0.0
 
     X -= avg[:, None]
@@ -4665,9 +4816,9 @@ def delete(arr, obj, axis=None):
     # After removing the special handling of booleans and out of
     # bounds values, the conversion to the array can be removed.
     if obj.dtype == bool:
-        warnings.warn(
-            "in the future insert will treat boolean arrays and array-likes "
-            "as boolean index instead of casting it to integer", FutureWarning, stacklevel=2)
+        warnings.warn("in the future insert will treat boolean arrays and " 
+                      "array-likes as boolean index instead of casting it "
+                      "to integer", FutureWarning, stacklevel=2)
         obj = obj.astype(intp)
     if isinstance(_obj, (int, long, integer)):
         # optimization for a single value
author	Alessandro Pietro Bardelli <apbard@users.noreply.github.com>	2017-01-04 00:58:44 +0100
committer	Alessandro Pietro Bardelli <apbard@users.noreply.github.com>	2017-02-22 10:50:53 +0100
commit	9520de90837d0afaac3d1612047f4b952563b3d5 (patch)
tree	623938c6d79bda2affa5e1794f8512155d2d4241 /numpy/lib/function_base.py
parent	f6a07571df745f01eaccf4b05b8476da6f0b5833 (diff)
download	numpy-9520de90837d0afaac3d1612047f4b952563b3d5.tar.gz