12 files changed, 731 insertions, 221 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 63b191b07..df5876715 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -13,6 +13,7 @@ __all__ = [
 import warnings
 import sys
 import collections
+import operator
 
 import numpy as np
 import numpy.core.numeric as _nx
@@ -256,22 +257,22 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
 
     range : sequence, optional
         A sequence of lower and upper bin edges to be used if the edges are
-        not given explicitely in `bins`. Defaults to the minimum and maximum
+        not given explicitly in `bins`. Defaults to the minimum and maximum
         values along each dimension.
     normed : bool, optional
-        If False, returns the number of samples in each bin. If True, returns
-        the bin density, ie, the bin count divided by the bin hypervolume.
+        If False, returns the number of samples in each bin. If True,
+        returns the bin density ``bin_count / sample_count / bin_volume``.
     weights : array_like (N,), optional
         An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
-        Weights are normalized to 1 if normed is True. If normed is False, the
-        values of the returned histogram are equal to the sum of the weights
-        belonging to the samples falling into each bin.
+        Weights are normalized to 1 if normed is True. If normed is False,
+        the values of the returned histogram are equal to the sum of the
+        weights belonging to the samples falling into each bin.
 
     Returns
     -------
     H : ndarray
-        The multidimensional histogram of sample x. See normed and weights for
-        the different possible semantics.
+        The multidimensional histogram of sample x. See normed and weights
+        for the different possible semantics.
     edges : list
         A list of D arrays describing the bin edges for each dimension.
 
@@ -373,10 +374,10 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
         if not np.isinf(mindiff):
             decimal = int(-log10(mindiff)) + 6
             # Find which points are on the rightmost edge.
-            on_edge = where(around(sample[:, i], decimal) ==
-                            around(edges[i][-1], decimal))[0]
+            not_smaller_than_edge = (sample[:, i] >= edges[i][-1])
+            on_edge = (around(sample[:, i], decimal) == around(edges[i][-1], decimal))
             # Shift these points one bin to the left.
-            Ncount[i][on_edge] -= 1
+            Ncount[i][where(on_edge & not_smaller_than_edge)[0]] -= 1
 
     # Flattened histogram matrix (1D)
     # Reshape is used so that overlarge arrays
@@ -770,29 +771,68 @@ def select(condlist, choicelist, default=0):
     array([ 0,  1,  2,  0,  0,  0, 36, 49, 64, 81])
 
     """
-    n = len(condlist)
-    n2 = len(choicelist)
-    if n2 != n:
+    # Check the size of condlist and choicelist are the same, or abort.
+    if len(condlist) != len(choicelist):
         raise ValueError(
-            "list of cases must be same length as list of conditions")
-    choicelist = [default] + choicelist
-    S = 0
-    pfac = 1
-    for k in range(1, n+1):
-        S += k * pfac * asarray(condlist[k-1])
-        if k < n:
-            pfac *= (1-asarray(condlist[k-1]))
-    # handle special case of a 1-element condition but
-    #  a multi-element choice
-    if type(S) in ScalarType or max(asarray(S).shape) == 1:
-        pfac = asarray(1)
-        for k in range(n2+1):
-            pfac = pfac + asarray(choicelist[k])
-        if type(S) in ScalarType:
-            S = S*ones(asarray(pfac).shape, type(S))
-        else:
-            S = S*ones(asarray(pfac).shape, S.dtype)
-    return choose(S, tuple(choicelist))
+            'list of cases must be same length as list of conditions')
+
+    # Now that the dtype is known, handle the deprecated select([], []) case
+    if len(condlist) == 0:
+        warnings.warn("select with an empty condition list is not possible"
+                      "and will be deprecated",
+                      DeprecationWarning)
+        return np.asarray(default)[()]
+
+    choicelist = [np.asarray(choice) for choice in choicelist]
+    choicelist.append(np.asarray(default))
+
+    # need to get the result type before broadcasting for correct scalar
+    # behaviour
+    dtype = np.result_type(*choicelist)
+
+    # Convert conditions to arrays and broadcast conditions and choices
+    # as the shape is needed for the result. Doing it seperatly optimizes
+    # for example when all choices are scalars.
+    condlist = np.broadcast_arrays(*condlist)
+    choicelist = np.broadcast_arrays(*choicelist)
+
+    # If cond array is not an ndarray in boolean format or scalar bool, abort.
+    deprecated_ints = False
+    for i in range(len(condlist)):
+        cond = condlist[i]
+        if cond.dtype.type is not np.bool_:
+            if np.issubdtype(cond.dtype, np.integer):
+                # A previous implementation accepted int ndarrays accidentally.
+                # Supported here deliberately, but deprecated.
+                condlist[i] = condlist[i].astype(bool)
+                deprecated_ints = True
+            else:
+                raise ValueError(
+                    'invalid entry in choicelist: should be boolean ndarray')
+
+    if deprecated_ints:
+        msg = "select condlists containing integer ndarrays is deprecated " \
+            "and will be removed in the future. Use `.astype(bool)` to " \
+            "convert to bools."
+        warnings.warn(msg, DeprecationWarning)
+
+    if choicelist[0].ndim == 0:
+        # This may be common, so avoid the call.
+        result_shape = condlist[0].shape
+    else:
+        result_shape = np.broadcast_arrays(condlist[0], choicelist[0])[0].shape
+
+    result = np.full(result_shape, choicelist[-1], dtype)
+
+    # Use np.copyto to burn each choicelist array onto result, using the
+    # corresponding condlist as a boolean mask. This is done in reverse
+    # order since the first choice should take precedence.
+    choicelist = choicelist[-2::-1]
+    condlist = condlist[::-1]
+    for choice, cond in zip(choicelist, condlist):
+        np.copyto(result, choice, where=cond)
+
+    return result
 
 
 def copy(a, order='K'):
@@ -1101,7 +1141,7 @@ def interp(x, xp, fp, left=None, right=None):
     -----
     Does not check that the x-coordinate sequence `xp` is increasing.
     If `xp` is not increasing, the results are nonsense.
-    A simple check for increasingness is::
+    A simple check for increasing is::
 
         np.all(np.diff(xp) > 0)
 
@@ -1578,20 +1618,22 @@ class vectorize(object):
     The `vectorize` function is provided primarily for convenience, not for
     performance. The implementation is essentially a for loop.
 
-    If `otypes` is not specified, then a call to the function with the first
-    argument will be used to determine the number of outputs.  The results of
-    this call will be cached if `cache` is `True` to prevent calling the
-    function twice.  However, to implement the cache, the original function
-    must be wrapped which will slow down subsequent calls, so only do this if
-    your function is expensive.
+    If `otypes` is not specified, then a call to the function with the
+    first argument will be used to determine the number of outputs.  The
+    results of this call will be cached if `cache` is `True` to prevent
+    calling the function twice.  However, to implement the cache, the
+    original function must be wrapped which will slow down subsequent
+    calls, so only do this if your function is expensive.
+
+    The new keyword argument interface and `excluded` argument support
+    further degrades performance.
 
-    The new keyword argument interface and `excluded` argument support further
-    degrades performance.
     """
     def __init__(self, pyfunc, otypes='', doc=None, excluded=None,
                  cache=False):
         self.pyfunc = pyfunc
         self.cache = cache
+        self._ufunc = None    # Caching to improve default performance
 
         if doc is None:
             self.__doc__ = pyfunc.__doc__
@@ -1615,9 +1657,6 @@ class vectorize(object):
             excluded = set()
         self.excluded = set(excluded)
 
-        if self.otypes and not self.excluded:
-            self._ufunc = None      # Caching to improve default performance
-
     def __call__(self, *args, **kwargs):
         """
         Return arrays with the results of `pyfunc` broadcast (vectorized) over
@@ -1651,7 +1690,8 @@ class vectorize(object):
     def _get_ufunc_and_otypes(self, func, args):
         """Return (ufunc, otypes)."""
         # frompyfunc will fail if args is empty
-        assert args
+        if not args:
+            raise ValueError('args can not be empty')
 
         if self.otypes:
             otypes = self.otypes
@@ -1810,9 +1850,11 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None):
             "ddof must be integer")
 
     # Handles complex arrays too
+    m = np.asarray(m)
     if y is None:
         dtype = np.result_type(m, np.float64)
     else:
+        y = np.asarray(y)
         dtype = np.result_type(m, y, np.float64)
     X = array(m, ndmin=2, dtype=dtype)
 
@@ -1909,7 +1951,7 @@ def blackman(M):
     """
     Return the Blackman window.
 
-    The Blackman window is a taper formed by using the the first three
+    The Blackman window is a taper formed by using the first three
     terms of a summation of cosines. It was designed to have close to the
     minimal leakage possible.  It is close to optimal, only slightly worse
     than a Kaiser window.
@@ -2139,9 +2181,10 @@ def hanning(M):
     .. math::  w(n) = 0.5 - 0.5cos\\left(\\frac{2\\pi{n}}{M-1}\\right)
                \\qquad 0 \\leq n \\leq M-1
 
-    The Hanning was named for Julius van Hann, an Austrian meterologist. It is
-    also known as the Cosine Bell. Some authors prefer that it be called a
-    Hann window, to help avoid confusion with the very similar Hamming window.
+    The Hanning was named for Julius van Hann, an Austrian meteorologist.
+    It is also known as the Cosine Bell. Some authors prefer that it be
+    called a Hann window, to help avoid confusion with the very similar
+    Hamming window.
 
     Most references to the Hanning window come from the signal processing
     literature, where it is used as one of many windowing functions for
@@ -2238,9 +2281,9 @@ def hamming(M):
     .. math::  w(n) = 0.54 - 0.46cos\\left(\\frac{2\\pi{n}}{M-1}\\right)
                \\qquad 0 \\leq n \\leq M-1
 
-    The Hamming was named for R. W. Hamming, an associate of J. W. Tukey and
-    is described in Blackman and Tukey. It was recommended for smoothing the
-    truncated autocovariance function in the time domain.
+    The Hamming was named for R. W. Hamming, an associate of J. W. Tukey
+    and is described in Blackman and Tukey. It was recommended for
+    smoothing the truncated autocovariance function in the time domain.
     Most references to the Hamming window come from the signal processing
     literature, where it is used as one of many windowing functions for
     smoothing values.  It is also known as an apodization (which means
@@ -2420,11 +2463,11 @@ def i0(x):
     Notes
     -----
     We use the algorithm published by Clenshaw [1]_ and referenced by
-    Abramowitz and Stegun [2]_, for which the function domain is partitioned
-    into the two intervals [0,8] and (8,inf), and Chebyshev polynomial
-    expansions are employed in each interval. Relative error on the domain
-    [0,30] using IEEE arithmetic is documented [3]_ as having a peak of 5.8e-16
-    with an rms of 1.4e-16 (n = 30000).
+    Abramowitz and Stegun [2]_, for which the function domain is
+    partitioned into the two intervals [0,8] and (8,inf), and Chebyshev
+    polynomial expansions are employed in each interval. Relative error on
+    the domain [0,30] using IEEE arithmetic is documented [3]_ as having a
+    peak of 5.8e-16 with an rms of 1.4e-16 (n = 30000).
 
     References
     ----------
@@ -2494,12 +2537,11 @@ def kaiser(M, beta):
 
     where :math:`I_0` is the modified zeroth-order Bessel function.
 
-    The Kaiser was named for Jim Kaiser, who discovered a simple approximation
-    to the DPSS window based on Bessel functions.
-    The Kaiser window is a very good approximation to the Digital Prolate
-    Spheroidal Sequence, or Slepian window, which is the transform which
-    maximizes the energy in the main lobe of the window relative to total
-    energy.
+    The Kaiser was named for Jim Kaiser, who discovered a simple
+    approximation to the DPSS window based on Bessel functions.  The Kaiser
+    window is a very good approximation to the Digital Prolate Spheroidal
+    Sequence, or Slepian window, which is the transform which maximizes the
+    energy in the main lobe of the window relative to total energy.
 
     The Kaiser can approximate many other windows by varying the beta
     parameter.
@@ -2609,8 +2651,8 @@ def sinc(x):
     The name sinc is short for "sine cardinal" or "sinus cardinalis".
 
     The sinc function is used in various signal processing applications,
-    including in anti-aliasing, in the construction of a
-    Lanczos resampling filter, and in interpolation.
+    including in anti-aliasing, in the construction of a Lanczos resampling
+    filter, and in interpolation.
 
     For bandlimited interpolation of discrete-time signals, the ideal
     interpolation kernel is proportional to the sinc function.
@@ -2692,7 +2734,67 @@ def msort(a):
     return b
 
 
-def median(a, axis=None, out=None, overwrite_input=False):
+def _ureduce(a, func, **kwargs):
+    """
+    Internal Function.
+    Call `func` with `a` as first argument swapping the axes to use extended
+    axis on functions that don't support it natively.
+
+    Returns result and a.shape with axis dims set to 1.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    func : callable
+        Reduction function Kapable of receiving an axis argument.
+        It is is called with `a` as first argument followed by `kwargs`.
+     kwargs : keyword arguments
+        additional keyword arguments to pass to `func`.
+
+    Returns
+    -------
+    result : tuple
+        Result of func(a, **kwargs) and a.shape with axis dims set to 1
+        which can be used to reshape the result to the same shape a ufunc with
+        keepdims=True would produce.
+
+    """
+    a = np.asanyarray(a)
+    axis = kwargs.get('axis', None)
+    if axis is not None:
+        keepdim = list(a.shape)
+        nd = a.ndim
+        try:
+            axis = operator.index(axis)
+            if axis >= nd or axis < -nd:
+                raise IndexError("axis %d out of bounds (%d)" % (axis, a.ndim))
+            keepdim[axis] = 1
+        except TypeError:
+            sax = set()
+            for x in axis:
+                if x >= nd or x < -nd:
+                    raise IndexError("axis %d out of bounds (%d)" % (x, nd))
+                if x in sax:
+                    raise ValueError("duplicate value in axis")
+                sax.add(x % nd)
+                keepdim[x] = 1
+            keep = sax.symmetric_difference(frozenset(range(nd)))
+            nkeep = len(keep)
+            # swap axis that should not be reduced to front
+            for i, s in enumerate(sorted(keep)):
+                a = a.swapaxes(i, s)
+            # merge reduced axis
+            a = a.reshape(a.shape[:nkeep] + (-1,))
+            kwargs['axis'] = -1
+    else:
+        keepdim = [1] * a.ndim
+
+    r = func(a, **kwargs)
+    return r, keepdim
+
+
+def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
     """
     Compute the median along the specified axis.
 
@@ -2702,27 +2804,35 @@ def median(a, axis=None, out=None, overwrite_input=False):
     ----------
     a : array_like
         Input array or object that can be converted to an array.
-    axis : int, optional
+    axis : int or sequence of int, optional
         Axis along which the medians are computed. The default (axis=None)
         is to compute the median along a flattened version of the array.
+        A sequence of axes is supported since version 1.9.0.
     out : ndarray, optional
-        Alternative output array in which to place the result. It must
-        have the same shape and buffer length as the expected output,
-        but the type (of the output) will be cast if necessary.
+        Alternative output array in which to place the result. It must have
+        the same shape and buffer length as the expected output, but the
+        type (of the output) will be cast if necessary.
     overwrite_input : bool, optional
        If True, then allow use of memory of input array (a) for
        calculations. The input array will be modified by the call to
-       median. This will save memory when you do not need to preserve
-       the contents of the input array. Treat the input as undefined,
-       but it will probably be fully or partially sorted. Default is
-       False. Note that, if `overwrite_input` is True and the input
-       is not already an ndarray, an error will be raised.
+       median. This will save memory when you do not need to preserve the
+       contents of the input array. Treat the input as undefined, but it
+       will probably be fully or partially sorted. Default is False. Note
+       that, if `overwrite_input` is True and the input is not already an
+       ndarray, an error will be raised.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the original `arr`.
+
+        .. versionadded:: 1.9.0
+
 
     Returns
     -------
     median : ndarray
-        A new array holding the result (unless `out` is specified, in
-        which case that array is returned instead).  If the input contains
+        A new array holding the result (unless `out` is specified, in which
+        case that array is returned instead).  If the input contains
         integers, or floats of smaller precision than 64, then the output
         data-type is float64.  Otherwise, the output data-type is the same
         as that of the input.
@@ -2766,6 +2876,16 @@ def median(a, axis=None, out=None, overwrite_input=False):
     >>> assert not np.all(a==b)
 
     """
+    r, k = _ureduce(a, func=_median, axis=axis, out=out,
+                    overwrite_input=overwrite_input)
+    if keepdims:
+        return r.reshape(k)
+    else:
+        return r
+
+def _median(a, axis=None, out=None, overwrite_input=False):
+    # can't be reasonably be implemented in terms of percentile as we have to
+    # call mean to not break astropy
     a = np.asanyarray(a)
     if axis is not None and axis >= a.ndim:
         raise IndexError(
@@ -2815,7 +2935,7 @@ def median(a, axis=None, out=None, overwrite_input=False):
 
 
 def percentile(a, q, axis=None, out=None,
-               overwrite_input=False, interpolation='linear'):
+               overwrite_input=False, interpolation='linear', keepdims=False):
     """
     Compute the qth percentile of the data along the specified axis.
 
@@ -2827,9 +2947,10 @@ def percentile(a, q, axis=None, out=None,
         Input array or object that can be converted to an array.
     q : float in range of [0,100] (or sequence of floats)
         Percentile to compute which must be between 0 and 100 inclusive.
-    axis : int, optional
+    axis : int or sequence of int, optional
         Axis along which the percentiles are computed. The default (None)
         is to compute the percentiles along a flattened version of the array.
+        A sequence of axes is supported since version 1.9.0.
     out : ndarray, optional
         Alternative output array in which to place the result. It must
         have the same shape and buffer length as the expected output,
@@ -2855,17 +2976,23 @@ def percentile(a, q, axis=None, out=None,
             * midpoint: (`i` + `j`) / 2.
 
         .. versionadded:: 1.9.0
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the original `arr`.
+
+        .. versionadded:: 1.9.0
 
     Returns
     -------
     percentile : scalar or ndarray
-        If a single percentile `q` is given and axis=None a scalar is returned.
-        If multiple percentiles `q` are given an array holding the result is
-        returned. The results are listed in the first axis.
-        (If `out` is specified, in which case that array is returned instead).
-        If the input contains integers, or floats of smaller precision than 64,
-        then the output data-type is float64. Otherwise, the output data-type
-        is the same as that of the input.
+        If a single percentile `q` is given and axis=None a scalar is
+        returned.  If multiple percentiles `q` are given an array holding
+        the result is returned. The results are listed in the first axis.
+        (If `out` is specified, in which case that array is returned
+        instead).  If the input contains integers, or floats of smaller
+        precision than 64, then the output data-type is float64. Otherwise,
+        the output data-type is the same as that of the input.
 
     See Also
     --------
@@ -2873,12 +3000,12 @@ def percentile(a, q, axis=None, out=None,
 
     Notes
     -----
-    Given a vector V of length N, the qth percentile of V is the qth ranked
-    value in a sorted copy of V.  The values and distances of the two nearest
-    neighbors as well as the `interpolation` parameter will determine the
-    percentile if the normalized ranking does not match q exactly. This
-    function is the same as the median if ``q=50``, the same as the minimum
-    if ``q=0``and the same as the maximum if ``q=100``.
+    Given a vector V of length N, the q-th percentile of V is the q-th ranked
+    value in a sorted copy of V.  The values and distances of the two
+    nearest neighbors as well as the `interpolation` parameter will
+    determine the percentile if the normalized ranking does not match q
+    exactly. This function is the same as the median if ``q=50``, the same
+    as the minimum if ``q=0``and the same as the maximum if ``q=100``.
 
     Examples
     --------
@@ -2911,8 +3038,22 @@ def percentile(a, q, axis=None, out=None,
     array([ 3.5])
 
     """
+    q = asarray(q, dtype=np.float64)
+    r, k = _ureduce(a, func=_percentile, q=q, axis=axis, out=out,
+                    overwrite_input=overwrite_input,
+                    interpolation=interpolation)
+    if keepdims:
+        if q.ndim == 0:
+            return r.reshape(k)
+        else:
+            return r.reshape([len(q)] + k)
+    else:
+        return r
+
+
+def _percentile(a, q, axis=None, out=None,
+                overwrite_input=False, interpolation='linear', keepdims=False):
     a = asarray(a)
-    q = asarray(q)
     if q.ndim == 0:
         # Do not allow 0-d arrays because following code fails for scalar
         zerod = True
@@ -2920,10 +3061,17 @@ def percentile(a, q, axis=None, out=None,
     else:
         zerod = False
 
-    q = q / 100.0
-    if (q < 0).any() or (q > 1).any():
-        raise ValueError(
-            "Percentiles must be in the range [0,100]")
+    # avoid expensive reductions, relevant for arrays with < O(1000) elements
+    if q.size < 10:
+        for i in range(q.size):
+            if q[i] < 0. or q[i] > 100.:
+                raise ValueError("Percentiles must be in the range [0,100]")
+            q[i] /= 100.
+    else:
+        # faster than any()
+        if np.count_nonzero(q < 0.) or np.count_nonzero(q > 100.):
+            raise ValueError("Percentiles must be in the range [0,100]")
+        q /= 100.
 
     # prepare a for partioning
     if overwrite_input:
@@ -3029,10 +3177,11 @@ def trapz(y, x=None, dx=1.0, axis=-1):
 
     Notes
     -----
-    Image [2]_ illustrates trapezoidal rule -- y-axis locations of points will
-    be taken from `y` array, by default x-axis distances between points will be
-    1.0, alternatively they can be provided with `x` array or with `dx` scalar.
-    Return value will be equal to combined area under the red lines.
+    Image [2]_ illustrates trapezoidal rule -- y-axis locations of points
+    will be taken from `y` array, by default x-axis distances between
+    points will be 1.0, alternatively they can be provided with `x` array
+    or with `dx` scalar.  Return value will be equal to combined area under
+    the red lines.
 
 
     References
@@ -3130,7 +3279,7 @@ def meshgrid(*xi, **kwargs):
     Make N-D coordinate arrays for vectorized evaluations of
     N-D scalar/vector fields over N-D grids, given
     one-dimensional coordinate arrays x1, x2,..., xn.
-    
+
     .. versionchanged:: 1.9
        1-D and 0-D cases are allowed.
 
@@ -3145,12 +3294,12 @@ def meshgrid(*xi, **kwargs):
          If True a sparse grid is returned in order to conserve memory.
          Default is False.
     copy : bool, optional
-        If False, a view into the original arrays are returned in
-        order to conserve memory.  Default is True.  Please note that
-        ``sparse=False, copy=False`` will likely return non-contiguous arrays.
-        Furthermore, more than one element of a broadcast array may refer to
-        a single memory location.  If you need to write to the arrays, make
-        copies first.
+        If False, a view into the original arrays are returned in order to
+        conserve memory.  Default is True.  Please note that
+        ``sparse=False, copy=False`` will likely return non-contiguous
+        arrays.  Furthermore, more than one element of a broadcast array
+        may refer to a single memory location.  If you need to write to the
+        arrays, make copies first.
 
     Returns
     -------
@@ -3164,13 +3313,13 @@ def meshgrid(*xi, **kwargs):
     Notes
     -----
     This function supports both indexing conventions through the indexing
-    keyword argument.  Giving the string 'ij' returns a meshgrid with matrix
-    indexing, while 'xy' returns a meshgrid with Cartesian indexing.  In the
-    2-D case with inputs of length M and N, the outputs are of shape (N, M) for
-    'xy' indexing and (M, N) for 'ij' indexing.  In the 3-D case with inputs of
-    length M, N and P, outputs are of shape (N, M, P) for 'xy' indexing and
-    (M, N, P) for 'ij' indexing.  The difference is illustrated by the
-    following code snippet::
+    keyword argument.  Giving the string 'ij' returns a meshgrid with
+    matrix indexing, while 'xy' returns a meshgrid with Cartesian indexing.
+    In the 2-D case with inputs of length M and N, the outputs are of shape
+    (N, M) for 'xy' indexing and (M, N) for 'ij' indexing.  In the 3-D case
+    with inputs of length M, N and P, outputs are of shape (N, M, P) for
+    'xy' indexing and (M, N, P) for 'ij' indexing.  The difference is
+    illustrated by the following code snippet::
 
         xv, yv = meshgrid(x, y, sparse=False, indexing='ij')
         for i in range(nx):
@@ -3181,7 +3330,7 @@ def meshgrid(*xi, **kwargs):
         for i in range(nx):
             for j in range(ny):
                 # treat xv[j,i], yv[j,i]
-    
+
     In the 1-D and 0-D case, the indexing and sparse keywords have no effect.
 
     See Also
@@ -3258,7 +3407,8 @@ def meshgrid(*xi, **kwargs):
 def delete(arr, obj, axis=None):
     """
     Return a new array with sub-arrays along an axis deleted. For a one
-    dimensional array, this returns those entries not returned by `arr[obj]`.
+    dimensional array, this returns those entries not returned by
+    `arr[obj]`.
 
     Parameters
     ----------
@@ -3285,9 +3435,11 @@ def delete(arr, obj, axis=None):
     Notes
     -----
     Often it is preferable to use a boolean mask. For example:
+    
     >>> mask = np.ones(len(arr), dtype=bool)
     >>> mask[[0,2,4]] = False
     >>> result = arr[mask,...]
+
     Is equivalent to `np.delete(arr, [0,2,4], axis=0)`, but allows further
     use of `mask`.
 
@@ -3465,7 +3617,8 @@ def insert(arr, obj, values, axis=None):
         .. versionadded:: 1.8.0
 
         Support for multiple insertions when `obj` is a single scalar or a
-        sequence with one element (similar to calling insert multiple times).
+        sequence with one element (similar to calling insert multiple
+        times).
     values : array_like
         Values to insert into `arr`. If the type of `values` is different
         from that of `arr`, `values` is converted to the type of `arr`.
@@ -3664,19 +3817,19 @@ def append(arr, values, axis=None):
         Values are appended to a copy of this array.
     values : array_like
         These values are appended to a copy of `arr`.  It must be of the
-        correct shape (the same shape as `arr`, excluding `axis`).  If `axis`
-        is not specified, `values` can be any shape and will be flattened
-        before use.
+        correct shape (the same shape as `arr`, excluding `axis`).  If
+        `axis` is not specified, `values` can be any shape and will be
+        flattened before use.
     axis : int, optional
-        The axis along which `values` are appended.  If `axis` is not given,
-        both `arr` and `values` are flattened before use.
+        The axis along which `values` are appended.  If `axis` is not
+        given, both `arr` and `values` are flattened before use.
 
     Returns
     -------
     append : ndarray
-        A copy of `arr` with `values` appended to `axis`.  Note that `append`
-        does not occur in-place: a new array is allocated and filled.  If
-        `axis` is None, `out` is a flattened array.
+        A copy of `arr` with `values` appended to `axis`.  Note that
+        `append` does not occur in-place: a new array is allocated and
+        filled.  If `axis` is None, `out` is a flattened array.
 
     See Also
     --------
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index d47a9ffd2..f69ca0c73 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -285,21 +285,23 @@ def load(file, mmap_mode=None):
     Parameters
     ----------
     file : file-like object or string
-        The file to read.  It must support ``seek()`` and ``read()`` methods.
-        If the filename extension is ``.gz``, the file is first decompressed.
+        The file to read. Compressed files with the filename extension
+        ``.gz`` are acceptable. File-like objects must support the
+        ``seek()`` and ``read()`` methods. Pickled files require that the
+        file-like object support the ``readline()`` method as well.
     mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
-        If not None, then memory-map the file, using the given mode
-        (see `numpy.memmap` for a detailed description of the modes).
-        A memory-mapped array is kept on disk. However, it can be accessed
-        and sliced like any ndarray.  Memory mapping is especially useful for
-        accessing small fragments of large files without reading the entire
-        file into memory.
+        If not None, then memory-map the file, using the given mode (see
+        `numpy.memmap` for a detailed description of the modes).  A
+        memory-mapped array is kept on disk. However, it can be accessed
+        and sliced like any ndarray.  Memory mapping is especially useful
+        for accessing small fragments of large files without reading the
+        entire file into memory.
 
     Returns
     -------
     result : array, tuple, dict, etc.
-        Data stored in the file. For ``.npz`` files, the returned instance of
-        NpzFile class must be closed to avoid leaking file descriptors.
+        Data stored in the file. For ``.npz`` files, the returned instance
+        of NpzFile class must be closed to avoid leaking file descriptors.
 
     Raises
     ------
@@ -308,7 +310,7 @@ def load(file, mmap_mode=None):
 
     See Also
     --------
-    save, savez, loadtxt
+    save, savez, savez_compressed, loadtxt
     memmap : Create a memory-map to an array stored in a file on disk.
 
     Notes
@@ -319,13 +321,14 @@ def load(file, mmap_mode=None):
     - If the file is a ``.npz`` file, then a dictionary-like object is
       returned, containing ``{filename: array}`` key-value pairs, one for
       each file in the archive.
-    - If the file is a ``.npz`` file, the returned value supports the context
-      manager protocol in a similar fashion to the open function::
+    - If the file is a ``.npz`` file, the returned value supports the
+      context manager protocol in a similar fashion to the open function::
 
         with load('foo.npz') as data:
             a = data['a']
 
-      The underlying file descriptor is closed when exiting the 'with' block.
+      The underlying file descriptor is closed when exiting the 'with'
+      block.
 
     Examples
     --------
@@ -549,6 +552,7 @@ def savez_compressed(file, *args, **kwds):
     See Also
     --------
     numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
+    numpy.load : Load the files created by savez_compressed.
 
     """
     _savez(file, args, kwds, True)
@@ -1195,14 +1199,20 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
         The string used to separate values.  By default, any consecutive
         whitespaces act as delimiter.  An integer or sequence of integers
         can also be provided as width(s) of each field.
+    skip_rows : int, optional
+        `skip_rows` was deprecated in numpy 1.5, and will be removed in
+        numpy 2.0. Please use `skip_header` instead.
     skip_header : int, optional
-        The numbers of lines to skip at the beginning of the file.
+        The number of lines to skip at the beginning of the file.
     skip_footer : int, optional
-        The numbers of lines to skip at the end of the file
+        The number of lines to skip at the end of the file.
     converters : variable, optional
         The set of functions that convert the data of a column to a value.
         The converters can also be used to provide a default value
         for missing data: ``converters = {3: lambda s: float(s or 0)}``.
+    missing : variable, optional
+        `missing` was deprecated in numpy 1.5, and will be removed in
+        numpy 2.0. Please use `missing_values` instead.
     missing_values : variable, optional
         The set of strings corresponding to missing data.
     filling_values : variable, optional
@@ -1240,6 +1250,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
     usemask : bool, optional
         If True, return a masked array.
         If False, return a regular array.
+    loose : bool, optional
+        If True, do not raise errors for invalid values.
     invalid_raise : bool, optional
         If True, an exception is raised if an inconsistency is detected in the
         number of columns.
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 8545375cd..0dcb85bb2 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -127,7 +127,7 @@ def poly(seq_of_zeros):
     elif len(sh) == 1:
         pass
     else:
-        raise ValueError("input must be 1d or square 2d array.")
+        raise ValueError("input must be 1d or non-empty square 2d array.")
 
     if len(seq_of_zeros) == 0:
         return 1.0
@@ -806,6 +806,8 @@ def polymul(a1, a2):
     poly1d : A one-dimensional polynomial class.
     poly, polyadd, polyder, polydiv, polyfit, polyint, polysub,
     polyval
+    convolve : Array convolution. Same output as polymul, but has parameter
+               for overlap mode.
 
     Examples
     --------
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index c299c1976..38b928d57 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -153,6 +153,12 @@ def apply_over_axes(func, a, axes):
     apply_along_axis :
         Apply a function to 1-D slices of an array along the given axis.
 
+    Notes
+    ------
+    This function is equivalent to tuple axis arguments to reorderable ufuncs
+    with keepdims=True. Tuple axis arguments to ufuncs have been availabe since
+    version 1.7.0.
+
     Examples
     --------
     >>> a = np.arange(24).reshape(2,3,4)
@@ -172,6 +178,13 @@ def apply_over_axes(func, a, axes):
             [ 92],
             [124]]])
 
+    Tuple axis arguments to ufuncs are equivalent:
+
+    >>> np.sum(a, axis=(0,2), keepdims=True)
+    array([[[ 60],
+            [ 92],
+            [124]]])
+
     """
     val = asarray(a)
     N = a.ndim
diff --git a/numpy/lib/src/_compiled_base.c b/numpy/lib/src/_compiled_base.c
index 652268f24..a461613e3 100644
--- a/numpy/lib/src/_compiled_base.c
+++ b/numpy/lib/src/_compiled_base.c
@@ -161,14 +161,22 @@ arr_bincount(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
     len = PyArray_SIZE(lst);
     type = PyArray_DescrFromType(NPY_INTP);
 
-    /* handle empty list */
-    if (len < 1) {
-        if (mlength == Py_None) {
-            minlength = 0;
-        }
-        else if (!(minlength = PyArray_PyIntAsIntp(mlength))) {
+    if (mlength == Py_None) {
+        minlength = 0;
+    }
+    else {
+        minlength = PyArray_PyIntAsIntp(mlength);
+        if (minlength <= 0) {
+            if (!PyErr_Occurred()) {
+                PyErr_SetString(PyExc_ValueError,
+                                "minlength must be positive");
+            }
             goto fail;
         }
+    }
+
+    /* handle empty list */
+    if (len == 0) {
         if (!(ans = (PyArrayObject *)PyArray_Zeros(1, &minlength, type, 0))){
             goto fail;
         }
@@ -185,15 +193,6 @@ arr_bincount(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
     }
     ans_size = mx + 1;
     if (mlength != Py_None) {
-        if (!(minlength = PyArray_PyIntAsIntp(mlength))) {
-            goto fail;
-        }
-        if (minlength <= 0) {
-            /* superfluous, but may catch incorrect usage */
-            PyErr_SetString(PyExc_ValueError,
-                    "minlength must be positive");
-            goto fail;
-        }
         if (ans_size < minlength) {
             ans_size = minlength;
         }
@@ -1417,6 +1416,9 @@ _packbits( void *In,
     npy_intp out_Nm1;
     int maxi, remain, nonzero, j;
     char *outptr,*inptr;
+    NPY_BEGIN_THREADS_DEF;
+
+    NPY_BEGIN_THREADS_THRESHOLDED(out_N);
 
     outptr = Out;    /* pointer to output buffer */
     inptr  = In;     /* pointer to input buffer */
@@ -1451,6 +1453,8 @@ _packbits( void *In,
         *outptr = build;
         outptr += out_stride;
     }
+
+    NPY_END_THREADS;
     return;
 }
 
@@ -1468,6 +1472,9 @@ _unpackbits(void *In,
     unsigned char mask;
     int i, index;
     char *inptr, *outptr;
+    NPY_BEGIN_THREADS_DEF;
+
+    NPY_BEGIN_THREADS_THRESHOLDED(in_N);
 
     outptr = Out;
     inptr  = In;
@@ -1480,6 +1487,8 @@ _unpackbits(void *In,
         }
         inptr += in_stride;
     }
+
+    NPY_END_THREADS;
     return;
 }
 
diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py
index d092f92a8..1ffaaee36 100644
--- a/numpy/lib/stride_tricks.py
+++ b/numpy/lib/stride_tricks.py
@@ -29,7 +29,8 @@ def as_strided(x, shape=None, strides=None):
         interface['strides'] = tuple(strides)
     array = np.asarray(DummyArray(interface, base=x))
     # Make sure dtype is correct in case of custom dtype
-    array.dtype = x.dtype
+    if array.dtype.kind == 'V':
+        array.dtype = x.dtype
     return array
 
 def broadcast_arrays(*args):
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 953292550..59db23a83 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -6,7 +6,7 @@ import numpy as np
 from numpy.testing import (
     run_module_suite, TestCase, assert_, assert_equal, assert_array_equal,
     assert_almost_equal, assert_array_almost_equal, assert_raises,
-    assert_allclose, assert_array_max_ulp, assert_warns
+    assert_allclose, assert_array_max_ulp, assert_warns, assert_raises_regex
     )
 from numpy.random import rand
 from numpy.lib import *
@@ -150,6 +150,13 @@ class TestAverage(TestCase):
 
 
 class TestSelect(TestCase):
+    choices = [np.array([1, 2, 3]),
+               np.array([4, 5, 6]),
+               np.array([7, 8, 9])]
+    conditions = [np.array([False, False, False]),
+                  np.array([False, True, False]),
+                  np.array([False, False, True])]
+
     def _select(self, cond, values, default=0):
         output = []
         for m in range(len(cond)):
@@ -157,18 +164,62 @@ class TestSelect(TestCase):
         return output
 
     def test_basic(self):
-        choices = [np.array([1, 2, 3]),
-                   np.array([4, 5, 6]),
-                   np.array([7, 8, 9])]
-        conditions = [np.array([0, 0, 0]),
-                      np.array([0, 1, 0]),
-                      np.array([0, 0, 1])]
+        choices = self.choices
+        conditions = self.conditions
         assert_array_equal(select(conditions, choices, default=15),
                            self._select(conditions, choices, default=15))
 
         assert_equal(len(choices), 3)
         assert_equal(len(conditions), 3)
 
+    def test_broadcasting(self):
+        conditions = [np.array(True), np.array([False, True, False])]
+        choices = [1, np.arange(12).reshape(4, 3)]
+        assert_array_equal(select(conditions, choices), np.ones((4, 3)))
+        # default can broadcast too:
+        assert_equal(select([True], [0], default=[0]).shape, (1,))
+
+    def test_return_dtype(self):
+        assert_equal(select(self.conditions, self.choices, 1j).dtype,
+                     np.complex_)
+        # But the conditions need to be stronger then the scalar default
+        # if it is scalar.
+        choices = [choice.astype(np.int8) for choice in self.choices]
+        assert_equal(select(self.conditions, choices).dtype, np.int8)
+
+        d = np.array([1, 2, 3, np.nan, 5, 7])
+        m = np.isnan(d)
+        assert_equal(select([m], [d]), [0, 0, 0, np.nan, 0, 0])
+
+    def test_deprecated_empty(self):
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("always")
+            assert_equal(select([], [], 3j), 3j)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            assert_warns(DeprecationWarning, select, [], [])
+            warnings.simplefilter("error")
+            assert_raises(DeprecationWarning, select, [], [])
+
+    def test_non_bool_deprecation(self):
+        choices = self.choices
+        conditions = self.conditions[:]
+        with warnings.catch_warnings():
+            warnings.filterwarnings("always")
+            conditions[0] = conditions[0].astype(np.int_)
+            assert_warns(DeprecationWarning, select, conditions, choices)
+            conditions[0] = conditions[0].astype(np.uint8)
+            assert_warns(DeprecationWarning, select, conditions, choices)
+            warnings.filterwarnings("error")
+            assert_raises(DeprecationWarning, select, conditions, choices)
+
+    def test_many_arguments(self):
+        # This used to be limited by NPY_MAXARGS == 32
+        conditions = [np.array([False])] * 100
+        choices = [np.array([1])] * 100
+        select(conditions, choices)
+
 
 class TestInsert(TestCase):
     def test_basic(self):
@@ -721,6 +772,12 @@ class TestVectorize(TestCase):
         assert_array_equal(f(x), x*x)
         assert_equal(_calls[0], len(x))
 
+    def test_otypes(self):
+        f = np.vectorize(lambda x: x)
+        f.otypes = 'i'
+        x = np.arange(5)
+        assert_array_equal(f(x), x)
+
 
 class TestDigitize(TestCase):
     def test_forward(self):
@@ -1155,6 +1212,30 @@ class TestHistogramdd(TestCase):
             h, e = np.histogramdd(x, bins=[3, [-np.inf, 3, np.inf]])
             assert_allclose(h, expected)
 
+    def test_rightmost_binedge(self):
+        """Test event very close to rightmost binedge.
+        See Github issue #4266"""
+        x = [0.9999999995]
+        bins = [[0.,0.5,1.0]]
+        hist, _ = histogramdd(x, bins=bins)
+        assert_(hist[0] == 0.0)
+        assert_(hist[1] == 1.)
+        x = [1.0]
+        bins = [[0.,0.5,1.0]]
+        hist, _ = histogramdd(x, bins=bins)
+        assert_(hist[0] == 0.0)
+        assert_(hist[1] == 1.)
+        x = [1.0000000001]
+        bins = [[0.,0.5,1.0]]
+        hist, _ = histogramdd(x, bins=bins)
+        assert_(hist[0] == 0.0)
+        assert_(hist[1] == 1.)
+        x = [1.0001]
+        bins = [[0.,0.5,1.0]]
+        hist, _ = histogramdd(x, bins=bins)
+        assert_(hist[0] == 0.0)
+        assert_(hist[1] == 0.0)
+
 
 class TestUnique(TestCase):
     def test_simple(self):
@@ -1204,6 +1285,10 @@ class TestCorrCoef(TestCase):
          [0.66318558, 0.88157256, 0.71483595, -0.51366032, 1., 0.98317823],
          [0.51532523, 0.78052386, 0.83053601, -0.66173113, 0.98317823, 1.]])
 
+    def test_non_array(self):
+        assert_almost_equal(np.corrcoef([0, 1, 0], [1, 0, 1]),
+                            [[1., -1.], [-1.,  1.]])
+
     def test_simple(self):
         assert_almost_equal(corrcoef(self.A), self.res1)
         assert_almost_equal(corrcoef(self.A, self.B), self.res2)
@@ -1461,6 +1546,23 @@ class TestBincount(TestCase):
         y = np.bincount(x, minlength=5)
         assert_array_equal(y, np.zeros(5, dtype=int))
 
+    def test_with_incorrect_minlength(self):
+        x = np.array([], dtype=int)
+        assert_raises_regex(TypeError, "an integer is required",
+                            lambda: np.bincount(x, minlength="foobar"))
+        assert_raises_regex(ValueError, "must be positive",
+                            lambda: np.bincount(x, minlength=-1))
+        assert_raises_regex(ValueError, "must be positive",
+                            lambda: np.bincount(x, minlength=0))
+
+        x = np.arange(5)
+        assert_raises_regex(TypeError, "an integer is required",
+                            lambda: np.bincount(x, minlength="foobar"))
+        assert_raises_regex(ValueError, "minlength must be positive",
+                            lambda: np.bincount(x, minlength=-1))
+        assert_raises_regex(ValueError, "minlength must be positive",
+                            lambda: np.bincount(x, minlength=0))
+
 
 class TestInterp(TestCase):
     def test_exceptions(self):
@@ -1654,6 +1756,8 @@ class TestScoreatpercentile(TestCase):
                       interpolation='foobar')
         assert_raises(ValueError, np.percentile, [1], 101)
         assert_raises(ValueError, np.percentile, [1], -1)
+        assert_raises(ValueError, np.percentile, [1], list(range(50)) + [101])
+        assert_raises(ValueError, np.percentile, [1], list(range(50)) + [-0.1])
 
     def test_percentile_list(self):
         assert_equal(np.percentile([1, 2, 3], 0), 1)
@@ -1745,6 +1849,65 @@ class TestScoreatpercentile(TestCase):
         b = np.percentile([2, 3, 4, 1], [50], overwrite_input=True)
         assert_equal(b, np.array([2.5]))
 
+    def test_extended_axis(self):
+        o = np.random.normal(size=(71, 23))
+        x = np.dstack([o] * 10)
+        assert_equal(np.percentile(x, 30, axis=(0, 1)), np.percentile(o, 30))
+        x = np.rollaxis(x, -1, 0)
+        assert_equal(np.percentile(x, 30, axis=(-2, -1)), np.percentile(o, 30))
+        x = x.swapaxes(0, 1).copy()
+        assert_equal(np.percentile(x, 30, axis=(0, -1)), np.percentile(o, 30))
+        x = x.swapaxes(0, 1).copy()
+
+        assert_equal(np.percentile(x, [25, 60], axis=(0, 1, 2)),
+                     np.percentile(x, [25, 60], axis=None))
+        assert_equal(np.percentile(x, [25, 60], axis=(0,)),
+                     np.percentile(x, [25, 60], axis=0))
+
+        d = np.arange(3 * 5 * 7 * 11).reshape(3, 5, 7, 11)
+        np.random.shuffle(d)
+        assert_equal(np.percentile(d, 25,  axis=(0, 1, 2))[0],
+                     np.percentile(d[:, :, :, 0].flatten(), 25))
+        assert_equal(np.percentile(d, [10, 90], axis=(0, 1, 3))[:, 1],
+                     np.percentile(d[:, :, 1, :].flatten(), [10, 90]))
+        assert_equal(np.percentile(d, 25, axis=(3, 1, -4))[2],
+                     np.percentile(d[:, :, 2, :].flatten(), 25))
+        assert_equal(np.percentile(d, 25, axis=(3, 1, 2))[2],
+                     np.percentile(d[2, :, :, :].flatten(), 25))
+        assert_equal(np.percentile(d, 25, axis=(3, 2))[2, 1],
+                     np.percentile(d[2, 1, :, :].flatten(), 25))
+        assert_equal(np.percentile(d, 25, axis=(1, -2))[2, 1],
+                     np.percentile(d[2, :, :, 1].flatten(), 25))
+        assert_equal(np.percentile(d, 25, axis=(1, 3))[2, 2],
+                     np.percentile(d[2, :, 2, :].flatten(), 25))
+
+    def test_extended_axis_invalid(self):
+        d = np.ones((3, 5, 7, 11))
+        assert_raises(IndexError, np.percentile, d, axis=-5, q=25)
+        assert_raises(IndexError, np.percentile, d, axis=(0, -5), q=25)
+        assert_raises(IndexError, np.percentile, d, axis=4, q=25)
+        assert_raises(IndexError, np.percentile, d, axis=(0, 4), q=25)
+        assert_raises(ValueError, np.percentile, d, axis=(1, 1), q=25)
+
+    def test_keepdims(self):
+        d = np.ones((3, 5, 7, 11))
+        assert_equal(np.percentile(d, 7, axis=None, keepdims=True).shape,
+                     (1, 1, 1, 1))
+        assert_equal(np.percentile(d, 7, axis=(0, 1), keepdims=True).shape,
+                     (1, 1, 7, 11))
+        assert_equal(np.percentile(d, 7, axis=(0, 3), keepdims=True).shape,
+                     (1, 5, 7, 1))
+        assert_equal(np.percentile(d, 7, axis=(1,), keepdims=True).shape,
+                     (3, 1, 7, 11))
+        assert_equal(np.percentile(d, 7, (0, 1, 2, 3), keepdims=True).shape,
+                     (1, 1, 1, 1))
+        assert_equal(np.percentile(d, 7, axis=(0, 1, 3), keepdims=True).shape,
+                     (1, 1, 7, 1))
+
+        assert_equal(np.percentile(d, [1, 7], axis=(0, 1, 3),
+                                   keepdims=True).shape, (2, 1, 1, 7, 1))
+        assert_equal(np.percentile(d, [1, 7], axis=(0, 3),
+                                   keepdims=True).shape, (2, 1, 5, 7, 1))
 
 
 class TestMedian(TestCase):
@@ -1752,19 +1915,23 @@ class TestMedian(TestCase):
         a0 = np.array(1)
         a1 = np.arange(2)
         a2 = np.arange(6).reshape(2, 3)
-        assert_allclose(np.median(a0), 1)
+        assert_equal(np.median(a0), 1)
         assert_allclose(np.median(a1), 0.5)
         assert_allclose(np.median(a2), 2.5)
         assert_allclose(np.median(a2, axis=0), [1.5,  2.5,  3.5])
-        assert_allclose(np.median(a2, axis=1), [1, 4])
+        assert_equal(np.median(a2, axis=1), [1, 4])
         assert_allclose(np.median(a2, axis=None), 2.5)
 
         a = np.array([0.0444502, 0.0463301, 0.141249, 0.0606775])
         assert_almost_equal((a[1] + a[3]) / 2., np.median(a))
         a = np.array([0.0463301, 0.0444502, 0.141249])
-        assert_almost_equal(a[0], np.median(a))
+        assert_equal(a[0], np.median(a))
         a = np.array([0.0444502, 0.141249, 0.0463301])
-        assert_almost_equal(a[-1], np.median(a))
+        assert_equal(a[-1], np.median(a))
+        # check array scalar result
+        assert_equal(np.median(a).ndim, 0)
+        a[1] = np.nan
+        assert_equal(np.median(a).ndim, 0)
 
     def test_axis_keyword(self):
         a3 = np.array([[2, 3],
@@ -1838,6 +2005,66 @@ class TestMedian(TestCase):
         a = MySubClass([1,2,3])
         assert_equal(np.median(a), -7)
 
+    def test_object(self):
+        o = np.arange(7.);
+        assert_(type(np.median(o.astype(object))), float)
+        o[2] = np.nan
+        assert_(type(np.median(o.astype(object))), float)
+
+    def test_extended_axis(self):
+        o = np.random.normal(size=(71, 23))
+        x = np.dstack([o] * 10)
+        assert_equal(np.median(x, axis=(0, 1)), np.median(o))
+        x = np.rollaxis(x, -1, 0)
+        assert_equal(np.median(x, axis=(-2, -1)), np.median(o))
+        x = x.swapaxes(0, 1).copy()
+        assert_equal(np.median(x, axis=(0, -1)), np.median(o))
+
+        assert_equal(np.median(x, axis=(0, 1, 2)), np.median(x, axis=None))
+        assert_equal(np.median(x, axis=(0, )), np.median(x, axis=0))
+        assert_equal(np.median(x, axis=(-1, )), np.median(x, axis=-1))
+
+        d = np.arange(3 * 5 * 7 * 11).reshape(3, 5, 7, 11)
+        np.random.shuffle(d)
+        assert_equal(np.median(d, axis=(0, 1, 2))[0],
+                     np.median(d[:, :, :, 0].flatten()))
+        assert_equal(np.median(d, axis=(0, 1, 3))[1],
+                     np.median(d[:, :, 1, :].flatten()))
+        assert_equal(np.median(d, axis=(3, 1, -4))[2],
+                     np.median(d[:, :, 2, :].flatten()))
+        assert_equal(np.median(d, axis=(3, 1, 2))[2],
+                     np.median(d[2, :, :, :].flatten()))
+        assert_equal(np.median(d, axis=(3, 2))[2, 1],
+                     np.median(d[2, 1, :, :].flatten()))
+        assert_equal(np.median(d, axis=(1, -2))[2, 1],
+                     np.median(d[2, :, :, 1].flatten()))
+        assert_equal(np.median(d, axis=(1, 3))[2, 2],
+                     np.median(d[2, :, 2, :].flatten()))
+
+    def test_extended_axis_invalid(self):
+        d = np.ones((3, 5, 7, 11))
+        assert_raises(IndexError, np.median, d, axis=-5)
+        assert_raises(IndexError, np.median, d, axis=(0, -5))
+        assert_raises(IndexError, np.median, d, axis=4)
+        assert_raises(IndexError, np.median, d, axis=(0, 4))
+        assert_raises(ValueError, np.median, d, axis=(1, 1))
+
+    def test_keepdims(self):
+        d = np.ones((3, 5, 7, 11))
+        assert_equal(np.median(d, axis=None, keepdims=True).shape,
+                     (1, 1, 1, 1))
+        assert_equal(np.median(d, axis=(0, 1), keepdims=True).shape,
+                     (1, 1, 7, 11))
+        assert_equal(np.median(d, axis=(0, 3), keepdims=True).shape,
+                     (1, 5, 7, 1))
+        assert_equal(np.median(d, axis=(1,), keepdims=True).shape,
+                     (3, 1, 7, 11))
+        assert_equal(np.median(d, axis=(0, 1, 2, 3), keepdims=True).shape,
+                     (1, 1, 1, 1))
+        assert_equal(np.median(d, axis=(0, 1, 3), keepdims=True).shape,
+                     (1, 1, 7, 1))
+
+
 
 class TestAdd_newdoc_ufunc(TestCase):
 
diff --git a/numpy/lib/tests/test_stride_tricks.py b/numpy/lib/tests/test_stride_tricks.py
index 4d57d2ca7..3adcf0b41 100644
--- a/numpy/lib/tests/test_stride_tricks.py
+++ b/numpy/lib/tests/test_stride_tricks.py
@@ -3,6 +3,7 @@ from __future__ import division, absolute_import, print_function
 import numpy as np
 from numpy.testing import *
 from numpy.lib.stride_tricks import broadcast_arrays
+from numpy.lib.stride_tricks import as_strided
 
 
 def assert_shapes_correct(input_shapes, expected_shape):
@@ -214,6 +215,22 @@ def test_same_as_ufunc():
             assert_same_as_ufunc(input_shapes[0], input_shapes[1], False, True)
             assert_same_as_ufunc(input_shapes[0], input_shapes[1], True, True)
 
+def test_as_strided():
+    a = np.array([None])
+    a_view = as_strided(a)
+    expected = np.array([None])
+    assert_array_equal(a_view, np.array([None]))
+
+    a = np.array([1, 2, 3, 4])
+    a_view = as_strided(a, shape=(2,), strides=(2 * a.itemsize,))
+    expected = np.array([1, 3])
+    assert_array_equal(a_view, expected)
+
+    a = np.array([1, 2, 3, 4])
+    a_view = as_strided(a, shape=(3, 4), strides=(0, 1 * a.itemsize))
+    expected = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
+    assert_array_equal(a_view, expected)
+
 
 if __name__ == "__main__":
     run_module_suite()
diff --git a/numpy/lib/tests/test_twodim_base.py b/numpy/lib/tests/test_twodim_base.py
index 022c45bd0..9e81cfe4b 100644
--- a/numpy/lib/tests/test_twodim_base.py
+++ b/numpy/lib/tests/test_twodim_base.py
@@ -286,6 +286,7 @@ def test_tril_triu_ndim2():
         yield assert_equal, b.dtype, a.dtype
         yield assert_equal, c.dtype, a.dtype
 
+
 def test_tril_triu_ndim3():
     for dtype in np.typecodes['AllFloat'] + np.typecodes['AllInteger']:
         a = np.array([
@@ -324,16 +325,21 @@ def test_mask_indices():
 def test_tril_indices():
     # indices without and with offset
     il1 = tril_indices(4)
-    il2 = tril_indices(4, 2)
+    il2 = tril_indices(4, k=2)
+    il3 = tril_indices(4, m=5)
+    il4 = tril_indices(4, k=2, m=5)
 
     a = np.array([[1,  2,  3,  4],
                   [5,  6,  7,  8],
                   [9,  10, 11, 12],
                   [13, 14, 15, 16]])
+    b = np.arange(1, 21).reshape(4, 5)
 
     # indexing:
     yield (assert_array_equal, a[il1],
            array([1,  5,  6,  9, 10, 11, 13, 14, 15, 16]))
+    yield (assert_array_equal, b[il3],
+           array([1, 6, 7, 11, 12, 13, 16, 17, 18, 19]))
 
     # And for assigning values:
     a[il1] = -1
@@ -342,7 +348,12 @@ def test_tril_indices():
                   [-1, -1,  7, 8],
                   [-1, -1, -1, 12],
                   [-1, -1, -1, -1]]))
-
+    b[il3] = -1
+    yield (assert_array_equal, b,
+           array([[-1,  2,  3,  4,  5],
+                  [-1, -1,  8,  9, 10],
+                  [-1, -1, -1, 14, 15],
+                  [-1, -1, -1, -1, 20]]))
     # These cover almost the whole array (two diagonals right of the main one):
     a[il2] = -10
     yield (assert_array_equal, a,
@@ -350,21 +361,32 @@ def test_tril_indices():
                   [-10, -10, -10, -10],
                   [-10, -10, -10, -10],
                   [-10, -10, -10, -10]]))
+    b[il4] = -10
+    yield (assert_array_equal, b,
+           array([[-10, -10, -10,   4,   5],
+                  [-10, -10, -10, -10,  10],
+                  [-10, -10, -10, -10, -10],
+                  [-10, -10, -10, -10, -10]]))
 
 
 class TestTriuIndices(object):
     def test_triu_indices(self):
         iu1 = triu_indices(4)
-        iu2 = triu_indices(4, 2)
+        iu2 = triu_indices(4, k=2)
+        iu3 = triu_indices(4, m=5)
+        iu4 = triu_indices(4, k=2, m=5)
 
         a = np.array([[1,  2,  3,  4],
                       [5,  6,  7,  8],
                       [9,  10, 11, 12],
                       [13, 14, 15, 16]])
+        b = np.arange(1, 21).reshape(4, 5)
 
         # Both for indexing:
         yield (assert_array_equal, a[iu1],
                array([1, 2,  3,  4,  6, 7, 8, 11, 12, 16]))
+        yield (assert_array_equal, b[iu3],
+               array([1, 2, 3, 4, 5, 7, 8, 9, 10, 13, 14, 15, 19, 20]))
 
         # And for assigning values:
         a[iu1] = -1
@@ -373,6 +395,12 @@ class TestTriuIndices(object):
                       [5,  -1, -1, -1],
                       [9,  10, -1, -1],
                       [13, 14, 15, -1]]))
+        b[iu3] = -1
+        yield (assert_array_equal, b,
+               array([[-1, -1, -1, -1, -1],
+                      [ 6, -1, -1, -1, -1],
+                      [11, 12, -1, -1, -1],
+                      [16, 17, 18, -1, -1]]))
 
         # These cover almost the whole array (two diagonals right of the
         # main one):
@@ -382,20 +410,26 @@ class TestTriuIndices(object):
                       [5,   -1, -1,  -10],
                       [9,   10, -1,  -1],
                       [13,  14, 15,  -1]]))
+        b[iu4] = -10
+        yield (assert_array_equal, b,
+               array([[-1, -1, -10, -10, -10],
+                      [6, -1, -1, -10, -10],
+                      [11, 12, -1, -1, -10],
+                      [16, 17, 18, -1, -1]]))
 
 
 class TestTrilIndicesFrom(object):
     def test_exceptions(self):
         assert_raises(ValueError, tril_indices_from, np.ones((2,)))
         assert_raises(ValueError, tril_indices_from, np.ones((2, 2, 2)))
-        assert_raises(ValueError, tril_indices_from, np.ones((2, 3)))
+        # assert_raises(ValueError, tril_indices_from, np.ones((2, 3)))
 
 
 class TestTriuIndicesFrom(object):
     def test_exceptions(self):
         assert_raises(ValueError, triu_indices_from, np.ones((2,)))
         assert_raises(ValueError, triu_indices_from, np.ones((2, 2, 2)))
-        assert_raises(ValueError, triu_indices_from, np.ones((2, 3)))
+        # assert_raises(ValueError, triu_indices_from, np.ones((2, 3)))
 
 
 class TestVander(object):
diff --git a/numpy/lib/tests/test_utils.py b/numpy/lib/tests/test_utils.py
index 93c674766..36d5d6428 100644
--- a/numpy/lib/tests/test_utils.py
+++ b/numpy/lib/tests/test_utils.py
@@ -1,6 +1,7 @@
 from __future__ import division, absolute_import, print_function
 
 import sys
+from numpy.core import arange
 from numpy.testing import *
 import numpy.lib.utils as utils
 from numpy.lib import deprecate
@@ -46,9 +47,17 @@ def test_deprecate_fn():
     assert_('old_func3' in new_func3.__doc__)
     assert_('new_func3' in new_func3.__doc__)
 
+
 def test_safe_eval_nameconstant():
     # Test if safe_eval supports Python 3.4 _ast.NameConstant
     utils.safe_eval('None')
 
+
+def test_byte_bounds():
+    a = arange(12).reshape(3, 4)
+    low, high = utils.byte_bounds(a)
+    assert_equal(high - low, a.size * a.itemsize)
+
+
 if __name__ == "__main__":
     run_module_suite()
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py
index 336f23c64..20b5cdd67 100644
--- a/numpy/lib/twodim_base.py
+++ b/numpy/lib/twodim_base.py
@@ -11,8 +11,23 @@ __all__ = ['diag', 'diagflat', 'eye', 'fliplr', 'flipud', 'rot90', 'tri',
 
 from numpy.core.numeric import (
     asanyarray, subtract, arange, zeros, greater_equal, multiply, ones,
-    asarray, where,
+    asarray, where, dtype as np_dtype, less, int8, int16, int32, int64
     )
+from numpy.core import iinfo
+
+
+i1 = iinfo(int8)
+i2 = iinfo(int16)
+i4 = iinfo(int32)
+def _min_int(low, high):
+    """ get small int that fits the range """
+    if high <= i1.max and low >= i1.min:
+        return int8
+    if high <= i2.max and low >= i2.min:
+        return int16
+    if high <= i4.max and low >= i4.min:
+        return int32
+    return int64
 
 
 def fliplr(m):
@@ -372,6 +387,7 @@ def tri(N, M=None, k=0, dtype=float):
     dtype : dtype, optional
         Data type of the returned array.  The default is float.
 
+
     Returns
     -------
     tri : ndarray of shape (N, M)
@@ -393,8 +409,15 @@ def tri(N, M=None, k=0, dtype=float):
     """
     if M is None:
         M = N
-    m = greater_equal(subtract.outer(arange(N), arange(M)), -k)
-    return m.astype(dtype)
+
+    m = greater_equal.outer(arange(N, dtype=_min_int(0, N)),
+                            arange(-k, M-k, dtype=_min_int(-k, M - k)))
+
+    # Avoid making a copy if the requested type is already bool
+    if np_dtype(dtype) != np_dtype(bool):
+        m = m.astype(dtype)
+
+    return m
 
 
 def tril(m, k=0):
@@ -430,8 +453,7 @@ def tril(m, k=0):
 
     """
     m = asanyarray(m)
-    out = multiply(tri(m.shape[-2], m.shape[-1], k=k, dtype=m.dtype), m)
-    return out
+    return multiply(tri(*m.shape[-2:], k=k, dtype=bool), m, dtype=m.dtype)
 
 
 def triu(m, k=0):
@@ -457,8 +479,7 @@ def triu(m, k=0):
 
     """
     m = asanyarray(m)
-    out = multiply((1 - tri(m.shape[-2], m.shape[-1], k - 1, dtype=m.dtype)), m)
-    return out
+    return multiply(~tri(*m.shape[-2:], k=k-1, dtype=bool), m, dtype=m.dtype)
 
 
 # Originally borrowed from John Hunter and matplotlib
@@ -558,9 +579,11 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
     Parameters
     ----------
     x : array_like, shape (N,)
-        An array containing the x coordinates of the points to be histogrammed.
+        An array containing the x coordinates of the points to be
+        histogrammed.
     y : array_like, shape (N,)
-        An array containing the y coordinates of the points to be histogrammed.
+        An array containing the y coordinates of the points to be
+        histogrammed.
     bins : int or [int, int] or array_like or [array, array], optional
         The bin specification:
 
@@ -578,13 +601,13 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
         ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range
         will be considered outliers and not tallied in the histogram.
     normed : bool, optional
-        If False, returns the number of samples in each bin. If True, returns
-        the bin density, i.e. the bin count divided by the bin area.
+        If False, returns the number of samples in each bin. If True,
+        returns the bin density ``bin_count / sample_count / bin_area``.
     weights : array_like, shape(N,), optional
-        An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. Weights
-        are normalized to 1 if `normed` is True. If `normed` is False, the
-        values of the returned histogram are equal to the sum of the weights
-        belonging to the samples falling into each bin.
+        An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
+        Weights are normalized to 1 if `normed` is True. If `normed` is
+        False, the values of the returned histogram are equal to the sum of
+        the weights belonging to the samples falling into each bin.
 
     Returns
     -------
@@ -604,20 +627,15 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
 
     Notes
     -----
-    When `normed` is True, then the returned histogram is the sample density,
-    defined such that:
-
-    .. math::
-      \\sum_{i=0}^{nx-1} \\sum_{j=0}^{ny-1} H_{i,j} \\Delta x_i \\Delta y_j = 1
-
-    where `H` is the histogram array and :math:`\\Delta x_i \\Delta y_i`
-    the area of bin ``{i,j}``.
+    When `normed` is True, then the returned histogram is the sample
+    density, defined such that the sum over bins of the product
+    ``bin_value * bin_area`` is 1.
 
     Please note that the histogram does not follow the Cartesian convention
-    where `x` values are on the abcissa and `y` values on the ordinate axis.
-    Rather, `x` is histogrammed along the first dimension of the array
-    (vertical), and `y` along the second dimension of the array (horizontal).
-    This ensures compatibility with `histogramdd`.
+    where `x` values are on the abscissa and `y` values on the ordinate
+    axis.  Rather, `x` is histogrammed along the first dimension of the
+    array (vertical), and `y` along the second dimension of the array
+    (horizontal).  This ensures compatibility with `histogramdd`.
 
     Examples
     --------
@@ -651,7 +669,7 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
     >>> ax = fig.add_subplot(131)
     >>> ax.set_title('imshow: equidistant')
     >>> im = plt.imshow(H, interpolation='nearest', origin='low',
-                        extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
+                    extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
 
     pcolormesh can display exact bin edges:
 
@@ -760,17 +778,24 @@ def mask_indices(n, mask_func, k=0):
     return where(a != 0)
 
 
-def tril_indices(n, k=0):
+def tril_indices(n, k=0, m=None):
     """
-    Return the indices for the lower-triangle of an (n, n) array.
+    Return the indices for the lower-triangle of an (n, m) array.
 
     Parameters
     ----------
     n : int
-        The row dimension of the square arrays for which the returned
+        The row dimension of the arrays for which the returned
         indices will be valid.
     k : int, optional
         Diagonal offset (see `tril` for details).
+    m : int, optional
+        .. versionadded:: 1.9.0
+
+        The column dimension of the arrays for which the returned
+        arrays will be valid.
+        By default `m` is taken equal to `n`.
+
 
     Returns
     -------
@@ -830,7 +855,7 @@ def tril_indices(n, k=0):
            [-10, -10, -10, -10]])
 
     """
-    return mask_indices(n, tril, k)
+    return where(tri(n, m, k=k, dtype=bool))
 
 
 def tril_indices_from(arr, k=0):
@@ -856,14 +881,14 @@ def tril_indices_from(arr, k=0):
     .. versionadded:: 1.4.0
 
     """
-    if not (arr.ndim == 2 and arr.shape[0] == arr.shape[1]):
-        raise ValueError("input array must be 2-d and square")
-    return tril_indices(arr.shape[0], k)
+    if arr.ndim != 2:
+        raise ValueError("input array must be 2-d")
+    return tril_indices(arr.shape[-2], k=k, m=arr.shape[-1])
 
 
-def triu_indices(n, k=0):
+def triu_indices(n, k=0, m=None):
     """
-    Return the indices for the upper-triangle of an (n, n) array.
+    Return the indices for the upper-triangle of an (n, m) array.
 
     Parameters
     ----------
@@ -872,6 +897,13 @@ def triu_indices(n, k=0):
         be valid.
     k : int, optional
         Diagonal offset (see `triu` for details).
+    m : int, optional
+        .. versionadded:: 1.9.0
+
+        The column dimension of the arrays for which the returned
+        arrays will be valid.
+        By default `m` is taken equal to `n`.
+
 
     Returns
     -------
@@ -933,12 +965,12 @@ def triu_indices(n, k=0):
            [ 12,  13,  14,  -1]])
 
     """
-    return mask_indices(n, triu, k)
+    return where(~tri(n, m, k=k-1, dtype=bool))
 
 
 def triu_indices_from(arr, k=0):
     """
-    Return the indices for the upper-triangle of a (N, N) array.
+    Return the indices for the upper-triangle of arr.
 
     See `triu_indices` for full details.
 
@@ -963,6 +995,6 @@ def triu_indices_from(arr, k=0):
     .. versionadded:: 1.4.0
 
     """
-    if not (arr.ndim == 2 and arr.shape[0] == arr.shape[1]):
-        raise ValueError("input array must be 2-d and square")
-    return triu_indices(arr.shape[0], k)
+    if arr.ndim != 2:
+        raise ValueError("input array must be 2-d")
+    return triu_indices(arr.shape[-2], k=k, m=arr.shape[-1])
diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py
index 1f1cdfc8a..6d41e8eb4 100644
--- a/numpy/lib/utils.py
+++ b/numpy/lib/utils.py
@@ -6,7 +6,7 @@ import types
 import re
 
 from numpy.core.numerictypes import issubclass_, issubsctype, issubdtype
-from numpy.core import product, ndarray, ufunc
+from numpy.core import product, ndarray, ufunc, asarray
 
 __all__ = [
     'issubclass_', 'issubsctype', 'issubdtype', 'deprecate',
@@ -210,8 +210,9 @@ def byte_bounds(a):
     a_data = ai['data'][0]
     astrides = ai['strides']
     ashape = ai['shape']
-    bytes_a = int(ai['typestr'][2:])
-
+   
+    bytes_a = asarray(a).dtype.itemsize
+       
     a_low = a_high = a_data
     if astrides is None: # contiguous case
         a_high += a.size * bytes_a