118 files changed, 6148 insertions, 2380 deletions
diff --git a/numpy/__init__.py b/numpy/__init__.py
index e1df236bb..ba88c733f 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -163,6 +163,8 @@ else:
         from __builtin__ import bool, int, float, complex, object, unicode, str
 
     from .core import round, abs, max, min
+    # now that numpy modules are imported, can initialize limits
+    core.getlimits._register_known_types()
 
     __all__.extend(['__version__', 'show_config'])
     __all__.extend(core.__all__)
diff --git a/numpy/compat/py3k.py b/numpy/compat/py3k.py
index ce4543bc3..8e06ead78 100644
--- a/numpy/compat/py3k.py
+++ b/numpy/compat/py3k.py
@@ -8,13 +8,13 @@ __all__ = ['bytes', 'asbytes', 'isfileobj', 'getexception', 'strchar',
            'unicode', 'asunicode', 'asbytes_nested', 'asunicode_nested',
            'asstr', 'open_latin1', 'long', 'basestring', 'sixu',
            'integer_types', 'is_pathlib_path', 'npy_load_module', 'Path',
-           'contextlib_nullcontext']
+           'contextlib_nullcontext', 'os_fspath', 'os_PathLike']
 
 import sys
 try:
-    from pathlib import Path
+    from pathlib import Path, PurePath
 except ImportError:
-    Path = None
+    Path = PurePath = None
 
 if sys.version_info[0] >= 3:
     import io
@@ -95,6 +95,8 @@ def asunicode_nested(x):
 def is_pathlib_path(obj):
     """
     Check whether obj is a pathlib.Path object.
+
+    Prefer using `isinstance(obj, os_PathLike)` instead of this function.
     """
     return Path is not None and isinstance(obj, Path)
 
@@ -177,3 +179,65 @@ else:
         finally:
             fo.close()
         return mod
+
+# backport abc.ABC
+import abc
+if sys.version_info[:2] >= (3, 4):
+    abc_ABC = abc.ABC
+else:
+    abc_ABC = abc.ABCMeta('ABC', (object,), {'__slots__': ()})
+
+
+# Backport os.fs_path, os.PathLike, and PurePath.__fspath__
+if sys.version_info[:2] >= (3, 6):
+    import os
+    os_fspath = os.fspath
+    os_PathLike = os.PathLike
+else:
+    def _PurePath__fspath__(self):
+        return str(self)
+
+    class os_PathLike(abc_ABC):
+        """Abstract base class for implementing the file system path protocol."""
+
+        @abc.abstractmethod
+        def __fspath__(self):
+            """Return the file system path representation of the object."""
+            raise NotImplementedError
+
+        @classmethod
+        def __subclasshook__(cls, subclass):
+            if PurePath is not None and issubclass(subclass, PurePath):
+                return True
+            return hasattr(subclass, '__fspath__')
+
+
+    def os_fspath(path):
+        """Return the path representation of a path-like object.
+        If str or bytes is passed in, it is returned unchanged. Otherwise the
+        os.PathLike interface is used to get the path representation. If the
+        path representation is not str or bytes, TypeError is raised. If the
+        provided path is not str, bytes, or os.PathLike, TypeError is raised.
+        """
+        if isinstance(path, (str, bytes)):
+            return path
+
+        # Work from the object's type to match method resolution of other magic
+        # methods.
+        path_type = type(path)
+        try:
+            path_repr = path_type.__fspath__(path)
+        except AttributeError:
+            if hasattr(path_type, '__fspath__'):
+                raise
+            elif PurePath is not None and issubclass(path_type, PurePath):
+                return _PurePath__fspath__(path)
+            else:
+                raise TypeError("expected str, bytes or os.PathLike object, "
+                                "not " + path_type.__name__)
+        if isinstance(path_repr, (str, bytes)):
+            return path_repr
+        else:
+            raise TypeError("expected {}.__fspath__() to return str or bytes, "
+                            "not {}".format(path_type.__name__,
+                                            type(path_repr).__name__))
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index 1c82cfde4..ea472f1b3 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -947,66 +947,6 @@ add_newdoc('numpy.core.multiarray', 'empty',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'empty_like',
-    """
-    empty_like(prototype, dtype=None, order='K', subok=True)
-
-    Return a new array with the same shape and type as a given array.
-
-    Parameters
-    ----------
-    prototype : array_like
-        The shape and data-type of `prototype` define these same attributes
-        of the returned array.
-    dtype : data-type, optional
-        Overrides the data type of the result.
-
-        .. versionadded:: 1.6.0
-    order : {'C', 'F', 'A', or 'K'}, optional
-        Overrides the memory layout of the result. 'C' means C-order,
-        'F' means F-order, 'A' means 'F' if ``prototype`` is Fortran
-        contiguous, 'C' otherwise. 'K' means match the layout of ``prototype``
-        as closely as possible.
-
-        .. versionadded:: 1.6.0
-    subok : bool, optional.
-        If True, then the newly created array will use the sub-class
-        type of 'a', otherwise it will be a base-class array. Defaults
-        to True.
-
-    Returns
-    -------
-    out : ndarray
-        Array of uninitialized (arbitrary) data with the same
-        shape and type as `prototype`.
-
-    See Also
-    --------
-    ones_like : Return an array of ones with shape and type of input.
-    zeros_like : Return an array of zeros with shape and type of input.
-    full_like : Return a new array with shape of input filled with value.
-    empty : Return a new uninitialized array.
-
-    Notes
-    -----
-    This function does *not* initialize the returned array; to do that use
-    `zeros_like` or `ones_like` instead.  It may be marginally faster than
-    the functions that do set the array values.
-
-    Examples
-    --------
-    >>> a = ([1,2,3], [4,5,6])                         # a is array-like
-    >>> np.empty_like(a)
-    array([[-1073741821, -1073741821,           3],    #random
-           [          0,           0, -1073741821]])
-    >>> a = np.array([[1., 2., 3.],[4.,5.,6.]])
-    >>> np.empty_like(a)
-    array([[ -2.00000715e+000,   1.48219694e-323,  -2.00000572e+000],#random
-           [  4.38791518e-305,  -2.00000715e+000,   4.17269252e-309]])
-
-    """)
-
-
 add_newdoc('numpy.core.multiarray', 'scalar',
     """
     scalar(dtype, obj)
@@ -1284,163 +1224,6 @@ add_newdoc('numpy.core.multiarray', 'frombuffer',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'concatenate',
-    """
-    concatenate((a1, a2, ...), axis=0, out=None)
-
-    Join a sequence of arrays along an existing axis.
-
-    Parameters
-    ----------
-    a1, a2, ... : sequence of array_like
-        The arrays must have the same shape, except in the dimension
-        corresponding to `axis` (the first, by default).
-    axis : int, optional
-        The axis along which the arrays will be joined.  If axis is None,
-        arrays are flattened before use.  Default is 0.
-    out : ndarray, optional
-        If provided, the destination to place the result. The shape must be
-        correct, matching that of what concatenate would have returned if no
-        out argument were specified.
-
-    Returns
-    -------
-    res : ndarray
-        The concatenated array.
-
-    See Also
-    --------
-    ma.concatenate : Concatenate function that preserves input masks.
-    array_split : Split an array into multiple sub-arrays of equal or
-                  near-equal size.
-    split : Split array into a list of multiple sub-arrays of equal size.
-    hsplit : Split array into multiple sub-arrays horizontally (column wise)
-    vsplit : Split array into multiple sub-arrays vertically (row wise)
-    dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
-    stack : Stack a sequence of arrays along a new axis.
-    hstack : Stack arrays in sequence horizontally (column wise)
-    vstack : Stack arrays in sequence vertically (row wise)
-    dstack : Stack arrays in sequence depth wise (along third dimension)
-    block : Assemble arrays from blocks.
-
-    Notes
-    -----
-    When one or more of the arrays to be concatenated is a MaskedArray,
-    this function will return a MaskedArray object instead of an ndarray,
-    but the input masks are *not* preserved. In cases where a MaskedArray
-    is expected as input, use the ma.concatenate function from the masked
-    array module instead.
-
-    Examples
-    --------
-    >>> a = np.array([[1, 2], [3, 4]])
-    >>> b = np.array([[5, 6]])
-    >>> np.concatenate((a, b), axis=0)
-    array([[1, 2],
-           [3, 4],
-           [5, 6]])
-    >>> np.concatenate((a, b.T), axis=1)
-    array([[1, 2, 5],
-           [3, 4, 6]])
-    >>> np.concatenate((a, b), axis=None)
-    array([1, 2, 3, 4, 5, 6])
-
-    This function will not preserve masking of MaskedArray inputs.
-
-    >>> a = np.ma.arange(3)
-    >>> a[1] = np.ma.masked
-    >>> b = np.arange(2, 5)
-    >>> a
-    masked_array(data=[0, --, 2],
-                 mask=[False,  True, False],
-           fill_value=999999)
-    >>> b
-    array([2, 3, 4])
-    >>> np.concatenate([a, b])
-    masked_array(data=[0, 1, 2, 2, 3, 4],
-                 mask=False,
-           fill_value=999999)
-    >>> np.ma.concatenate([a, b])
-    masked_array(data=[0, --, 2, 2, 3, 4],
-                 mask=[False,  True, False, False, False, False],
-           fill_value=999999)
-
-    """)
-
-add_newdoc('numpy.core', 'inner',
-    """
-    inner(a, b)
-
-    Inner product of two arrays.
-
-    Ordinary inner product of vectors for 1-D arrays (without complex
-    conjugation), in higher dimensions a sum product over the last axes.
-
-    Parameters
-    ----------
-    a, b : array_like
-        If `a` and `b` are nonscalar, their last dimensions must match.
-
-    Returns
-    -------
-    out : ndarray
-        `out.shape = a.shape[:-1] + b.shape[:-1]`
-
-    Raises
-    ------
-    ValueError
-        If the last dimension of `a` and `b` has different size.
-
-    See Also
-    --------
-    tensordot : Sum products over arbitrary axes.
-    dot : Generalised matrix product, using second last dimension of `b`.
-    einsum : Einstein summation convention.
-
-    Notes
-    -----
-    For vectors (1-D arrays) it computes the ordinary inner-product::
-
-        np.inner(a, b) = sum(a[:]*b[:])
-
-    More generally, if `ndim(a) = r > 0` and `ndim(b) = s > 0`::
-
-        np.inner(a, b) = np.tensordot(a, b, axes=(-1,-1))
-
-    or explicitly::
-
-        np.inner(a, b)[i0,...,ir-1,j0,...,js-1]
-             = sum(a[i0,...,ir-1,:]*b[j0,...,js-1,:])
-
-    In addition `a` or `b` may be scalars, in which case::
-
-       np.inner(a,b) = a*b
-
-    Examples
-    --------
-    Ordinary inner product for vectors:
-
-    >>> a = np.array([1,2,3])
-    >>> b = np.array([0,1,0])
-    >>> np.inner(a, b)
-    2
-
-    A multidimensional example:
-
-    >>> a = np.arange(24).reshape((2,3,4))
-    >>> b = np.arange(4)
-    >>> np.inner(a, b)
-    array([[ 14,  38,  62],
-           [ 86, 110, 134]])
-
-    An example where `b` is a scalar:
-
-    >>> np.inner(np.eye(2), 7)
-    array([[ 7.,  0.],
-           [ 0.,  7.]])
-
-    """)
-
 add_newdoc('numpy.core', 'fastCopyAndTranspose',
     """_fastCopyAndTranspose(a)""")
 
@@ -1575,263 +1358,6 @@ add_newdoc('numpy.core.multiarray', 'set_numeric_ops',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'where',
-    """
-    where(condition, [x, y])
-
-    Return elements chosen from `x` or `y` depending on `condition`.
-
-    .. note::
-        When only `condition` is provided, this function is a shorthand for
-        ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
-        preferred, as it behaves correctly for subclasses. The rest of this
-        documentation covers only the case where all three arguments are
-        provided.
-
-    Parameters
-    ----------
-    condition : array_like, bool
-        Where True, yield `x`, otherwise yield `y`.
-    x, y : array_like
-        Values from which to choose. `x`, `y` and `condition` need to be
-        broadcastable to some shape.
-
-    Returns
-    -------
-    out : ndarray
-        An array with elements from `x` where `condition` is True, and elements
-        from `y` elsewhere.
-
-    See Also
-    --------
-    choose
-    nonzero : The function that is called when x and y are omitted
-
-    Notes
-    -----
-    If all the arrays are 1-D, `where` is equivalent to::
-
-        [xv if c else yv
-         for c, xv, yv in zip(condition, x, y)]
-
-    Examples
-    --------
-    >>> a = np.arange(10)
-    >>> a
-    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-    >>> np.where(a < 5, a, 10*a)
-    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
-
-    This can be used on multidimensional arrays too:
-
-    >>> np.where([[True, False], [True, True]],
-    ...          [[1, 2], [3, 4]],
-    ...          [[9, 8], [7, 6]])
-    array([[1, 8],
-           [3, 4]])
-
-    The shapes of x, y, and the condition are broadcast together:
-
-    >>> x, y = np.ogrid[:3, :4]
-    >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
-    array([[10,  0,  0,  0],
-           [10, 11,  1,  1],
-           [10, 11, 12,  2]])
-
-    >>> a = np.array([[0, 1, 2],
-    ...               [0, 2, 4],
-    ...               [0, 3, 6]])
-    >>> np.where(a < 4, a, -1)  # -1 is broadcast
-    array([[ 0,  1,  2],
-           [ 0,  2, -1],
-           [ 0,  3, -1]])
-    """)
-
-
-add_newdoc('numpy.core.multiarray', 'lexsort',
-    """
-    lexsort(keys, axis=-1)
-
-    Perform an indirect stable sort using a sequence of keys.
-
-    Given multiple sorting keys, which can be interpreted as columns in a
-    spreadsheet, lexsort returns an array of integer indices that describes
-    the sort order by multiple columns. The last key in the sequence is used
-    for the primary sort order, the second-to-last key for the secondary sort
-    order, and so on. The keys argument must be a sequence of objects that
-    can be converted to arrays of the same shape. If a 2D array is provided
-    for the keys argument, it's rows are interpreted as the sorting keys and
-    sorting is according to the last row, second last row etc.
-
-    Parameters
-    ----------
-    keys : (k, N) array or tuple containing k (N,)-shaped sequences
-        The `k` different "columns" to be sorted.  The last column (or row if
-        `keys` is a 2D array) is the primary sort key.
-    axis : int, optional
-        Axis to be indirectly sorted.  By default, sort over the last axis.
-
-    Returns
-    -------
-    indices : (N,) ndarray of ints
-        Array of indices that sort the keys along the specified axis.
-
-    See Also
-    --------
-    argsort : Indirect sort.
-    ndarray.sort : In-place sort.
-    sort : Return a sorted copy of an array.
-
-    Examples
-    --------
-    Sort names: first by surname, then by name.
-
-    >>> surnames =    ('Hertz',    'Galilei', 'Hertz')
-    >>> first_names = ('Heinrich', 'Galileo', 'Gustav')
-    >>> ind = np.lexsort((first_names, surnames))
-    >>> ind
-    array([1, 2, 0])
-
-    >>> [surnames[i] + ", " + first_names[i] for i in ind]
-    ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich']
-
-    Sort two columns of numbers:
-
-    >>> a = [1,5,1,4,3,4,4] # First column
-    >>> b = [9,4,0,4,0,2,1] # Second column
-    >>> ind = np.lexsort((b,a)) # Sort by a, then by b
-    >>> print(ind)
-    [2 0 4 6 5 3 1]
-
-    >>> [(a[i],b[i]) for i in ind]
-    [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)]
-
-    Note that sorting is first according to the elements of ``a``.
-    Secondary sorting is according to the elements of ``b``.
-
-    A normal ``argsort`` would have yielded:
-
-    >>> [(a[i],b[i]) for i in np.argsort(a)]
-    [(1, 9), (1, 0), (3, 0), (4, 4), (4, 2), (4, 1), (5, 4)]
-
-    Structured arrays are sorted lexically by ``argsort``:
-
-    >>> x = np.array([(1,9), (5,4), (1,0), (4,4), (3,0), (4,2), (4,1)],
-    ...              dtype=np.dtype([('x', int), ('y', int)]))
-
-    >>> np.argsort(x) # or np.argsort(x, order=('x', 'y'))
-    array([2, 0, 4, 6, 5, 3, 1])
-
-    """)
-
-add_newdoc('numpy.core.multiarray', 'can_cast',
-    """
-    can_cast(from_, to, casting='safe')
-
-    Returns True if cast between data types can occur according to the
-    casting rule.  If from is a scalar or array scalar, also returns
-    True if the scalar value can be cast without overflow or truncation
-    to an integer.
-
-    Parameters
-    ----------
-    from_ : dtype, dtype specifier, scalar, or array
-        Data type, scalar, or array to cast from.
-    to : dtype or dtype specifier
-        Data type to cast to.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
-        Controls what kind of data casting may occur.
-
-          * 'no' means the data types should not be cast at all.
-          * 'equiv' means only byte-order changes are allowed.
-          * 'safe' means only casts which can preserve values are allowed.
-          * 'same_kind' means only safe casts or casts within a kind,
-            like float64 to float32, are allowed.
-          * 'unsafe' means any data conversions may be done.
-
-    Returns
-    -------
-    out : bool
-        True if cast can occur according to the casting rule.
-
-    Notes
-    -----
-    Starting in NumPy 1.9, can_cast function now returns False in 'safe'
-    casting mode for integer/float dtype and string dtype if the string dtype
-    length is not long enough to store the max integer/float value converted
-    to a string. Previously can_cast in 'safe' mode returned True for
-    integer/float dtype and a string dtype of any length.
-
-    See also
-    --------
-    dtype, result_type
-
-    Examples
-    --------
-    Basic examples
-
-    >>> np.can_cast(np.int32, np.int64)
-    True
-    >>> np.can_cast(np.float64, complex)
-    True
-    >>> np.can_cast(complex, float)
-    False
-
-    >>> np.can_cast('i8', 'f8')
-    True
-    >>> np.can_cast('i8', 'f4')
-    False
-    >>> np.can_cast('i4', 'S4')
-    False
-
-    Casting scalars
-
-    >>> np.can_cast(100, 'i1')
-    True
-    >>> np.can_cast(150, 'i1')
-    False
-    >>> np.can_cast(150, 'u1')
-    True
-
-    >>> np.can_cast(3.5e100, np.float32)
-    False
-    >>> np.can_cast(1000.0, np.float32)
-    True
-
-    Array scalar checks the value, array does not
-
-    >>> np.can_cast(np.array(1000.0), np.float32)
-    True
-    >>> np.can_cast(np.array([1000.0]), np.float32)
-    False
-
-    Using the casting rules
-
-    >>> np.can_cast('i8', 'i8', 'no')
-    True
-    >>> np.can_cast('<i8', '>i8', 'no')
-    False
-
-    >>> np.can_cast('<i8', '>i8', 'equiv')
-    True
-    >>> np.can_cast('<i4', '>i8', 'equiv')
-    False
-
-    >>> np.can_cast('<i4', '>i8', 'safe')
-    True
-    >>> np.can_cast('<i8', '>i4', 'safe')
-    False
-
-    >>> np.can_cast('<i8', '>i4', 'same_kind')
-    True
-    >>> np.can_cast('<i8', '>u4', 'same_kind')
-    False
-
-    >>> np.can_cast('<i8', '>u4', 'unsafe')
-    True
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'promote_types',
     """
     promote_types(type1, type2)
@@ -1892,123 +1418,6 @@ add_newdoc('numpy.core.multiarray', 'promote_types',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'min_scalar_type',
-    """
-    min_scalar_type(a)
-
-    For scalar ``a``, returns the data type with the smallest size
-    and smallest scalar kind which can hold its value.  For non-scalar
-    array ``a``, returns the vector's dtype unmodified.
-
-    Floating point values are not demoted to integers,
-    and complex values are not demoted to floats.
-
-    Parameters
-    ----------
-    a : scalar or array_like
-        The value whose minimal data type is to be found.
-
-    Returns
-    -------
-    out : dtype
-        The minimal data type.
-
-    Notes
-    -----
-    .. versionadded:: 1.6.0
-
-    See Also
-    --------
-    result_type, promote_types, dtype, can_cast
-
-    Examples
-    --------
-    >>> np.min_scalar_type(10)
-    dtype('uint8')
-
-    >>> np.min_scalar_type(-260)
-    dtype('int16')
-
-    >>> np.min_scalar_type(3.1)
-    dtype('float16')
-
-    >>> np.min_scalar_type(1e50)
-    dtype('float64')
-
-    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
-    dtype('float64')
-
-    """)
-
-add_newdoc('numpy.core.multiarray', 'result_type',
-    """
-    result_type(*arrays_and_dtypes)
-
-    Returns the type that results from applying the NumPy
-    type promotion rules to the arguments.
-
-    Type promotion in NumPy works similarly to the rules in languages
-    like C++, with some slight differences.  When both scalars and
-    arrays are used, the array's type takes precedence and the actual value
-    of the scalar is taken into account.
-
-    For example, calculating 3*a, where a is an array of 32-bit floats,
-    intuitively should result in a 32-bit float output.  If the 3 is a
-    32-bit integer, the NumPy rules indicate it can't convert losslessly
-    into a 32-bit float, so a 64-bit float should be the result type.
-    By examining the value of the constant, '3', we see that it fits in
-    an 8-bit integer, which can be cast losslessly into the 32-bit float.
-
-    Parameters
-    ----------
-    arrays_and_dtypes : list of arrays and dtypes
-        The operands of some operation whose result type is needed.
-
-    Returns
-    -------
-    out : dtype
-        The result type.
-
-    See also
-    --------
-    dtype, promote_types, min_scalar_type, can_cast
-
-    Notes
-    -----
-    .. versionadded:: 1.6.0
-
-    The specific algorithm used is as follows.
-
-    Categories are determined by first checking which of boolean,
-    integer (int/uint), or floating point (float/complex) the maximum
-    kind of all the arrays and the scalars are.
-
-    If there are only scalars or the maximum category of the scalars
-    is higher than the maximum category of the arrays,
-    the data types are combined with :func:`promote_types`
-    to produce the return value.
-
-    Otherwise, `min_scalar_type` is called on each array, and
-    the resulting data types are all combined with :func:`promote_types`
-    to produce the return value.
-
-    The set of int values is not a subset of the uint values for types
-    with the same number of bits, something not reflected in
-    :func:`min_scalar_type`, but handled as a special case in `result_type`.
-
-    Examples
-    --------
-    >>> np.result_type(3, np.arange(7, dtype='i1'))
-    dtype('int8')
-
-    >>> np.result_type('i4', 'c8')
-    dtype('complex128')
-
-    >>> np.result_type(3.0, -2)
-    dtype('float64')
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'newbuffer',
     """
     newbuffer(size)
@@ -2061,91 +1470,6 @@ add_newdoc('numpy.core.multiarray', 'getbuffer',
 
     """)
 
-add_newdoc('numpy.core', 'dot',
-    """
-    dot(a, b, out=None)
-
-    Dot product of two arrays. Specifically,
-
-    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
-      (without complex conjugation).
-
-    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
-      but using :func:`matmul` or ``a @ b`` is preferred.
-
-    - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
-      and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
-
-    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
-      the last axis of `a` and `b`.
-
-    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
-      sum product over the last axis of `a` and the second-to-last axis of `b`::
-
-        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
-
-    Parameters
-    ----------
-    a : array_like
-        First argument.
-    b : array_like
-        Second argument.
-    out : ndarray, optional
-        Output argument. This must have the exact kind that would be returned
-        if it was not used. In particular, it must have the right type, must be
-        C-contiguous, and its dtype must be the dtype that would be returned
-        for `dot(a,b)`. This is a performance feature. Therefore, if these
-        conditions are not met, an exception is raised, instead of attempting
-        to be flexible.
-
-    Returns
-    -------
-    output : ndarray
-        Returns the dot product of `a` and `b`.  If `a` and `b` are both
-        scalars or both 1-D arrays then a scalar is returned; otherwise
-        an array is returned.
-        If `out` is given, then it is returned.
-
-    Raises
-    ------
-    ValueError
-        If the last dimension of `a` is not the same size as
-        the second-to-last dimension of `b`.
-
-    See Also
-    --------
-    vdot : Complex-conjugating dot product.
-    tensordot : Sum products over arbitrary axes.
-    einsum : Einstein summation convention.
-    matmul : '@' operator as method with out parameter.
-
-    Examples
-    --------
-    >>> np.dot(3, 4)
-    12
-
-    Neither argument is complex-conjugated:
-
-    >>> np.dot([2j, 3j], [2j, 3j])
-    (-13+0j)
-
-    For 2-D arrays it is the matrix product:
-
-    >>> a = [[1, 0], [0, 1]]
-    >>> b = [[4, 1], [2, 2]]
-    >>> np.dot(a, b)
-    array([[4, 1],
-           [2, 2]])
-
-    >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
-    >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
-    >>> np.dot(a, b)[2,3,2,1,2,2]
-    499128
-    >>> sum(a[2,3,2,:] * b[1,2,:,2])
-    499128
-
-    """)
-
 add_newdoc('numpy.core', 'matmul',
     """
     matmul(a, b, out=None)
@@ -2269,61 +1593,6 @@ add_newdoc('numpy.core', 'matmul',
 
     """)
 
-add_newdoc('numpy.core', 'vdot',
-    """
-    vdot(a, b)
-
-    Return the dot product of two vectors.
-
-    The vdot(`a`, `b`) function handles complex numbers differently than
-    dot(`a`, `b`).  If the first argument is complex the complex conjugate
-    of the first argument is used for the calculation of the dot product.
-
-    Note that `vdot` handles multidimensional arrays differently than `dot`:
-    it does *not* perform a matrix product, but flattens input arguments
-    to 1-D vectors first. Consequently, it should only be used for vectors.
-
-    Parameters
-    ----------
-    a : array_like
-        If `a` is complex the complex conjugate is taken before calculation
-        of the dot product.
-    b : array_like
-        Second argument to the dot product.
-
-    Returns
-    -------
-    output : ndarray
-        Dot product of `a` and `b`.  Can be an int, float, or
-        complex depending on the types of `a` and `b`.
-
-    See Also
-    --------
-    dot : Return the dot product without using the complex conjugate of the
-          first argument.
-
-    Examples
-    --------
-    >>> a = np.array([1+2j,3+4j])
-    >>> b = np.array([5+6j,7+8j])
-    >>> np.vdot(a, b)
-    (70-8j)
-    >>> np.vdot(b, a)
-    (70+8j)
-
-    Note that higher-dimensional arrays are flattened!
-
-    >>> a = np.array([[1, 4], [5, 6]])
-    >>> b = np.array([[4, 1], [2, 2]])
-    >>> np.vdot(a, b)
-    30
-    >>> np.vdot(b, a)
-    30
-    >>> 1*4 + 4*1 + 5*2 + 6*2
-    30
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'c_einsum',
     """
     c_einsum(subscripts, *operands, out=None, dtype=None, order='K',
@@ -5410,7 +4679,7 @@ add_newdoc('numpy.core.multiarray', 'ravel_multi_index',
 
 add_newdoc('numpy.core.multiarray', 'unravel_index',
     """
-    unravel_index(indices, dims, order='C')
+    unravel_index(indices, shape, order='C')
 
     Converts a flat index or array of flat indices into a tuple
     of coordinate arrays.
@@ -5419,10 +4688,14 @@ add_newdoc('numpy.core.multiarray', 'unravel_index',
     ----------
     indices : array_like
         An integer array whose elements are indices into the flattened
-        version of an array of dimensions ``dims``. Before version 1.6.0,
+        version of an array of dimensions ``shape``. Before version 1.6.0,
         this function accepted just one index value.
-    dims : tuple of ints
+    shape : tuple of ints
         The shape of the array to use for unraveling ``indices``.
+
+        .. versionchanged:: 1.16.0
+            Renamed from ``dims`` to ``shape``.
+
     order : {'C', 'F'}, optional
         Determines whether the indices should be viewed as indexing in
         row-major (C-style) or column-major (Fortran-style) order.
@@ -6791,211 +6064,6 @@ add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('weekmask',
 add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('holidays',
     """A copy of the holiday array indicating additional invalid days."""))
 
-add_newdoc('numpy.core.multiarray', 'is_busday',
-    """
-    is_busday(dates, weekmask='1111100', holidays=None, busdaycal=None, out=None)
-
-    Calculates which of the given dates are valid days, and which are not.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    dates : array_like of datetime64[D]
-        The array of dates to process.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of bool, optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of bool
-        An array with the same shape as ``dates``, containing True for
-        each valid day, and False for each invalid day.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    busday_offset : Applies an offset counted in valid days.
-    busday_count : Counts how many valid days are in a half-open date range.
-
-    Examples
-    --------
-    >>> # The weekdays are Friday, Saturday, and Monday
-    ... np.is_busday(['2011-07-01', '2011-07-02', '2011-07-18'],
-    ...                 holidays=['2011-07-01', '2011-07-04', '2011-07-17'])
-    array([False, False,  True], dtype='bool')
-    """)
-
-add_newdoc('numpy.core.multiarray', 'busday_offset',
-    """
-    busday_offset(dates, offsets, roll='raise', weekmask='1111100', holidays=None, busdaycal=None, out=None)
-
-    First adjusts the date to fall on a valid day according to
-    the ``roll`` rule, then applies offsets to the given dates
-    counted in valid days.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    dates : array_like of datetime64[D]
-        The array of dates to process.
-    offsets : array_like of int
-        The array of offsets, which is broadcast with ``dates``.
-    roll : {'raise', 'nat', 'forward', 'following', 'backward', 'preceding', 'modifiedfollowing', 'modifiedpreceding'}, optional
-        How to treat dates that do not fall on a valid day. The default
-        is 'raise'.
-
-          * 'raise' means to raise an exception for an invalid day.
-          * 'nat' means to return a NaT (not-a-time) for an invalid day.
-          * 'forward' and 'following' mean to take the first valid day
-            later in time.
-          * 'backward' and 'preceding' mean to take the first valid day
-            earlier in time.
-          * 'modifiedfollowing' means to take the first valid day
-            later in time unless it is across a Month boundary, in which
-            case to take the first valid day earlier in time.
-          * 'modifiedpreceding' means to take the first valid day
-            earlier in time unless it is across a Month boundary, in which
-            case to take the first valid day later in time.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of datetime64[D], optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of datetime64[D]
-        An array with a shape from broadcasting ``dates`` and ``offsets``
-        together, containing the dates with offsets applied.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    is_busday : Returns a boolean array indicating valid days.
-    busday_count : Counts how many valid days are in a half-open date range.
-
-    Examples
-    --------
-    >>> # First business day in October 2011 (not accounting for holidays)
-    ... np.busday_offset('2011-10', 0, roll='forward')
-    numpy.datetime64('2011-10-03','D')
-    >>> # Last business day in February 2012 (not accounting for holidays)
-    ... np.busday_offset('2012-03', -1, roll='forward')
-    numpy.datetime64('2012-02-29','D')
-    >>> # Third Wednesday in January 2011
-    ... np.busday_offset('2011-01', 2, roll='forward', weekmask='Wed')
-    numpy.datetime64('2011-01-19','D')
-    >>> # 2012 Mother's Day in Canada and the U.S.
-    ... np.busday_offset('2012-05', 1, roll='forward', weekmask='Sun')
-    numpy.datetime64('2012-05-13','D')
-
-    >>> # First business day on or after a date
-    ... np.busday_offset('2011-03-20', 0, roll='forward')
-    numpy.datetime64('2011-03-21','D')
-    >>> np.busday_offset('2011-03-22', 0, roll='forward')
-    numpy.datetime64('2011-03-22','D')
-    >>> # First business day after a date
-    ... np.busday_offset('2011-03-20', 1, roll='backward')
-    numpy.datetime64('2011-03-21','D')
-    >>> np.busday_offset('2011-03-22', 1, roll='backward')
-    numpy.datetime64('2011-03-23','D')
-    """)
-
-add_newdoc('numpy.core.multiarray', 'busday_count',
-    """
-    busday_count(begindates, enddates, weekmask='1111100', holidays=[], busdaycal=None, out=None)
-
-    Counts the number of valid days between `begindates` and
-    `enddates`, not including the day of `enddates`.
-
-    If ``enddates`` specifies a date value that is earlier than the
-    corresponding ``begindates`` date value, the count will be negative.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    begindates : array_like of datetime64[D]
-        The array of the first dates for counting.
-    enddates : array_like of datetime64[D]
-        The array of the end dates for counting, which are excluded
-        from the count themselves.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of int, optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of int
-        An array with a shape from broadcasting ``begindates`` and ``enddates``
-        together, containing the number of valid days between
-        the begin and end dates.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    is_busday : Returns a boolean array indicating valid days.
-    busday_offset : Applies an offset counted in valid days.
-
-    Examples
-    --------
-    >>> # Number of weekdays in January 2011
-    ... np.busday_count('2011-01', '2011-02')
-    21
-    >>> # Number of weekdays in 2011
-    ...  np.busday_count('2011', '2012')
-    260
-    >>> # Number of Saturdays in 2011
-    ... np.busday_count('2011', '2012', weekmask='Sat')
-    53
-    """)
-
 add_newdoc('numpy.core.multiarray', 'normalize_axis_index',
     """
     normalize_axis_index(axis, ndim, msg_prefix=None)
@@ -7047,67 +6115,6 @@ add_newdoc('numpy.core.multiarray', 'normalize_axis_index',
     AxisError: axes_arg: axis -4 is out of bounds for array of dimension 3
     """)
 
-add_newdoc('numpy.core.multiarray', 'datetime_as_string',
-    """
-    datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind')
-
-    Convert an array of datetimes into an array of strings.
-
-    Parameters
-    ----------
-    arr : array_like of datetime64
-        The array of UTC timestamps to format.
-    unit : str
-        One of None, 'auto', or a :ref:`datetime unit <arrays.dtypes.dateunits>`.
-    timezone : {'naive', 'UTC', 'local'} or tzinfo
-        Timezone information to use when displaying the datetime. If 'UTC', end
-        with a Z to indicate UTC time. If 'local', convert to the local timezone
-        first, and suffix with a +-#### timezone offset. If a tzinfo object,
-        then do as with 'local', but use the specified timezone.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}
-        Casting to allow when changing between datetime units.
-
-    Returns
-    -------
-    str_arr : ndarray
-        An array of strings the same shape as `arr`.
-
-    Examples
-    --------
-    >>> d = np.arange('2002-10-27T04:30', 4*60, 60, dtype='M8[m]')
-    >>> d
-    array(['2002-10-27T04:30', '2002-10-27T05:30', '2002-10-27T06:30',
-           '2002-10-27T07:30'], dtype='datetime64[m]')
-
-    Setting the timezone to UTC shows the same information, but with a Z suffix
-
-    >>> np.datetime_as_string(d, timezone='UTC')
-    array(['2002-10-27T04:30Z', '2002-10-27T05:30Z', '2002-10-27T06:30Z',
-           '2002-10-27T07:30Z'], dtype='<U35')
-
-    Note that we picked datetimes that cross a DST boundary. Passing in a
-    ``pytz`` timezone object will print the appropriate offset
-
-    >>> np.datetime_as_string(d, timezone=pytz.timezone('US/Eastern'))
-    array(['2002-10-27T00:30-0400', '2002-10-27T01:30-0400',
-           '2002-10-27T01:30-0500', '2002-10-27T02:30-0500'], dtype='<U39')
-
-    Passing in a unit will change the precision
-
-    >>> np.datetime_as_string(d, unit='h')
-    array(['2002-10-27T04', '2002-10-27T05', '2002-10-27T06', '2002-10-27T07'],
-          dtype='<U32')
-    >>> np.datetime_as_string(d, unit='s')
-    array(['2002-10-27T04:30:00', '2002-10-27T05:30:00', '2002-10-27T06:30:00',
-           '2002-10-27T07:30:00'], dtype='<U38')
-
-    'casting' can be used to specify whether precision can be changed
-
-    >>> np.datetime_as_string(d, unit='h', casting='safe')
-    TypeError: Cannot create a datetime string as units 'h' from a NumPy
-    datetime with units 'm' according to the rule 'safe'
-    """)
-
 add_newdoc('numpy.core.multiarray', 'datetime_data',
     """
     datetime_data(dtype, /)
diff --git a/numpy/core/_dtype.py b/numpy/core/_dtype.py
index 26c44eaaf..3a12c8fad 100644
--- a/numpy/core/_dtype.py
+++ b/numpy/core/_dtype.py
@@ -5,9 +5,44 @@ String handling is much easier to do correctly in python.
 """
 from __future__ import division, absolute_import, print_function
 
+import sys
+
 import numpy as np
 
 
+_kind_to_stem = {
+    'u': 'uint',
+    'i': 'int',
+    'c': 'complex',
+    'f': 'float',
+    'b': 'bool',
+    'V': 'void',
+    'O': 'object',
+    'M': 'datetime',
+    'm': 'timedelta'
+}
+if sys.version_info[0] >= 3:
+    _kind_to_stem.update({
+        'S': 'bytes',
+        'U': 'str'
+    })
+else:
+    _kind_to_stem.update({
+        'S': 'string',
+        'U': 'unicode'
+    })
+
+
+def _kind_name(dtype):
+    try:
+        return _kind_to_stem[dtype.kind]
+    except KeyError:
+        raise RuntimeError(
+            "internal dtype error, unknown kind {!r}"
+            .format(dtype.kind)
+        )
+
+
 def __str__(dtype):
     if dtype.fields is not None:
         return _struct_str(dtype, include_align=True)
@@ -103,7 +138,9 @@ def _scalar_str(dtype, short):
         else:
             return "'%sU%d'" % (byteorder, dtype.itemsize / 4)
 
-    elif dtype.type == np.void:
+    # unlike the other types, subclasses of void are preserved - but
+    # historically the repr does not actually reveal the subclass
+    elif issubclass(dtype.type, np.void):
         if _isunsized(dtype):
             return "'V'"
         else:
@@ -122,20 +159,7 @@ def _scalar_str(dtype, short):
 
         # Longer repr, like 'float64'
         else:
-            kindstrs = {
-                'u': "uint",
-                'i': "int",
-                'f': "float",
-                'c': "complex"
-            }
-            try:
-                kindstr = kindstrs[dtype.kind]
-            except KeyError:
-                raise RuntimeError(
-                    "internal dtype repr error, unknown kind {!r}"
-                    .format(dtype.kind)
-                )
-            return "'%s%d'" % (kindstr, 8*dtype.itemsize)
+            return "'%s%d'" % (_kind_name(dtype), 8*dtype.itemsize)
 
     elif dtype.isbuiltin == 2:
         return dtype.type.__name__
diff --git a/numpy/core/_dtype_ctypes.py b/numpy/core/_dtype_ctypes.py
new file mode 100644
index 000000000..f10b4e99f
--- /dev/null
+++ b/numpy/core/_dtype_ctypes.py
@@ -0,0 +1,68 @@
+"""
+Conversion from ctypes to dtype.
+
+In an ideal world, we could acheive this through the PEP3118 buffer protocol,
+something like::
+
+    def dtype_from_ctypes_type(t):
+        # needed to ensure that the shape of `t` is within memoryview.format
+        class DummyStruct(ctypes.Structure):
+            _fields_ = [('a', t)]
+
+        # empty to avoid memory allocation
+        ctype_0 = (DummyStruct * 0)()
+        mv = memoryview(ctype_0)
+
+        # convert the struct, and slice back out the field
+        return _dtype_from_pep3118(mv.format)['a']
+
+Unfortunately, this fails because:
+
+* ctypes cannot handle length-0 arrays with PEP3118 (bpo-32782)
+* PEP3118 cannot represent unions, but both numpy and ctypes can
+* ctypes cannot handle big-endian structs with PEP3118 (bpo-32780)
+"""
+import _ctypes
+import ctypes
+
+import numpy as np
+
+
+def _from_ctypes_array(t):
+    return np.dtype((dtype_from_ctypes_type(t._type_), (t._length_,)))
+
+
+def _from_ctypes_structure(t):
+    # TODO: gh-10533, gh-10532
+    fields = []
+    for item in t._fields_:
+        if len(item) > 2:
+            raise TypeError(
+                "ctypes bitfields have no dtype equivalent")
+        fname, ftyp = item
+        fields.append((fname, dtype_from_ctypes_type(ftyp)))
+
+    # by default, ctypes structs are aligned
+    return np.dtype(fields, align=True)
+
+
+def dtype_from_ctypes_type(t):
+    """
+    Construct a dtype object from a ctypes type
+    """
+    if issubclass(t, _ctypes.Array):
+        return _from_ctypes_array(t)
+    elif issubclass(t, _ctypes._Pointer):
+        raise TypeError("ctypes pointers have no dtype equivalent")
+    elif issubclass(t, _ctypes.Structure):
+        return _from_ctypes_structure(t)
+    elif issubclass(t, _ctypes.Union):
+        # TODO
+        raise NotImplementedError(
+            "conversion from ctypes.Union types like {} to dtype"
+            .format(t.__name__))
+    elif isinstance(t._type_, str):
+        return np.dtype(t._type_)
+    else:
+        raise NotImplementedError(
+            "Unknown ctypes type {}".format(t.__name__))
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index c4d967dc2..30069f0ca 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -796,13 +796,13 @@ def _ufunc_doc_signature_formatter(ufunc):
     )
 
 
-def _is_from_ctypes(obj):
-    # determine if an object comes from ctypes, in order to work around
+def npy_ctypes_check(cls):
+    # determine if a class comes from ctypes, in order to work around
     # a bug in the buffer protocol for those objects, bpo-10746
     try:
         # ctypes class are new-style, so have an __mro__. This probably fails
         # for ctypes classes with multiple inheritance.
-        ctype_base = type(obj).__mro__[-2]
+        ctype_base = cls.__mro__[-2]
         # right now, they're part of the _ctypes module
         return 'ctypes' in ctype_base.__module__
     except Exception:
diff --git a/numpy/core/_type_aliases.py b/numpy/core/_type_aliases.py
index 8d629aa07..cce6c0425 100644
--- a/numpy/core/_type_aliases.py
+++ b/numpy/core/_type_aliases.py
@@ -29,6 +29,7 @@ from numpy.compat import unicode
 from numpy._globals import VisibleDeprecationWarning
 from numpy.core._string_helpers import english_lower, english_capitalize
 from numpy.core.multiarray import typeinfo, dtype
+from numpy.core._dtype import _kind_name
 
 
 sctypeDict = {}      # Contains all leaf-node scalar types with aliases
@@ -61,28 +62,6 @@ for k, v in typeinfo.items():
 
 _concrete_types = set(v.type for k, v in _concrete_typeinfo.items())
 
-_kind_to_stem = {
-    'u': 'uint',
-    'i': 'int',
-    'c': 'complex',
-    'f': 'float',
-    'b': 'bool',
-    'V': 'void',
-    'O': 'object',
-    'M': 'datetime',
-    'm': 'timedelta'
-}
-if sys.version_info[0] >= 3:
-    _kind_to_stem.update({
-        'S': 'bytes',
-        'U': 'str'
-    })
-else:
-    _kind_to_stem.update({
-        'S': 'string',
-        'U': 'unicode'
-    })
-
 
 def _bits_of(obj):
     try:
@@ -100,8 +79,9 @@ def _bits_of(obj):
 def bitname(obj):
     """Return a bit-width name for a given type object"""
     bits = _bits_of(obj)
-    char = dtype(obj).kind
-    base = _kind_to_stem[char]
+    dt = dtype(obj)
+    char = dt.kind
+    base = _kind_name(dt)
 
     if base == 'object':
         bits = 0
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index 960e64ca3..ccc1468c4 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -48,6 +48,7 @@ from .fromnumeric import ravel, any
 from .numeric import concatenate, asarray, errstate
 from .numerictypes import (longlong, intc, int_, float_, complex_, bool_,
                            flexible)
+from .overrides import array_function_dispatch
 import warnings
 import contextlib
 
@@ -496,6 +497,16 @@ def _array2string(a, options, separator=' ', prefix=""):
     return lst
 
 
+def _array2string_dispatcher(
+        a, max_line_width=None, precision=None,
+        suppress_small=None, separator=None, prefix=None,
+        style=None, formatter=None, threshold=None,
+        edgeitems=None, sign=None, floatmode=None, suffix=None,
+        **kwarg):
+    return (a,)
+
+
+@array_function_dispatch(_array2string_dispatcher, module='numpy')
 def array2string(a, max_line_width=None, precision=None,
                  suppress_small=None, separator=' ', prefix="",
                  style=np._NoValue, formatter=None, threshold=None,
@@ -1370,6 +1381,59 @@ def dtype_short_repr(dtype):
     return typename
 
 
+def _array_repr_implementation(
+        arr, max_line_width=None, precision=None, suppress_small=None,
+        array2string=array2string):
+    """Internal version of array_repr() that allows overriding array2string."""
+    if max_line_width is None:
+        max_line_width = _format_options['linewidth']
+
+    if type(arr) is not ndarray:
+        class_name = type(arr).__name__
+    else:
+        class_name = "array"
+
+    skipdtype = dtype_is_implied(arr.dtype) and arr.size > 0
+
+    prefix = class_name + "("
+    suffix = ")" if skipdtype else ","
+
+    if (_format_options['legacy'] == '1.13' and
+            arr.shape == () and not arr.dtype.names):
+        lst = repr(arr.item())
+    elif arr.size > 0 or arr.shape == (0,):
+        lst = array2string(arr, max_line_width, precision, suppress_small,
+                           ', ', prefix, suffix=suffix)
+    else:  # show zero-length shape unless it is (0,)
+        lst = "[], shape=%s" % (repr(arr.shape),)
+
+    arr_str = prefix + lst + suffix
+
+    if skipdtype:
+        return arr_str
+
+    dtype_str = "dtype={})".format(dtype_short_repr(arr.dtype))
+
+    # compute whether we should put dtype on a new line: Do so if adding the
+    # dtype would extend the last line past max_line_width.
+    # Note: This line gives the correct result even when rfind returns -1.
+    last_line_len = len(arr_str) - (arr_str.rfind('\n') + 1)
+    spacer = " "
+    if _format_options['legacy'] == '1.13':
+        if issubclass(arr.dtype.type, flexible):
+            spacer = '\n' + ' '*len(class_name + "(")
+    elif last_line_len + len(dtype_str) + 1 > max_line_width:
+        spacer = '\n' + ' '*len(class_name + "(")
+
+    return arr_str + spacer + dtype_str
+
+
+def _array_repr_dispatcher(
+        arr, max_line_width=None, precision=None, suppress_small=None):
+    return (arr,)
+
+
+@array_function_dispatch(_array_repr_dispatcher, module='numpy')
 def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
     """
     Return the string representation of an array.
@@ -1412,50 +1476,39 @@ def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
     'array([ 0.000001,  0.      ,  2.      ,  3.      ])'
 
     """
-    if max_line_width is None:
-        max_line_width = _format_options['linewidth']
+    return _array_repr_implementation(
+        arr, max_line_width, precision, suppress_small)
 
-    if type(arr) is not ndarray:
-        class_name = type(arr).__name__
-    else:
-        class_name = "array"
 
-    skipdtype = dtype_is_implied(arr.dtype) and arr.size > 0
+_guarded_str = _recursive_guard()(str)
 
-    prefix = class_name + "("
-    suffix = ")" if skipdtype else ","
 
+def _array_str_implementation(
+        a, max_line_width=None, precision=None, suppress_small=None,
+        array2string=array2string):
+    """Internal version of array_str() that allows overriding array2string."""
     if (_format_options['legacy'] == '1.13' and
-            arr.shape == () and not arr.dtype.names):
-        lst = repr(arr.item())
-    elif arr.size > 0 or arr.shape == (0,):
-        lst = array2string(arr, max_line_width, precision, suppress_small,
-                           ', ', prefix, suffix=suffix)
-    else:  # show zero-length shape unless it is (0,)
-        lst = "[], shape=%s" % (repr(arr.shape),)
-
-    arr_str = prefix + lst + suffix
+            a.shape == () and not a.dtype.names):
+        return str(a.item())
 
-    if skipdtype:
-        return arr_str
+    # the str of 0d arrays is a special case: It should appear like a scalar,
+    # so floats are not truncated by `precision`, and strings are not wrapped
+    # in quotes. So we return the str of the scalar value.
+    if a.shape == ():
+        # obtain a scalar and call str on it, avoiding problems for subclasses
+        # for which indexing with () returns a 0d instead of a scalar by using
+        # ndarray's getindex. Also guard against recursive 0d object arrays.
+        return _guarded_str(np.ndarray.__getitem__(a, ()))
 
-    dtype_str = "dtype={})".format(dtype_short_repr(arr.dtype))
+    return array2string(a, max_line_width, precision, suppress_small, ' ', "")
 
-    # compute whether we should put dtype on a new line: Do so if adding the
-    # dtype would extend the last line past max_line_width.
-    # Note: This line gives the correct result even when rfind returns -1.
-    last_line_len = len(arr_str) - (arr_str.rfind('\n') + 1)
-    spacer = " "
-    if _format_options['legacy'] == '1.13':
-        if issubclass(arr.dtype.type, flexible):
-            spacer = '\n' + ' '*len(class_name + "(")
-    elif last_line_len + len(dtype_str) + 1 > max_line_width:
-        spacer = '\n' + ' '*len(class_name + "(")
 
-    return arr_str + spacer + dtype_str
+def _array_str_dispatcher(
+        a, max_line_width=None, precision=None, suppress_small=None):
+    return (a,)
 
-_guarded_str = _recursive_guard()(str)
 
+@array_function_dispatch(_array_str_dispatcher, module='numpy')
 def array_str(a, max_line_width=None, precision=None, suppress_small=None):
     """
     Return a string representation of the data in an array.
@@ -1490,20 +1543,15 @@ def array_str(a, max_line_width=None, precision=None, suppress_small=None):
     '[0 1 2]'
 
     """
-    if (_format_options['legacy'] == '1.13' and
-            a.shape == () and not a.dtype.names):
-        return str(a.item())
+    return _array_str_implementation(
+        a, max_line_width, precision, suppress_small)
 
-    # the str of 0d arrays is a special case: It should appear like a scalar,
-    # so floats are not truncated by `precision`, and strings are not wrapped
-    # in quotes. So we return the str of the scalar value.
-    if a.shape == ():
-        # obtain a scalar and call str on it, avoiding problems for subclasses
-        # for which indexing with () returns a 0d instead of a scalar by using
-        # ndarray's getindex. Also guard against recursive 0d object arrays.
-        return _guarded_str(np.ndarray.__getitem__(a, ()))
 
-    return array2string(a, max_line_width, precision, suppress_small, ' ', "")
+_default_array_str = functools.partial(_array_str_implementation,
+                                       array2string=array2string.__wrapped__)
+_default_array_repr = functools.partial(_array_repr_implementation,
+                                        array2string=array2string.__wrapped__)
+
 
 def set_string_function(f, repr=True):
     """
@@ -1558,11 +1606,11 @@ def set_string_function(f, repr=True):
     """
     if f is None:
         if repr:
-            return multiarray.set_string_function(array_repr, 1)
+            return multiarray.set_string_function(_default_array_repr, 1)
         else:
-            return multiarray.set_string_function(array_str, 0)
+            return multiarray.set_string_function(_default_array_str, 0)
     else:
         return multiarray.set_string_function(f, repr)
 
-set_string_function(array_str, 0)
-set_string_function(array_repr, 1)
+set_string_function(_default_array_str, 0)
+set_string_function(_default_array_repr, 1)
diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt
index 43c32eac6..c8b998bfc 100644
--- a/numpy/core/code_generators/cversions.txt
+++ b/numpy/core/code_generators/cversions.txt
@@ -43,3 +43,5 @@
 # PyArray_SetWritebackIfCopyBase and deprecated PyArray_SetUpdateIfCopyBase.
 0x0000000c = a1bc756c5782853ec2e3616cf66869d8
 
+# Version 13 (Numpy 1.16) Added fields core_dim_flags and core_dim_sizes to PyUFuncObject
+0x0000000d = a1bc756c5782853ec2e3616cf66869d8
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 6dc01877b..199ad831b 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -791,8 +791,9 @@ defdict = {
 'remainder':
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.remainder'),
-          None,
+          'PyUFunc_RemainderTypeResolver',
           TD(intflt),
+          [TypeDescription('m', FullTypeDescr, 'mm', 'm')],
           TD(O, f='PyNumber_Remainder'),
           ),
 'divmod':
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 6e5cb25af..13231de29 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -2594,8 +2594,7 @@ add_newdoc('numpy.core.umath', 'multiply',
     Returns
     -------
     y : ndarray
-        The product of `x1` and `x2`, element-wise. Returns a scalar if
-        both `x1` and `x2` are scalars.
+        The product of `x1` and `x2`, element-wise.
         $OUT_SCALAR_2
 
     Notes
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 6d0a0add5..e86086012 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -17,11 +17,13 @@ The preferred alias for `defchararray` is `numpy.char`.
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
 import sys
 from .numerictypes import string_, unicode_, integer, object_, bool_, character
 from .numeric import ndarray, compare_chararrays
 from .numeric import array as narray
 from numpy.core.multiarray import _vec_string
+from numpy.core import overrides
 from numpy.compat import asbytes, long
 import numpy
 
@@ -47,6 +49,10 @@ else:
     _bytes = str
 _len = len
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy.char')
+
+
 def _use_unicode(*args):
     """
     Helper function for determining the output type of some string
@@ -95,6 +101,11 @@ def _get_num_chars(a):
     return a.itemsize
 
 
+def _binary_op_dispatcher(x1, x2):
+    return (x1, x2)
+
+
+@array_function_dispatch(_binary_op_dispatcher)
 def equal(x1, x2):
     """
     Return (x1 == x2) element-wise.
@@ -119,6 +130,8 @@ def equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '==', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def not_equal(x1, x2):
     """
     Return (x1 != x2) element-wise.
@@ -143,6 +156,8 @@ def not_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '!=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def greater_equal(x1, x2):
     """
     Return (x1 >= x2) element-wise.
@@ -168,6 +183,8 @@ def greater_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '>=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def less_equal(x1, x2):
     """
     Return (x1 <= x2) element-wise.
@@ -192,6 +209,8 @@ def less_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '<=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def greater(x1, x2):
     """
     Return (x1 > x2) element-wise.
@@ -216,6 +235,8 @@ def greater(x1, x2):
     """
     return compare_chararrays(x1, x2, '>', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def less(x1, x2):
     """
     Return (x1 < x2) element-wise.
@@ -240,6 +261,12 @@ def less(x1, x2):
     """
     return compare_chararrays(x1, x2, '<', True)
 
+
+def _unary_op_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_unary_op_dispatcher)
 def str_len(a):
     """
     Return len(a) element-wise.
@@ -259,6 +286,8 @@ def str_len(a):
     """
     return _vec_string(a, integer, '__len__')
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def add(x1, x2):
     """
     Return element-wise string concatenation for two arrays of str or unicode.
@@ -285,6 +314,12 @@ def add(x1, x2):
     dtype = _use_unicode(arr1, arr2)
     return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
 
+
+def _multiply_dispatcher(a, i):
+    return (a,)
+
+
+@array_function_dispatch(_multiply_dispatcher)
 def multiply(a, i):
     """
     Return (a * i), that is string multiple concatenation,
@@ -313,6 +348,12 @@ def multiply(a, i):
     return _vec_string(
         a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
 
+
+def _mod_dispatcher(a, values):
+    return (a, values)
+
+
+@array_function_dispatch(_mod_dispatcher)
 def mod(a, values):
     """
     Return (a % i), that is pre-Python 2.6 string formatting
@@ -339,6 +380,8 @@ def mod(a, values):
     return _to_string_or_unicode_array(
         _vec_string(a, object_, '__mod__', (values,)))
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def capitalize(a):
     """
     Return a copy of `a` with only the first character of each element
@@ -377,6 +420,11 @@ def capitalize(a):
     return _vec_string(a_arr, a_arr.dtype, 'capitalize')
 
 
+def _center_dispatcher(a, width, fillchar=None):
+    return (a,)
+
+
+@array_function_dispatch(_center_dispatcher)
 def center(a, width, fillchar=' '):
     """
     Return a copy of `a` with its elements centered in a string of
@@ -413,6 +461,11 @@ def center(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
 
 
+def _count_dispatcher(a, sub, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_count_dispatcher)
 def count(a, sub, start=0, end=None):
     """
     Returns an array with the number of non-overlapping occurrences of
@@ -459,6 +512,11 @@ def count(a, sub, start=0, end=None):
     return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
 
 
+def _code_dispatcher(a, encoding=None, errors=None):
+    return (a,)
+
+
+@array_function_dispatch(_code_dispatcher)
 def decode(a, encoding=None, errors=None):
     """
     Calls `str.decode` element-wise.
@@ -505,6 +563,7 @@ def decode(a, encoding=None, errors=None):
         _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
 
 
+@array_function_dispatch(_code_dispatcher)
 def encode(a, encoding=None, errors=None):
     """
     Calls `str.encode` element-wise.
@@ -540,6 +599,11 @@ def encode(a, encoding=None, errors=None):
         _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
 
 
+def _endswith_dispatcher(a, suffix, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_endswith_dispatcher)
 def endswith(a, suffix, start=0, end=None):
     """
     Returns a boolean array which is `True` where the string element
@@ -584,6 +648,11 @@ def endswith(a, suffix, start=0, end=None):
         a, bool_, 'endswith', [suffix, start] + _clean_args(end))
 
 
+def _expandtabs_dispatcher(a, tabsize=None):
+    return (a,)
+
+
+@array_function_dispatch(_expandtabs_dispatcher)
 def expandtabs(a, tabsize=8):
     """
     Return a copy of each string element where all tab characters are
@@ -619,6 +688,7 @@ def expandtabs(a, tabsize=8):
         _vec_string(a, object_, 'expandtabs', (tabsize,)))
 
 
+@array_function_dispatch(_count_dispatcher)
 def find(a, sub, start=0, end=None):
     """
     For each element, return the lowest index in the string where
@@ -654,6 +724,7 @@ def find(a, sub, start=0, end=None):
         a, integer, 'find', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_count_dispatcher)
 def index(a, sub, start=0, end=None):
     """
     Like `find`, but raises `ValueError` when the substring is not found.
@@ -681,6 +752,8 @@ def index(a, sub, start=0, end=None):
     return _vec_string(
         a, integer, 'index', [sub, start] + _clean_args(end))
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isalnum(a):
     """
     Returns true for each element if all characters in the string are
@@ -705,6 +778,8 @@ def isalnum(a):
     """
     return _vec_string(a, bool_, 'isalnum')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isalpha(a):
     """
     Returns true for each element if all characters in the string are
@@ -729,6 +804,8 @@ def isalpha(a):
     """
     return _vec_string(a, bool_, 'isalpha')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isdigit(a):
     """
     Returns true for each element if all characters in the string are
@@ -753,6 +830,8 @@ def isdigit(a):
     """
     return _vec_string(a, bool_, 'isdigit')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def islower(a):
     """
     Returns true for each element if all cased characters in the
@@ -778,6 +857,8 @@ def islower(a):
     """
     return _vec_string(a, bool_, 'islower')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isspace(a):
     """
     Returns true for each element if there are only whitespace
@@ -803,6 +884,8 @@ def isspace(a):
     """
     return _vec_string(a, bool_, 'isspace')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def istitle(a):
     """
     Returns true for each element if the element is a titlecased
@@ -827,6 +910,8 @@ def istitle(a):
     """
     return _vec_string(a, bool_, 'istitle')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isupper(a):
     """
     Returns true for each element if all cased characters in the
@@ -852,6 +937,12 @@ def isupper(a):
     """
     return _vec_string(a, bool_, 'isupper')
 
+
+def _join_dispatcher(sep, seq):
+    return (sep, seq)
+
+
+@array_function_dispatch(_join_dispatcher)
 def join(sep, seq):
     """
     Return a string which is the concatenation of the strings in the
@@ -877,6 +968,12 @@ def join(sep, seq):
         _vec_string(sep, object_, 'join', (seq,)))
 
 
+
+def _just_dispatcher(a, width, fillchar=None):
+    return (a,)
+
+
+@array_function_dispatch(_just_dispatcher)
 def ljust(a, width, fillchar=' '):
     """
     Return an array with the elements of `a` left-justified in a
@@ -912,6 +1009,7 @@ def ljust(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def lower(a):
     """
     Return an array with the elements converted to lowercase.
@@ -948,6 +1046,11 @@ def lower(a):
     return _vec_string(a_arr, a_arr.dtype, 'lower')
 
 
+def _strip_dispatcher(a, chars=None):
+    return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
 def lstrip(a, chars=None):
     """
     For each element in `a`, return a copy with the leading characters
@@ -1005,6 +1108,11 @@ def lstrip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
 
 
+def _partition_dispatcher(a, sep):
+    return (a,)
+
+
+@array_function_dispatch(_partition_dispatcher)
 def partition(a, sep):
     """
     Partition each element in `a` around `sep`.
@@ -1040,6 +1148,11 @@ def partition(a, sep):
         _vec_string(a, object_, 'partition', (sep,)))
 
 
+def _replace_dispatcher(a, old, new, count=None):
+    return (a,)
+
+
+@array_function_dispatch(_replace_dispatcher)
 def replace(a, old, new, count=None):
     """
     For each element in `a`, return a copy of the string with all
@@ -1072,6 +1185,7 @@ def replace(a, old, new, count=None):
             a, object_, 'replace', [old, new] + _clean_args(count)))
 
 
+@array_function_dispatch(_count_dispatcher)
 def rfind(a, sub, start=0, end=None):
     """
     For each element in `a`, return the highest index in the string
@@ -1104,6 +1218,7 @@ def rfind(a, sub, start=0, end=None):
         a, integer, 'rfind', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_count_dispatcher)
 def rindex(a, sub, start=0, end=None):
     """
     Like `rfind`, but raises `ValueError` when the substring `sub` is
@@ -1133,6 +1248,7 @@ def rindex(a, sub, start=0, end=None):
         a, integer, 'rindex', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_just_dispatcher)
 def rjust(a, width, fillchar=' '):
     """
     Return an array with the elements of `a` right-justified in a
@@ -1168,6 +1284,7 @@ def rjust(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
 
 
+@array_function_dispatch(_partition_dispatcher)
 def rpartition(a, sep):
     """
     Partition (split) each element around the right-most separator.
@@ -1203,6 +1320,11 @@ def rpartition(a, sep):
         _vec_string(a, object_, 'rpartition', (sep,)))
 
 
+def _split_dispatcher(a, sep=None, maxsplit=None):
+    return (a,)
+
+
+@array_function_dispatch(_split_dispatcher)
 def rsplit(a, sep=None, maxsplit=None):
     """
     For each element in `a`, return a list of the words in the
@@ -1240,6 +1362,11 @@ def rsplit(a, sep=None, maxsplit=None):
         a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
 
 
+def _strip_dispatcher(a, chars=None):
+    return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
 def rstrip(a, chars=None):
     """
     For each element in `a`, return a copy with the trailing
@@ -1284,6 +1411,7 @@ def rstrip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
 
 
+@array_function_dispatch(_split_dispatcher)
 def split(a, sep=None, maxsplit=None):
     """
     For each element in `a`, return a list of the words in the
@@ -1318,6 +1446,11 @@ def split(a, sep=None, maxsplit=None):
         a, object_, 'split', [sep] + _clean_args(maxsplit))
 
 
+def _splitlines_dispatcher(a, keepends=None):
+    return (a,)
+
+
+@array_function_dispatch(_splitlines_dispatcher)
 def splitlines(a, keepends=None):
     """
     For each element in `a`, return a list of the lines in the
@@ -1347,6 +1480,11 @@ def splitlines(a, keepends=None):
         a, object_, 'splitlines', _clean_args(keepends))
 
 
+def _startswith_dispatcher(a, prefix, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_startswith_dispatcher)
 def startswith(a, prefix, start=0, end=None):
     """
     Returns a boolean array which is `True` where the string element
@@ -1378,6 +1516,7 @@ def startswith(a, prefix, start=0, end=None):
         a, bool_, 'startswith', [prefix, start] + _clean_args(end))
 
 
+@array_function_dispatch(_strip_dispatcher)
 def strip(a, chars=None):
     """
     For each element in `a`, return a copy with the leading and
@@ -1426,6 +1565,7 @@ def strip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def swapcase(a):
     """
     Return element-wise a copy of the string with
@@ -1463,6 +1603,7 @@ def swapcase(a):
     return _vec_string(a_arr, a_arr.dtype, 'swapcase')
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def title(a):
     """
     Return element-wise title cased version of string or unicode.
@@ -1502,6 +1643,11 @@ def title(a):
     return _vec_string(a_arr, a_arr.dtype, 'title')
 
 
+def _translate_dispatcher(a, table, deletechars=None):
+    return (a,)
+
+
+@array_function_dispatch(_translate_dispatcher)
 def translate(a, table, deletechars=None):
     """
     For each element in `a`, return a copy of the string where all
@@ -1538,6 +1684,7 @@ def translate(a, table, deletechars=None):
             a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def upper(a):
     """
     Return an array with the elements converted to uppercase.
@@ -1574,6 +1721,11 @@ def upper(a):
     return _vec_string(a_arr, a_arr.dtype, 'upper')
 
 
+def _zfill_dispatcher(a, width):
+    return (a,)
+
+
+@array_function_dispatch(_zfill_dispatcher)
 def zfill(a, width):
     """
     Return the numeric string left-filled with zeros
@@ -1604,6 +1756,7 @@ def zfill(a, width):
         a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def isnumeric(a):
     """
     For each element, return True if there are only numeric
@@ -1635,6 +1788,7 @@ def isnumeric(a):
     return _vec_string(a, bool_, 'isnumeric')
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def isdecimal(a):
     """
     For each element, return True if there are only decimal
diff --git a/numpy/core/einsumfunc.py b/numpy/core/einsumfunc.py
index 1281b3c98..d9f88cb1c 100644
--- a/numpy/core/einsumfunc.py
+++ b/numpy/core/einsumfunc.py
@@ -9,6 +9,7 @@ import itertools
 from numpy.compat import basestring
 from numpy.core.multiarray import c_einsum
 from numpy.core.numeric import asanyarray, tensordot
+from numpy.core.overrides import array_function_dispatch
 
 __all__ = ['einsum', 'einsum_path']
 
@@ -689,6 +690,17 @@ def _parse_einsum_input(operands):
     return (input_subscripts, output_subscript, operands)
 
 
+def _einsum_path_dispatcher(*operands, **kwargs):
+    # NOTE: technically, we should only dispatch on array-like arguments, not
+    # subscripts (given as strings). But separating operands into
+    # arrays/subscripts is a little tricky/slow (given einsum's two supported
+    # signatures), so as a practical shortcut we dispatch on everything.
+    # Strings will be ignored for dispatching since they don't define
+    # __array_function__.
+    return operands
+
+
+@array_function_dispatch(_einsum_path_dispatcher, module='numpy')
 def einsum_path(*operands, **kwargs):
     """
     einsum_path(subscripts, *operands, optimize='greedy')
@@ -980,7 +992,16 @@ def einsum_path(*operands, **kwargs):
     return (path, path_print)
 
 
+def _einsum_dispatcher(*operands, **kwargs):
+    # Arguably we dispatch on more arguments that we really should; see note in
+    # _einsum_path_dispatcher for why.
+    for op in operands:
+        yield op
+    yield kwargs.get('out')
+
+
 # Rewrite einsum to handle different cases
+@array_function_dispatch(_einsum_dispatcher, module='numpy')
 def einsum(*operands, **kwargs):
     """
     einsum(subscripts, *operands, out=None, dtype=None, order='K',
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index b9cc98cae..7dfb52fea 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -3,12 +3,14 @@
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
 import types
 import warnings
 
 import numpy as np
 from .. import VisibleDeprecationWarning
 from . import multiarray as mu
+from . import overrides
 from . import umath as um
 from . import numerictypes as nt
 from .numeric import asarray, array, asanyarray, concatenate
@@ -31,6 +33,9 @@ _gentype = types.GeneratorType
 # save away Python sum
 _sum_ = sum
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
 
 # functions that are now methods
 def _wrapit(obj, method, *args, **kwds):
@@ -83,6 +88,11 @@ def _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs):
     return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
 
 
+def _take_dispatcher(a, indices, axis=None, out=None, mode=None):
+    return (a, out)
+
+
+@array_function_dispatch(_take_dispatcher)
 def take(a, indices, axis=None, out=None, mode='raise'):
     """
     Take elements from an array along an axis.
@@ -181,7 +191,12 @@ def take(a, indices, axis=None, out=None, mode='raise'):
     return _wrapfunc(a, 'take', indices, axis=axis, out=out, mode=mode)
 
 
+def _reshape_dispatcher(a, newshape, order=None):
+    return (a,)
+
+
 # not deprecated --- copy if necessary, view otherwise
+@array_function_dispatch(_reshape_dispatcher)
 def reshape(a, newshape, order='C'):
     """
     Gives a new shape to an array without changing its data.
@@ -279,6 +294,14 @@ def reshape(a, newshape, order='C'):
     return _wrapfunc(a, 'reshape', newshape, order=order)
 
 
+def _choose_dispatcher(a, choices, out=None, mode=None):
+    yield a
+    for c in choices:
+        yield c
+    yield out
+
+
+@array_function_dispatch(_choose_dispatcher)
 def choose(a, choices, out=None, mode='raise'):
     """
     Construct an array from an index array and a set of arrays to choose from.
@@ -401,6 +424,11 @@ def choose(a, choices, out=None, mode='raise'):
     return _wrapfunc(a, 'choose', choices, out=out, mode=mode)
 
 
+def _repeat_dispatcher(a, repeats, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_repeat_dispatcher)
 def repeat(a, repeats, axis=None):
     """
     Repeat elements of an array.
@@ -445,6 +473,11 @@ def repeat(a, repeats, axis=None):
     return _wrapfunc(a, 'repeat', repeats, axis=axis)
 
 
+def _put_dispatcher(a, ind, v, mode=None):
+    return (a, ind, v)
+
+
+@array_function_dispatch(_put_dispatcher)
 def put(a, ind, v, mode='raise'):
     """
     Replaces specified elements of an array with given values.
@@ -503,6 +536,11 @@ def put(a, ind, v, mode='raise'):
     return put(ind, v, mode=mode)
 
 
+def _swapaxes_dispatcher(a, axis1, axis2):
+    return (a,)
+
+
+@array_function_dispatch(_swapaxes_dispatcher)
 def swapaxes(a, axis1, axis2):
     """
     Interchange two axes of an array.
@@ -549,6 +587,11 @@ def swapaxes(a, axis1, axis2):
     return _wrapfunc(a, 'swapaxes', axis1, axis2)
 
 
+def _transpose_dispatcher(a, axes=None):
+    return (a,)
+
+
+@array_function_dispatch(_transpose_dispatcher)
 def transpose(a, axes=None):
     """
     Permute the dimensions of an array.
@@ -598,6 +641,11 @@ def transpose(a, axes=None):
     return _wrapfunc(a, 'transpose', axes)
 
 
+def _partition_dispatcher(a, kth, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_partition_dispatcher)
 def partition(a, kth, axis=-1, kind='introselect', order=None):
     """
     Return a partitioned copy of an array.
@@ -689,6 +737,11 @@ def partition(a, kth, axis=-1, kind='introselect', order=None):
     return a
 
 
+def _argpartition_dispatcher(a, kth, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_argpartition_dispatcher)
 def argpartition(a, kth, axis=-1, kind='introselect', order=None):
     """
     Perform an indirect partition along the given axis using the
@@ -757,6 +810,11 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None):
     return _wrapfunc(a, 'argpartition', kth, axis=axis, kind=kind, order=order)
 
 
+def _sort_dispatcher(a, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_sort_dispatcher)
 def sort(a, axis=-1, kind='quicksort', order=None):
     """
     Return a sorted copy of an array.
@@ -879,6 +937,11 @@ def sort(a, axis=-1, kind='quicksort', order=None):
     return a
 
 
+def _argsort_dispatcher(a, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_argsort_dispatcher)
 def argsort(a, axis=-1, kind='quicksort', order=None):
     """
     Returns the indices that would sort an array.
@@ -973,6 +1036,11 @@ def argsort(a, axis=-1, kind='quicksort', order=None):
     return _wrapfunc(a, 'argsort', axis=axis, kind=kind, order=order)
 
 
+def _argmax_dispatcher(a, axis=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_argmax_dispatcher)
 def argmax(a, axis=None, out=None):
     """
     Returns the indices of the maximum values along an axis.
@@ -1007,10 +1075,10 @@ def argmax(a, axis=None, out=None):
 
     Examples
     --------
-    >>> a = np.arange(6).reshape(2,3)
+    >>> a = np.arange(6).reshape(2,3) + 10
     >>> a
-    array([[0, 1, 2],
-           [3, 4, 5]])
+    array([[10, 11, 12],
+           [13, 14, 15]])
     >>> np.argmax(a)
     5
     >>> np.argmax(a, axis=0)
@@ -1024,7 +1092,7 @@ def argmax(a, axis=None, out=None):
     >>> ind
     (1, 2)
     >>> a[ind]
-    5
+    15
 
     >>> b = np.arange(6)
     >>> b[1] = 5
@@ -1037,6 +1105,11 @@ def argmax(a, axis=None, out=None):
     return _wrapfunc(a, 'argmax', axis=axis, out=out)
 
 
+def _argmin_dispatcher(a, axis=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_argmin_dispatcher)
 def argmin(a, axis=None, out=None):
     """
     Returns the indices of the minimum values along an axis.
@@ -1071,10 +1144,10 @@ def argmin(a, axis=None, out=None):
 
     Examples
     --------
-    >>> a = np.arange(6).reshape(2,3)
+    >>> a = np.arange(6).reshape(2,3) + 10
     >>> a
-    array([[0, 1, 2],
-           [3, 4, 5]])
+    array([[10, 11, 12],
+           [13, 14, 15]])
     >>> np.argmin(a)
     0
     >>> np.argmin(a, axis=0)
@@ -1088,12 +1161,12 @@ def argmin(a, axis=None, out=None):
     >>> ind
     (0, 0)
     >>> a[ind]
-    0
+    10
 
-    >>> b = np.arange(6)
-    >>> b[4] = 0
+    >>> b = np.arange(6) + 10
+    >>> b[4] = 10
     >>> b
-    array([0, 1, 2, 3, 0, 5])
+    array([10, 11, 12, 13, 10, 15])
     >>> np.argmin(b)  # Only the first occurrence is returned.
     0
 
@@ -1101,6 +1174,11 @@ def argmin(a, axis=None, out=None):
     return _wrapfunc(a, 'argmin', axis=axis, out=out)
 
 
+def _searchsorted_dispatcher(a, v, side=None, sorter=None):
+    return (a, v, sorter)
+
+
+@array_function_dispatch(_searchsorted_dispatcher)
 def searchsorted(a, v, side='left', sorter=None):
     """
     Find indices where elements should be inserted to maintain order.
@@ -1170,6 +1248,11 @@ def searchsorted(a, v, side='left', sorter=None):
     return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
 
 
+def _resize_dispatcher(a, new_shape):
+    return (a,)
+
+
+@array_function_dispatch(_resize_dispatcher)
 def resize(a, new_shape):
     """
     Return a new array with the specified shape.
@@ -1243,6 +1326,11 @@ def resize(a, new_shape):
     return reshape(a, new_shape)
 
 
+def _squeeze_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_squeeze_dispatcher)
 def squeeze(a, axis=None):
     """
     Remove single-dimensional entries from the shape of an array.
@@ -1301,6 +1389,12 @@ def squeeze(a, axis=None):
     else:
         return squeeze(axis=axis)
 
+
+def _diagonal_dispatcher(a, offset=None, axis1=None, axis2=None):
+    return (a,)
+
+
+@array_function_dispatch(_diagonal_dispatcher)
 def diagonal(a, offset=0, axis1=0, axis2=1):
     """
     Return specified diagonals.
@@ -1415,6 +1509,12 @@ def diagonal(a, offset=0, axis1=0, axis2=1):
         return asanyarray(a).diagonal(offset=offset, axis1=axis1, axis2=axis2)
 
 
+def _trace_dispatcher(
+        a, offset=None, axis1=None, axis2=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_trace_dispatcher)
 def trace(a, offset=0, axis1=0, axis2=1, dtype=None, out=None):
     """
     Return the sum along diagonals of the array.
@@ -1478,6 +1578,11 @@ def trace(a, offset=0, axis1=0, axis2=1, dtype=None, out=None):
         return asanyarray(a).trace(offset=offset, axis1=axis1, axis2=axis2, dtype=dtype, out=out)
 
 
+def _ravel_dispatcher(a, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_ravel_dispatcher)
 def ravel(a, order='C'):
     """Return a contiguous flattened array.
 
@@ -1584,6 +1689,11 @@ def ravel(a, order='C'):
         return asanyarray(a).ravel(order=order)
 
 
+def _nonzero_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_nonzero_dispatcher)
 def nonzero(a):
     """
     Return the indices of the elements that are non-zero.
@@ -1670,6 +1780,11 @@ def nonzero(a):
     return _wrapfunc(a, 'nonzero')
 
 
+def _shape_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_shape_dispatcher)
 def shape(a):
     """
     Return the shape of an array.
@@ -1715,6 +1830,11 @@ def shape(a):
     return result
 
 
+def _compress_dispatcher(condition, a, axis=None, out=None):
+    return (condition, a, out)
+
+
+@array_function_dispatch(_compress_dispatcher)
 def compress(condition, a, axis=None, out=None):
     """
     Return selected slices of an array along given axis.
@@ -1778,6 +1898,11 @@ def compress(condition, a, axis=None, out=None):
     return _wrapfunc(a, 'compress', condition, axis=axis, out=out)
 
 
+def _clip_dispatcher(a, a_min, a_max, out=None):
+    return (a, a_min, a_max)
+
+
+@array_function_dispatch(_clip_dispatcher)
 def clip(a, a_min, a_max, out=None):
     """
     Clip (limit) the values in an array.
@@ -1835,6 +1960,12 @@ def clip(a, a_min, a_max, out=None):
     return _wrapfunc(a, 'clip', a_min, a_max, out=out)
 
 
+def _sum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
+                    initial=None):
+    return (a, out)
+
+
+@array_function_dispatch(_sum_dispatcher)
 def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
     """
     Sum of array elements over a given axis.
@@ -1934,7 +2065,7 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._No
         # 2018-02-25, 1.15.0
         warnings.warn(
             "Calling np.sum(generator) is deprecated, and in the future will give a different result. "
-            "Use np.sum(np.from_iter(generator)) or the python sum builtin instead.",
+            "Use np.sum(np.fromiter(generator)) or the python sum builtin instead.",
             DeprecationWarning, stacklevel=2)
 
         res = _sum_(a)
@@ -1947,6 +2078,11 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._No
                           initial=initial)
 
 
+def _any_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_any_dispatcher)
 def any(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Test whether any array element along a given axis evaluates to True.
@@ -2030,6 +2166,11 @@ def any(a, axis=None, out=None, keepdims=np._NoValue):
     return _wrapreduction(a, np.logical_or, 'any', axis, None, out, keepdims=keepdims)
 
 
+def _all_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_all_dispatcher)
 def all(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Test whether all array elements along a given axis evaluate to True.
@@ -2106,6 +2247,11 @@ def all(a, axis=None, out=None, keepdims=np._NoValue):
     return _wrapreduction(a, np.logical_and, 'all', axis, None, out, keepdims=keepdims)
 
 
+def _cumsum_dispatcher(a, axis=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_cumsum_dispatcher)
 def cumsum(a, axis=None, dtype=None, out=None):
     """
     Return the cumulative sum of the elements along a given axis.
@@ -2173,6 +2319,11 @@ def cumsum(a, axis=None, dtype=None, out=None):
     return _wrapfunc(a, 'cumsum', axis=axis, dtype=dtype, out=out)
 
 
+def _ptp_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_ptp_dispatcher)
 def ptp(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Range of values (maximum - minimum) along an axis.
@@ -2241,6 +2392,11 @@ def ptp(a, axis=None, out=None, keepdims=np._NoValue):
     return _methods._ptp(a, axis=axis, out=out, **kwargs)
 
 
+def _amax_dispatcher(a, axis=None, out=None, keepdims=None, initial=None):
+    return (a, out)
+
+
+@array_function_dispatch(_amax_dispatcher)
 def amax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
     """
     Return the maximum of an array or maximum along an axis.
@@ -2351,6 +2507,11 @@ def amax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
                           initial=initial)
 
 
+def _amin_dispatcher(a, axis=None, out=None, keepdims=None, initial=None):
+    return (a, out)
+
+
+@array_function_dispatch(_amin_dispatcher)
 def amin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
     """
     Return the minimum of an array or minimum along an axis.
@@ -2459,6 +2620,11 @@ def amin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
                           initial=initial)
 
 
+def _alen_dispathcer(a):
+    return (a,)
+
+
+@array_function_dispatch(_alen_dispathcer)
 def alen(a):
     """
     Return the length of the first dimension of the input array.
@@ -2492,6 +2658,12 @@ def alen(a):
         return len(array(a, ndmin=1))
 
 
+def _prod_dispatcher(
+        a, axis=None, dtype=None, out=None, keepdims=None, initial=None):
+    return (a, out)
+
+
+@array_function_dispatch(_prod_dispatcher)
 def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
     """
     Return the product of array elements over a given axis.
@@ -2602,6 +2774,11 @@ def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._N
                           initial=initial)
 
 
+def _cumprod_dispatcher(a, axis=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_cumprod_dispatcher)
 def cumprod(a, axis=None, dtype=None, out=None):
     """
     Return the cumulative product of elements along a given axis.
@@ -2665,6 +2842,11 @@ def cumprod(a, axis=None, dtype=None, out=None):
     return _wrapfunc(a, 'cumprod', axis=axis, dtype=dtype, out=out)
 
 
+def _ndim_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_ndim_dispatcher)
 def ndim(a):
     """
     Return the number of dimensions of an array.
@@ -2702,6 +2884,11 @@ def ndim(a):
         return asarray(a).ndim
 
 
+def _size_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_size_dispatcher)
 def size(a, axis=None):
     """
     Return the number of elements along a given axis.
@@ -2748,6 +2935,11 @@ def size(a, axis=None):
             return asarray(a).shape[axis]
 
 
+def _around_dispatcher(a, decimals=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_around_dispatcher)
 def around(a, decimals=0, out=None):
     """
     Evenly round to the given number of decimals.
@@ -2817,20 +3009,11 @@ def around(a, decimals=0, out=None):
     return _wrapfunc(a, 'round', decimals=decimals, out=out)
 
 
-def round_(a, decimals=0, out=None):
-    """
-    Round an array to the given number of decimals.
-
-    Refer to `around` for full documentation.
-
-    See Also
-    --------
-    around : equivalent function
-
-    """
-    return around(a, decimals=decimals, out=out)
+def _mean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+    return (a, out)
 
 
+@array_function_dispatch(_mean_dispatcher)
 def mean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Compute the arithmetic mean along the specified axis.
@@ -2937,6 +3120,12 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
                           out=out, **kwargs)
 
 
+def _std_dispatcher(
+        a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_std_dispatcher)
 def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     """
     Compute the standard deviation along the specified axis.
@@ -3055,6 +3244,12 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
                          **kwargs)
 
 
+def _var_dispatcher(
+        a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_var_dispatcher)
 def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     """
     Compute the variance along the specified axis.
@@ -3177,6 +3372,19 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
 # Aliases of other functions. These have their own definitions only so that
 # they can have unique docstrings.
 
+@array_function_dispatch(_around_dispatcher)
+def round_(a, decimals=0, out=None):
+    """
+    Round an array to the given number of decimals.
+
+    See Also
+    --------
+    around : equivalent function; see for details.
+    """
+    return around(a, decimals=decimals, out=out)
+
+
+@array_function_dispatch(_prod_dispatcher, verify=False)
 def product(*args, **kwargs):
     """
     Return the product of array elements over a given axis.
@@ -3188,6 +3396,7 @@ def product(*args, **kwargs):
     return prod(*args, **kwargs)
 
 
+@array_function_dispatch(_cumprod_dispatcher, verify=False)
 def cumproduct(*args, **kwargs):
     """
     Return the cumulative product over the given axis.
@@ -3199,6 +3408,7 @@ def cumproduct(*args, **kwargs):
     return cumprod(*args, **kwargs)
 
 
+@array_function_dispatch(_any_dispatcher, verify=False)
 def sometrue(*args, **kwargs):
     """
     Check whether some values are true.
@@ -3212,6 +3422,7 @@ def sometrue(*args, **kwargs):
     return any(*args, **kwargs)
 
 
+@array_function_dispatch(_all_dispatcher, verify=False)
 def alltrue(*args, **kwargs):
     """
     Check if all elements of input array are true.
@@ -3223,6 +3434,7 @@ def alltrue(*args, **kwargs):
     return all(*args, **kwargs)
 
 
+@array_function_dispatch(_ndim_dispatcher)
 def rank(a):
     """
     Return the number of dimensions of an array.
diff --git a/numpy/core/getlimits.py b/numpy/core/getlimits.py
index e450a660d..389f16ff5 100644
--- a/numpy/core/getlimits.py
+++ b/numpy/core/getlimits.py
@@ -30,6 +30,32 @@ def _fr1(a):
         a.shape = ()
     return a
 
+class MachArLike(object):
+    """ Object to simulate MachAr instance """
+
+    def __init__(self,
+                 ftype,
+                 **kwargs):
+        params = _MACHAR_PARAMS[ftype]
+        float_conv = lambda v: array([v], ftype)
+        float_to_float = lambda v : _fr1(float_conv(v))
+        float_to_str = lambda v: (params['fmt'] % array(_fr0(v)[0], ftype))
+
+        self.title = params['title']
+        # Parameter types same as for discovered MachAr object.
+        self.epsilon = self.eps = float_to_float(kwargs.pop('eps'))
+        self.epsneg = float_to_float(kwargs.pop('epsneg'))
+        self.xmax = self.huge = float_to_float(kwargs.pop('huge'))
+        self.xmin = self.tiny = float_to_float(kwargs.pop('tiny'))
+        self.ibeta = params['itype'](kwargs.pop('ibeta'))
+        self.__dict__.update(kwargs)
+        self.precision = int(-log10(self.eps))
+        self.resolution = float_to_float(float_conv(10) ** (-self.precision))
+        self._str_eps = float_to_str(self.eps)
+        self._str_epsneg = float_to_str(self.epsneg)
+        self._str_xmin = float_to_str(self.xmin)
+        self._str_xmax = float_to_str(self.xmax)
+        self._str_resolution = float_to_str(self.resolution)
 
 _convert_to_float = {
     ntypes.csingle: ntypes.single,
@@ -37,7 +63,6 @@ _convert_to_float = {
     ntypes.clongfloat: ntypes.longfloat
     }
 
-
 # Parameters for creating MachAr / MachAr-like objects
 _title_fmt = 'numpy {} precision floating point number'
 _MACHAR_PARAMS = {
@@ -58,194 +83,156 @@ _MACHAR_PARAMS = {
         fmt = '%12.5e',
         title = _title_fmt.format('half'))}
 
-
-class MachArLike(object):
-    """ Object to simulate MachAr instance """
-
-    def __init__(self,
-                 ftype,
-                 **kwargs):
-        params = _MACHAR_PARAMS[ftype]
-        float_conv = lambda v: array([v], ftype)
-        float_to_float = lambda v : _fr1(float_conv(v))
-        self._float_to_str = lambda v: (params['fmt'] %
-                                        array(_fr0(v)[0], ftype))
-        self.title = params['title']
-        # Parameter types same as for discovered MachAr object.
-        self.epsilon = self.eps = float_to_float(kwargs.pop('eps'))
-        self.epsneg = float_to_float(kwargs.pop('epsneg'))
-        self.xmax = self.huge = float_to_float(kwargs.pop('huge'))
-        self.xmin = self.tiny = float_to_float(kwargs.pop('tiny'))
-        self.ibeta = params['itype'](kwargs.pop('ibeta'))
-        self.__dict__.update(kwargs)
-        self.precision = int(-log10(self.eps))
-        self.resolution = float_to_float(float_conv(10) ** (-self.precision))
-
-    # Properties below to delay need for float_to_str, and thus avoid circular
-    # imports during early numpy module loading.
-    # See: https://github.com/numpy/numpy/pull/8983#discussion_r115838683
-
-    @property
-    def _str_eps(self):
-        return self._float_to_str(self.eps)
-
-    @property
-    def _str_epsneg(self):
-        return self._float_to_str(self.epsneg)
-
-    @property
-    def _str_xmin(self):
-        return self._float_to_str(self.xmin)
-
-    @property
-    def _str_xmax(self):
-        return self._float_to_str(self.xmax)
-
-    @property
-    def _str_resolution(self):
-        return self._float_to_str(self.resolution)
-
-
-# Known parameters for float16
-# See docstring of MachAr class for description of parameters.
-_f16 = ntypes.float16
-_float16_ma = MachArLike(_f16,
-                         machep=-10,
-                         negep=-11,
-                         minexp=-14,
-                         maxexp=16,
-                         it=10,
-                         iexp=5,
-                         ibeta=2,
-                         irnd=5,
-                         ngrd=0,
-                         eps=exp2(_f16(-10)),
-                         epsneg=exp2(_f16(-11)),
-                         huge=_f16(65504),
-                         tiny=_f16(2 ** -14))
-
-# Known parameters for float32
-_f32 = ntypes.float32
-_float32_ma = MachArLike(_f32,
-                         machep=-23,
-                         negep=-24,
-                         minexp=-126,
-                         maxexp=128,
-                         it=23,
-                         iexp=8,
-                         ibeta=2,
-                         irnd=5,
-                         ngrd=0,
-                         eps=exp2(_f32(-23)),
-                         epsneg=exp2(_f32(-24)),
-                         huge=_f32((1 - 2 ** -24) * 2**128),
-                         tiny=exp2(_f32(-126)))
-
-# Known parameters for float64
-_f64 = ntypes.float64
-_epsneg_f64 = 2.0 ** -53.0
-_tiny_f64 = 2.0 ** -1022.0
-_float64_ma = MachArLike(_f64,
-                         machep=-52,
-                         negep=-53,
-                         minexp=-1022,
-                         maxexp=1024,
-                         it=52,
-                         iexp=11,
-                         ibeta=2,
-                         irnd=5,
-                         ngrd=0,
-                         eps=2.0 ** -52.0,
-                         epsneg=_epsneg_f64,
-                         huge=(1.0 - _epsneg_f64) / _tiny_f64 * _f64(4),
-                         tiny=_tiny_f64)
-
-# Known parameters for IEEE 754 128-bit binary float
-_ld = ntypes.longdouble
-_epsneg_f128 = exp2(_ld(-113))
-_tiny_f128 = exp2(_ld(-16382))
-# Ignore runtime error when this is not f128
-with numeric.errstate(all='ignore'):
-    _huge_f128 = (_ld(1) - _epsneg_f128) / _tiny_f128 * _ld(4)
-_float128_ma = MachArLike(_ld,
-                         machep=-112,
-                         negep=-113,
-                         minexp=-16382,
-                         maxexp=16384,
-                         it=112,
-                         iexp=15,
-                         ibeta=2,
-                         irnd=5,
-                         ngrd=0,
-                         eps=exp2(_ld(-112)),
-                         epsneg=_epsneg_f128,
-                         huge=_huge_f128,
-                         tiny=_tiny_f128)
-
-# Known parameters for float80 (Intel 80-bit extended precision)
-_epsneg_f80 = exp2(_ld(-64))
-_tiny_f80 = exp2(_ld(-16382))
-# Ignore runtime error when this is not f80
-with numeric.errstate(all='ignore'):
-    _huge_f80 = (_ld(1) - _epsneg_f80) / _tiny_f80 * _ld(4)
-_float80_ma = MachArLike(_ld,
-                         machep=-63,
-                         negep=-64,
-                         minexp=-16382,
-                         maxexp=16384,
-                         it=63,
-                         iexp=15,
-                         ibeta=2,
-                         irnd=5,
-                         ngrd=0,
-                         eps=exp2(_ld(-63)),
-                         epsneg=_epsneg_f80,
-                         huge=_huge_f80,
-                         tiny=_tiny_f80)
-
-# Guessed / known parameters for double double; see:
-# https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format#Double-double_arithmetic
-# These numbers have the same exponent range as float64, but extended number of
-# digits in the significand.
-_huge_dd = (umath.nextafter(_ld(inf), _ld(0))
-            if hasattr(umath, 'nextafter')  # Missing on some platforms?
-            else _float64_ma.huge)
-_float_dd_ma = MachArLike(_ld,
-                          machep=-105,
-                          negep=-106,
-                          minexp=-1022,
-                          maxexp=1024,
-                          it=105,
-                          iexp=11,
-                          ibeta=2,
-                          irnd=5,
-                          ngrd=0,
-                          eps=exp2(_ld(-105)),
-                          epsneg= exp2(_ld(-106)),
-                          huge=_huge_dd,
-                          tiny=exp2(_ld(-1022)))
-
-
 # Key to identify the floating point type.  Key is result of
 # ftype('-0.1').newbyteorder('<').tobytes()
 # See:
 # https://perl5.git.perl.org/perl.git/blob/3118d7d684b56cbeb702af874f4326683c45f045:/Configure
-_KNOWN_TYPES = {
-    b'\x9a\x99\x99\x99\x99\x99\xb9\xbf' : _float64_ma,
-    b'\xcd\xcc\xcc\xbd' : _float32_ma,
-    b'f\xae' : _float16_ma,
+_KNOWN_TYPES = {}
+def _register_type(machar, bytepat):
+    _KNOWN_TYPES[bytepat] = machar
+_float_ma = {}
+
+def _register_known_types():
+    # Known parameters for float16
+    # See docstring of MachAr class for description of parameters.
+    f16 = ntypes.float16
+    float16_ma = MachArLike(f16,
+                            machep=-10,
+                            negep=-11,
+                            minexp=-14,
+                            maxexp=16,
+                            it=10,
+                            iexp=5,
+                            ibeta=2,
+                            irnd=5,
+                            ngrd=0,
+                            eps=exp2(f16(-10)),
+                            epsneg=exp2(f16(-11)),
+                            huge=f16(65504),
+                            tiny=f16(2 ** -14))
+    _register_type(float16_ma, b'f\xae')
+    _float_ma[16] = float16_ma
+
+    # Known parameters for float32
+    f32 = ntypes.float32
+    float32_ma = MachArLike(f32,
+                            machep=-23,
+                            negep=-24,
+                            minexp=-126,
+                            maxexp=128,
+                            it=23,
+                            iexp=8,
+                            ibeta=2,
+                            irnd=5,
+                            ngrd=0,
+                            eps=exp2(f32(-23)),
+                            epsneg=exp2(f32(-24)),
+                            huge=f32((1 - 2 ** -24) * 2**128),
+                            tiny=exp2(f32(-126)))
+    _register_type(float32_ma, b'\xcd\xcc\xcc\xbd')
+    _float_ma[32] = float32_ma
+
+    # Known parameters for float64
+    f64 = ntypes.float64
+    epsneg_f64 = 2.0 ** -53.0
+    tiny_f64 = 2.0 ** -1022.0
+    float64_ma = MachArLike(f64,
+                            machep=-52,
+                            negep=-53,
+                            minexp=-1022,
+                            maxexp=1024,
+                            it=52,
+                            iexp=11,
+                            ibeta=2,
+                            irnd=5,
+                            ngrd=0,
+                            eps=2.0 ** -52.0,
+                            epsneg=epsneg_f64,
+                            huge=(1.0 - epsneg_f64) / tiny_f64 * f64(4),
+                            tiny=tiny_f64)
+    _register_type(float64_ma, b'\x9a\x99\x99\x99\x99\x99\xb9\xbf')
+    _float_ma[64] = float64_ma
+
+    # Known parameters for IEEE 754 128-bit binary float
+    ld = ntypes.longdouble
+    epsneg_f128 = exp2(ld(-113))
+    tiny_f128 = exp2(ld(-16382))
+    # Ignore runtime error when this is not f128
+    with numeric.errstate(all='ignore'):
+        huge_f128 = (ld(1) - epsneg_f128) / tiny_f128 * ld(4)
+    float128_ma = MachArLike(ld,
+                             machep=-112,
+                             negep=-113,
+                             minexp=-16382,
+                             maxexp=16384,
+                             it=112,
+                             iexp=15,
+                             ibeta=2,
+                             irnd=5,
+                             ngrd=0,
+                             eps=exp2(ld(-112)),
+                             epsneg=epsneg_f128,
+                             huge=huge_f128,
+                             tiny=tiny_f128)
+    # IEEE 754 128-bit binary float
+    _register_type(float128_ma,
+        b'\x9a\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\xfb\xbf')
+    _register_type(float128_ma,
+        b'\x9a\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\xfb\xbf')
+    _float_ma[128] = float128_ma
+
+    # Known parameters for float80 (Intel 80-bit extended precision)
+    epsneg_f80 = exp2(ld(-64))
+    tiny_f80 = exp2(ld(-16382))
+    # Ignore runtime error when this is not f80
+    with numeric.errstate(all='ignore'):
+        huge_f80 = (ld(1) - epsneg_f80) / tiny_f80 * ld(4)
+    float80_ma = MachArLike(ld,
+                            machep=-63,
+                            negep=-64,
+                            minexp=-16382,
+                            maxexp=16384,
+                            it=63,
+                            iexp=15,
+                            ibeta=2,
+                            irnd=5,
+                            ngrd=0,
+                            eps=exp2(ld(-63)),
+                            epsneg=epsneg_f80,
+                            huge=huge_f80,
+                            tiny=tiny_f80)
     # float80, first 10 bytes containing actual storage
-    b'\xcd\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xfb\xbf' : _float80_ma,
+    _register_type(float80_ma, b'\xcd\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xfb\xbf')
+    _float_ma[80] = float80_ma
+
+    # Guessed / known parameters for double double; see:
+    # https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format#Double-double_arithmetic
+    # These numbers have the same exponent range as float64, but extended number of
+    # digits in the significand.
+    huge_dd = (umath.nextafter(ld(inf), ld(0))
+                if hasattr(umath, 'nextafter')  # Missing on some platforms?
+                else float64_ma.huge)
+    float_dd_ma = MachArLike(ld,
+                              machep=-105,
+                              negep=-106,
+                              minexp=-1022,
+                              maxexp=1024,
+                              it=105,
+                              iexp=11,
+                              ibeta=2,
+                              irnd=5,
+                              ngrd=0,
+                              eps=exp2(ld(-105)),
+                              epsneg= exp2(ld(-106)),
+                              huge=huge_dd,
+                              tiny=exp2(ld(-1022)))
     # double double; low, high order (e.g. PPC 64)
-    b'\x9a\x99\x99\x99\x99\x99Y<\x9a\x99\x99\x99\x99\x99\xb9\xbf' :
-    _float_dd_ma,
+    _register_type(float_dd_ma,
+        b'\x9a\x99\x99\x99\x99\x99Y<\x9a\x99\x99\x99\x99\x99\xb9\xbf')
     # double double; high, low order (e.g. PPC 64 le)
-    b'\x9a\x99\x99\x99\x99\x99\xb9\xbf\x9a\x99\x99\x99\x99\x99Y<' :
-    _float_dd_ma,
-    # IEEE 754 128-bit binary float
-    b'\x9a\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\xfb\xbf' :
-    _float128_ma,
-}
+    _register_type(float_dd_ma,
+        b'\x9a\x99\x99\x99\x99\x99\xb9\xbf\x9a\x99\x99\x99\x99\x99Y<')
+    _float_ma['dd'] = float_dd_ma
 
 
 def _get_machar(ftype):
@@ -513,7 +500,7 @@ class iinfo(object):
         self.bits = self.dtype.itemsize * 8
         self.key = "%s%d" % (self.kind, self.bits)
         if self.kind not in 'iu':
-            raise ValueError("Invalid integer data type.")
+            raise ValueError("Invalid integer data type %r." % (self.kind,))
 
     def min(self):
         """Minimum value of given dtype."""
diff --git a/numpy/core/include/numpy/ndarrayobject.h b/numpy/core/include/numpy/ndarrayobject.h
index 12fc7098c..45f008b1d 100644
--- a/numpy/core/include/numpy/ndarrayobject.h
+++ b/numpy/core/include/numpy/ndarrayobject.h
@@ -5,13 +5,7 @@
 #ifndef NPY_NDARRAYOBJECT_H
 #define NPY_NDARRAYOBJECT_H
 #ifdef __cplusplus
-#define CONFUSE_EMACS {
-#define CONFUSE_EMACS2 }
-extern "C" CONFUSE_EMACS
-#undef CONFUSE_EMACS
-#undef CONFUSE_EMACS2
-/* ... otherwise a semi-smart identer (like emacs) tries to indent
-       everything when you're typing */
+extern "C" {
 #endif
 
 #include <Python.h>
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index ec2893b21..da006909a 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1670,7 +1670,7 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags)
 #define PyTypeNum_ISOBJECT(type) ((type) == NPY_OBJECT)
 
 
-#define PyDataType_ISBOOL(obj) PyTypeNum_ISBOOL(_PyADt(obj))
+#define PyDataType_ISBOOL(obj) PyTypeNum_ISBOOL(((PyArray_Descr*)(obj))->type_num)
 #define PyDataType_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(((PyArray_Descr*)(obj))->type_num)
 #define PyDataType_ISSIGNED(obj) PyTypeNum_ISSIGNED(((PyArray_Descr*)(obj))->type_num)
 #define PyDataType_ISINTEGER(obj) PyTypeNum_ISINTEGER(((PyArray_Descr*)(obj))->type_num )
diff --git a/numpy/core/include/numpy/npy_3kcompat.h b/numpy/core/include/numpy/npy_3kcompat.h
index 2d0ccd3b9..a3c69f44e 100644
--- a/numpy/core/include/numpy/npy_3kcompat.h
+++ b/numpy/core/include/numpy/npy_3kcompat.h
@@ -69,6 +69,16 @@ static NPY_INLINE int PyInt_Check(PyObject *op) {
     #define Npy_EnterRecursiveCall(x) Py_EnterRecursiveCall(x)
 #endif
 
+/* Py_SETREF was added in 3.5.2, and only if Py_LIMITED_API is absent */
+#if PY_VERSION_HEX < 0x03050200
+    #define Py_SETREF(op, op2)                      \
+        do {                                        \
+            PyObject *_py_tmp = (PyObject *)(op);   \
+            (op) = (op2);                           \
+            Py_DECREF(_py_tmp);                     \
+        } while (0)
+#endif
+
 /*
  * PyString -> PyBytes
  */
@@ -141,20 +151,14 @@ static NPY_INLINE int PyInt_Check(PyObject *op) {
 static NPY_INLINE void
 PyUnicode_ConcatAndDel(PyObject **left, PyObject *right)
 {
-    PyObject *newobj;
-    newobj = PyUnicode_Concat(*left, right);
-    Py_DECREF(*left);
+    Py_SETREF(*left, PyUnicode_Concat(*left, right));
     Py_DECREF(right);
-    *left = newobj;
 }
 
 static NPY_INLINE void
 PyUnicode_Concat2(PyObject **left, PyObject *right)
 {
-    PyObject *newobj;
-    newobj = PyUnicode_Concat(*left, right);
-    Py_DECREF(*left);
-    *left = newobj;
+    Py_SETREF(*left, PyUnicode_Concat(*left, right));
 }
 
 /*
@@ -378,6 +382,38 @@ npy_PyFile_CloseFile(PyObject *file)
     return 0;
 }
 
+
+/* This is a copy of _PyErr_ChainExceptions, with:
+ *  - a minimal implementation for python 2
+ *  - __cause__ used instead of __context__
+ */
+static NPY_INLINE void
+npy_PyErr_ChainExceptionsCause(PyObject *exc, PyObject *val, PyObject *tb)
+{
+    if (exc == NULL)
+        return;
+
+    if (PyErr_Occurred()) {
+        /* only py3 supports this anyway */
+        #ifdef NPY_PY3K
+            PyObject *exc2, *val2, *tb2;
+            PyErr_Fetch(&exc2, &val2, &tb2);
+            PyErr_NormalizeException(&exc, &val, &tb);
+            if (tb != NULL) {
+                PyException_SetTraceback(val, tb);
+                Py_DECREF(tb);
+            }
+            Py_DECREF(exc);
+            PyErr_NormalizeException(&exc2, &val2, &tb2);
+            PyException_SetCause(val2, val);
+            PyErr_Restore(exc2, val2, tb2);
+        #endif
+    }
+    else {
+        PyErr_Restore(exc, val, tb);
+    }
+}
+
 /*
  * PyObject_Cmp
  */
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 4b1b3d325..85f8a6c08 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -209,9 +209,32 @@ typedef struct _tagPyUFuncObject {
          * set by nditer object.
          */
         npy_uint32 iter_flags;
+
+        /* New in NPY_API_VERSION 0x0000000D and above */
+
+        /*
+         * for each core_num_dim_ix distinct dimension names,
+         * the possible "frozen" size (-1 if not frozen).
+         */
+        npy_intp *core_dim_sizes;
+
+        /*
+         * for each distinct core dimension, a set of UFUNC_CORE_DIM* flags
+         */
+        npy_uint32 *core_dim_flags;
+
+
+
 } PyUFuncObject;
 
 #include "arrayobject.h"
+/* Generalized ufunc; 0x0001 reserved for possible use as CORE_ENABLED */
+/* the core dimension's size will be determined by the operands. */
+#define UFUNC_CORE_DIM_SIZE_INFERRED 0x0002
+/* the core dimension may be absent */
+#define UFUNC_CORE_DIM_CAN_IGNORE 0x0004
+/* flags inferred during execution */
+#define UFUNC_CORE_DIM_MISSING 0x00040000
 
 #define UFUNC_ERR_IGNORE 0
 #define UFUNC_ERR_WARN   1
@@ -314,22 +337,6 @@ typedef struct _loop1d_info {
                                 &(arg)->first))) \
                 goto fail;} while (0)
 
-
-/* keep in sync with ieee754.c.src */
-#if defined(sun) || defined(__BSD__) || defined(__OpenBSD__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version < 502114)) || \
-      defined(__NetBSD__) || \
-      defined(__GLIBC__) || defined(__APPLE__) || \
-      defined(__CYGWIN__) || defined(__MINGW32__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version >= 502114)) || \
-      defined(_AIX) || \
-      defined(_MSC_VER) || \
-      defined(__osf__) && defined(__alpha)
-#else
-#define NO_FLOATING_POINT_SUPPORT
-#endif
-
-
 /*
  * THESE MACROS ARE DEPRECATED.
  * Use npy_set_floatstatus_* in the npymath library.
diff --git a/numpy/core/memmap.py b/numpy/core/memmap.py
index 8269f537f..f5cc68bb9 100644
--- a/numpy/core/memmap.py
+++ b/numpy/core/memmap.py
@@ -3,7 +3,7 @@ from __future__ import division, absolute_import, print_function
 import numpy as np
 from .numeric import uint8, ndarray, dtype
 from numpy.compat import (
-    long, basestring, is_pathlib_path, contextlib_nullcontext
+    long, basestring, os_fspath, contextlib_nullcontext, is_pathlib_path
 )
 
 __all__ = ['memmap']
@@ -218,10 +218,8 @@ class memmap(ndarray):
 
         if hasattr(filename, 'read'):
             f_ctx = contextlib_nullcontext(filename)
-        elif is_pathlib_path(filename):
-            f_ctx = filename.open(('r' if mode == 'c' else mode)+'b')
         else:
-            f_ctx = open(filename, ('r' if mode == 'c' else mode)+'b')
+            f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b')
 
         with f_ctx as fid:
             fid.seek(0, 2)
@@ -268,14 +266,13 @@ class memmap(ndarray):
             self.offset = offset
             self.mode = mode
 
-            if isinstance(filename, basestring):
-                self.filename = os.path.abspath(filename)
-            elif is_pathlib_path(filename):
+            if is_pathlib_path(filename):
+                # special case - if we were constructed with a pathlib.path,
+                # then filename is a path object, not a string
                 self.filename = filename.resolve()
-            # py3 returns int for TemporaryFile().name
-            elif (hasattr(filename, "name") and
-                  isinstance(filename.name, basestring)):
-                self.filename = os.path.abspath(filename.name)
+            elif hasattr(fid, "name") and isinstance(fid.name, basestring):
+                # py3 returns int for TemporaryFile().name
+                self.filename = os.path.abspath(fid.name)
             # same as memmap copies (e.g. memmap + 1)
             else:
                 self.filename = None
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index 673328397..25debd2f8 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -6,7 +6,11 @@ by importing from the extension module.
 
 """
 
+import functools
+
+from . import overrides
 from . import _multiarray_umath
+import numpy as np
 from numpy.core._multiarray_umath import *
 from numpy.core._multiarray_umath import (
     _fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string,
@@ -35,3 +39,1120 @@ __all__ = [
     'tracemalloc_domain', 'typeinfo', 'unpackbits', 'unravel_index', 'vdot',
     'where', 'zeros']
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
+def _empty_like_dispatcher(prototype, dtype=None, order=None, subok=None):
+    return (prototype,)
+
+
+@array_function_dispatch(_empty_like_dispatcher)
+def empty_like(prototype, dtype=None, order='K', subok=True):
+    """Return a new array with the same shape and type as a given array.
+
+    Parameters
+    ----------
+    prototype : array_like
+        The shape and data-type of `prototype` define these same attributes
+        of the returned array.
+    dtype : data-type, optional
+        Overrides the data type of the result.
+
+        .. versionadded:: 1.6.0
+    order : {'C', 'F', 'A', or 'K'}, optional
+        Overrides the memory layout of the result. 'C' means C-order,
+        'F' means F-order, 'A' means 'F' if ``prototype`` is Fortran
+        contiguous, 'C' otherwise. 'K' means match the layout of ``prototype``
+        as closely as possible.
+
+        .. versionadded:: 1.6.0
+    subok : bool, optional.
+        If True, then the newly created array will use the sub-class
+        type of 'a', otherwise it will be a base-class array. Defaults
+        to True.
+
+    Returns
+    -------
+    out : ndarray
+        Array of uninitialized (arbitrary) data with the same
+        shape and type as `prototype`.
+
+    See Also
+    --------
+    ones_like : Return an array of ones with shape and type of input.
+    zeros_like : Return an array of zeros with shape and type of input.
+    full_like : Return a new array with shape of input filled with value.
+    empty : Return a new uninitialized array.
+
+    Notes
+    -----
+    This function does *not* initialize the returned array; to do that use
+    `zeros_like` or `ones_like` instead.  It may be marginally faster than
+    the functions that do set the array values.
+
+    Examples
+    --------
+    >>> a = ([1,2,3], [4,5,6])                         # a is array-like
+    >>> np.empty_like(a)
+    array([[-1073741821, -1073741821,           3],    #random
+           [          0,           0, -1073741821]])
+    >>> a = np.array([[1., 2., 3.],[4.,5.,6.]])
+    >>> np.empty_like(a)
+    array([[ -2.00000715e+000,   1.48219694e-323,  -2.00000572e+000],#random
+           [  4.38791518e-305,  -2.00000715e+000,   4.17269252e-309]])
+
+    """
+    return _multiarray_umath.empty_like(prototype, dtype, order, subok)
+
+
+def _concatenate_dispatcher(arrays, axis=None, out=None):
+    for array in arrays:
+        yield array
+    yield out
+
+
+@array_function_dispatch(_concatenate_dispatcher)
+def concatenate(arrays, axis=0, out=None):
+    """
+    concatenate((a1, a2, ...), axis=0, out=None)
+
+    Join a sequence of arrays along an existing axis.
+
+    Parameters
+    ----------
+    a1, a2, ... : sequence of array_like
+        The arrays must have the same shape, except in the dimension
+        corresponding to `axis` (the first, by default).
+    axis : int, optional
+        The axis along which the arrays will be joined.  If axis is None,
+        arrays are flattened before use.  Default is 0.
+    out : ndarray, optional
+        If provided, the destination to place the result. The shape must be
+        correct, matching that of what concatenate would have returned if no
+        out argument were specified.
+
+    Returns
+    -------
+    res : ndarray
+        The concatenated array.
+
+    See Also
+    --------
+    ma.concatenate : Concatenate function that preserves input masks.
+    array_split : Split an array into multiple sub-arrays of equal or
+                  near-equal size.
+    split : Split array into a list of multiple sub-arrays of equal size.
+    hsplit : Split array into multiple sub-arrays horizontally (column wise)
+    vsplit : Split array into multiple sub-arrays vertically (row wise)
+    dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
+    stack : Stack a sequence of arrays along a new axis.
+    hstack : Stack arrays in sequence horizontally (column wise)
+    vstack : Stack arrays in sequence vertically (row wise)
+    dstack : Stack arrays in sequence depth wise (along third dimension)
+    block : Assemble arrays from blocks.
+
+    Notes
+    -----
+    When one or more of the arrays to be concatenated is a MaskedArray,
+    this function will return a MaskedArray object instead of an ndarray,
+    but the input masks are *not* preserved. In cases where a MaskedArray
+    is expected as input, use the ma.concatenate function from the masked
+    array module instead.
+
+    Examples
+    --------
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> b = np.array([[5, 6]])
+    >>> np.concatenate((a, b), axis=0)
+    array([[1, 2],
+           [3, 4],
+           [5, 6]])
+    >>> np.concatenate((a, b.T), axis=1)
+    array([[1, 2, 5],
+           [3, 4, 6]])
+    >>> np.concatenate((a, b), axis=None)
+    array([1, 2, 3, 4, 5, 6])
+
+    This function will not preserve masking of MaskedArray inputs.
+
+    >>> a = np.ma.arange(3)
+    >>> a[1] = np.ma.masked
+    >>> b = np.arange(2, 5)
+    >>> a
+    masked_array(data=[0, --, 2],
+                 mask=[False,  True, False],
+           fill_value=999999)
+    >>> b
+    array([2, 3, 4])
+    >>> np.concatenate([a, b])
+    masked_array(data=[0, 1, 2, 2, 3, 4],
+                 mask=False,
+           fill_value=999999)
+    >>> np.ma.concatenate([a, b])
+    masked_array(data=[0, --, 2, 2, 3, 4],
+                 mask=[False,  True, False, False, False, False],
+           fill_value=999999)
+
+    """
+    return _multiarray_umath.concatenate(arrays, axis, out)
+
+
+def _inner_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_inner_dispatcher)
+def inner(a, b):
+    """
+    Inner product of two arrays.
+
+    Ordinary inner product of vectors for 1-D arrays (without complex
+    conjugation), in higher dimensions a sum product over the last axes.
+
+    Parameters
+    ----------
+    a, b : array_like
+        If `a` and `b` are nonscalar, their last dimensions must match.
+
+    Returns
+    -------
+    out : ndarray
+        `out.shape = a.shape[:-1] + b.shape[:-1]`
+
+    Raises
+    ------
+    ValueError
+        If the last dimension of `a` and `b` has different size.
+
+    See Also
+    --------
+    tensordot : Sum products over arbitrary axes.
+    dot : Generalised matrix product, using second last dimension of `b`.
+    einsum : Einstein summation convention.
+
+    Notes
+    -----
+    For vectors (1-D arrays) it computes the ordinary inner-product::
+
+        np.inner(a, b) = sum(a[:]*b[:])
+
+    More generally, if `ndim(a) = r > 0` and `ndim(b) = s > 0`::
+
+        np.inner(a, b) = np.tensordot(a, b, axes=(-1,-1))
+
+    or explicitly::
+
+        np.inner(a, b)[i0,...,ir-1,j0,...,js-1]
+             = sum(a[i0,...,ir-1,:]*b[j0,...,js-1,:])
+
+    In addition `a` or `b` may be scalars, in which case::
+
+       np.inner(a,b) = a*b
+
+    Examples
+    --------
+    Ordinary inner product for vectors:
+
+    >>> a = np.array([1,2,3])
+    >>> b = np.array([0,1,0])
+    >>> np.inner(a, b)
+    2
+
+    A multidimensional example:
+
+    >>> a = np.arange(24).reshape((2,3,4))
+    >>> b = np.arange(4)
+    >>> np.inner(a, b)
+    array([[ 14,  38,  62],
+           [ 86, 110, 134]])
+
+    An example where `b` is a scalar:
+
+    >>> np.inner(np.eye(2), 7)
+    array([[ 7.,  0.],
+           [ 0.,  7.]])
+
+    """
+    return _multiarray_umath.inner(a, b)
+
+
+def _where_dispatcher(condition, x=None, y=None):
+    return (condition, x, y)
+
+
+@array_function_dispatch(_where_dispatcher)
+def where(condition, x=np._NoValue, y=np._NoValue):
+    """
+    where(condition, [x, y])
+
+    Return elements chosen from `x` or `y` depending on `condition`.
+
+    .. note::
+        When only `condition` is provided, this function is a shorthand for
+        ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
+        preferred, as it behaves correctly for subclasses. The rest of this
+        documentation covers only the case where all three arguments are
+        provided.
+
+    Parameters
+    ----------
+    condition : array_like, bool
+        Where True, yield `x`, otherwise yield `y`.
+    x, y : array_like
+        Values from which to choose. `x`, `y` and `condition` need to be
+        broadcastable to some shape.
+
+    Returns
+    -------
+    out : ndarray
+        An array with elements from `x` where `condition` is True, and elements
+        from `y` elsewhere.
+
+    See Also
+    --------
+    choose
+    nonzero : The function that is called when x and y are omitted
+
+    Notes
+    -----
+    If all the arrays are 1-D, `where` is equivalent to::
+
+        [xv if c else yv
+         for c, xv, yv in zip(condition, x, y)]
+
+    Examples
+    --------
+    >>> a = np.arange(10)
+    >>> a
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> np.where(a < 5, a, 10*a)
+    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
+
+    This can be used on multidimensional arrays too:
+
+    >>> np.where([[True, False], [True, True]],
+    ...          [[1, 2], [3, 4]],
+    ...          [[9, 8], [7, 6]])
+    array([[1, 8],
+           [3, 4]])
+
+    The shapes of x, y, and the condition are broadcast together:
+
+    >>> x, y = np.ogrid[:3, :4]
+    >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
+    array([[10,  0,  0,  0],
+           [10, 11,  1,  1],
+           [10, 11, 12,  2]])
+
+    >>> a = np.array([[0, 1, 2],
+    ...               [0, 2, 4],
+    ...               [0, 3, 6]])
+    >>> np.where(a < 4, a, -1)  # -1 is broadcast
+    array([[ 0,  1,  2],
+           [ 0,  2, -1],
+           [ 0,  3, -1]])
+    """
+    # _multiarray_umath.where only accepts positional arguments
+    args = tuple(a for a in (x, y) if a is not np._NoValue)
+    return _multiarray_umath.where(condition, *args)
+
+
+def _lexsort_dispatcher(keys, axis=None):
+    if isinstance(keys, tuple):
+        return keys
+    else:
+        return (keys,)
+
+
+@array_function_dispatch(_lexsort_dispatcher)
+def lexsort(keys, axis=-1):
+    """
+    Perform an indirect stable sort using a sequence of keys.
+
+    Given multiple sorting keys, which can be interpreted as columns in a
+    spreadsheet, lexsort returns an array of integer indices that describes
+    the sort order by multiple columns. The last key in the sequence is used
+    for the primary sort order, the second-to-last key for the secondary sort
+    order, and so on. The keys argument must be a sequence of objects that
+    can be converted to arrays of the same shape. If a 2D array is provided
+    for the keys argument, it's rows are interpreted as the sorting keys and
+    sorting is according to the last row, second last row etc.
+
+    Parameters
+    ----------
+    keys : (k, N) array or tuple containing k (N,)-shaped sequences
+        The `k` different "columns" to be sorted.  The last column (or row if
+        `keys` is a 2D array) is the primary sort key.
+    axis : int, optional
+        Axis to be indirectly sorted.  By default, sort over the last axis.
+
+    Returns
+    -------
+    indices : (N,) ndarray of ints
+        Array of indices that sort the keys along the specified axis.
+
+    See Also
+    --------
+    argsort : Indirect sort.
+    ndarray.sort : In-place sort.
+    sort : Return a sorted copy of an array.
+
+    Examples
+    --------
+    Sort names: first by surname, then by name.
+
+    >>> surnames =    ('Hertz',    'Galilei', 'Hertz')
+    >>> first_names = ('Heinrich', 'Galileo', 'Gustav')
+    >>> ind = np.lexsort((first_names, surnames))
+    >>> ind
+    array([1, 2, 0])
+
+    >>> [surnames[i] + ", " + first_names[i] for i in ind]
+    ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich']
+
+    Sort two columns of numbers:
+
+    >>> a = [1,5,1,4,3,4,4] # First column
+    >>> b = [9,4,0,4,0,2,1] # Second column
+    >>> ind = np.lexsort((b,a)) # Sort by a, then by b
+    >>> print(ind)
+    [2 0 4 6 5 3 1]
+
+    >>> [(a[i],b[i]) for i in ind]
+    [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)]
+
+    Note that sorting is first according to the elements of ``a``.
+    Secondary sorting is according to the elements of ``b``.
+
+    A normal ``argsort`` would have yielded:
+
+    >>> [(a[i],b[i]) for i in np.argsort(a)]
+    [(1, 9), (1, 0), (3, 0), (4, 4), (4, 2), (4, 1), (5, 4)]
+
+    Structured arrays are sorted lexically by ``argsort``:
+
+    >>> x = np.array([(1,9), (5,4), (1,0), (4,4), (3,0), (4,2), (4,1)],
+    ...              dtype=np.dtype([('x', int), ('y', int)]))
+
+    >>> np.argsort(x) # or np.argsort(x, order=('x', 'y'))
+    array([2, 0, 4, 6, 5, 3, 1])
+
+    """
+    return _multiarray_umath.lexsort(keys, axis)
+
+
+def _can_cast_dispatcher(from_, to, casting=None):
+    return (from_,)
+
+
+@array_function_dispatch(_can_cast_dispatcher)
+def can_cast(from_, to, casting='safe'):
+    """
+    Returns True if cast between data types can occur according to the
+    casting rule.  If from is a scalar or array scalar, also returns
+    True if the scalar value can be cast without overflow or truncation
+    to an integer.
+
+    Parameters
+    ----------
+    from_ : dtype, dtype specifier, scalar, or array
+        Data type, scalar, or array to cast from.
+    to : dtype or dtype specifier
+        Data type to cast to.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+        Controls what kind of data casting may occur.
+
+          * 'no' means the data types should not be cast at all.
+          * 'equiv' means only byte-order changes are allowed.
+          * 'safe' means only casts which can preserve values are allowed.
+          * 'same_kind' means only safe casts or casts within a kind,
+            like float64 to float32, are allowed.
+          * 'unsafe' means any data conversions may be done.
+
+    Returns
+    -------
+    out : bool
+        True if cast can occur according to the casting rule.
+
+    Notes
+    -----
+    Starting in NumPy 1.9, can_cast function now returns False in 'safe'
+    casting mode for integer/float dtype and string dtype if the string dtype
+    length is not long enough to store the max integer/float value converted
+    to a string. Previously can_cast in 'safe' mode returned True for
+    integer/float dtype and a string dtype of any length.
+
+    See also
+    --------
+    dtype, result_type
+
+    Examples
+    --------
+    Basic examples
+
+    >>> np.can_cast(np.int32, np.int64)
+    True
+    >>> np.can_cast(np.float64, complex)
+    True
+    >>> np.can_cast(complex, float)
+    False
+
+    >>> np.can_cast('i8', 'f8')
+    True
+    >>> np.can_cast('i8', 'f4')
+    False
+    >>> np.can_cast('i4', 'S4')
+    False
+
+    Casting scalars
+
+    >>> np.can_cast(100, 'i1')
+    True
+    >>> np.can_cast(150, 'i1')
+    False
+    >>> np.can_cast(150, 'u1')
+    True
+
+    >>> np.can_cast(3.5e100, np.float32)
+    False
+    >>> np.can_cast(1000.0, np.float32)
+    True
+
+    Array scalar checks the value, array does not
+
+    >>> np.can_cast(np.array(1000.0), np.float32)
+    True
+    >>> np.can_cast(np.array([1000.0]), np.float32)
+    False
+
+    Using the casting rules
+
+    >>> np.can_cast('i8', 'i8', 'no')
+    True
+    >>> np.can_cast('<i8', '>i8', 'no')
+    False
+
+    >>> np.can_cast('<i8', '>i8', 'equiv')
+    True
+    >>> np.can_cast('<i4', '>i8', 'equiv')
+    False
+
+    >>> np.can_cast('<i4', '>i8', 'safe')
+    True
+    >>> np.can_cast('<i8', '>i4', 'safe')
+    False
+
+    >>> np.can_cast('<i8', '>i4', 'same_kind')
+    True
+    >>> np.can_cast('<i8', '>u4', 'same_kind')
+    False
+
+    >>> np.can_cast('<i8', '>u4', 'unsafe')
+    True
+
+    """
+    return _multiarray_umath.can_cast(from_, to, casting)
+
+
+def _min_scalar_type_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_min_scalar_type_dispatcher)
+def min_scalar_type(a):
+    """
+    For scalar ``a``, returns the data type with the smallest size
+    and smallest scalar kind which can hold its value.  For non-scalar
+    array ``a``, returns the vector's dtype unmodified.
+
+    Floating point values are not demoted to integers,
+    and complex values are not demoted to floats.
+
+    Parameters
+    ----------
+    a : scalar or array_like
+        The value whose minimal data type is to be found.
+
+    Returns
+    -------
+    out : dtype
+        The minimal data type.
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    See Also
+    --------
+    result_type, promote_types, dtype, can_cast
+
+    Examples
+    --------
+    >>> np.min_scalar_type(10)
+    dtype('uint8')
+
+    >>> np.min_scalar_type(-260)
+    dtype('int16')
+
+    >>> np.min_scalar_type(3.1)
+    dtype('float16')
+
+    >>> np.min_scalar_type(1e50)
+    dtype('float64')
+
+    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
+    dtype('float64')
+
+    """
+    return _multiarray_umath.min_scalar_type(a)
+
+
+def _result_type_dispatcher(*arrays_and_dtypes):
+    return arrays_and_dtypes
+
+
+@array_function_dispatch(_result_type_dispatcher)
+def result_type(*arrays_and_dtypes):
+    """
+    Returns the type that results from applying the NumPy
+    type promotion rules to the arguments.
+
+    Type promotion in NumPy works similarly to the rules in languages
+    like C++, with some slight differences.  When both scalars and
+    arrays are used, the array's type takes precedence and the actual value
+    of the scalar is taken into account.
+
+    For example, calculating 3*a, where a is an array of 32-bit floats,
+    intuitively should result in a 32-bit float output.  If the 3 is a
+    32-bit integer, the NumPy rules indicate it can't convert losslessly
+    into a 32-bit float, so a 64-bit float should be the result type.
+    By examining the value of the constant, '3', we see that it fits in
+    an 8-bit integer, which can be cast losslessly into the 32-bit float.
+
+    Parameters
+    ----------
+    arrays_and_dtypes : list of arrays and dtypes
+        The operands of some operation whose result type is needed.
+
+    Returns
+    -------
+    out : dtype
+        The result type.
+
+    See also
+    --------
+    dtype, promote_types, min_scalar_type, can_cast
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    The specific algorithm used is as follows.
+
+    Categories are determined by first checking which of boolean,
+    integer (int/uint), or floating point (float/complex) the maximum
+    kind of all the arrays and the scalars are.
+
+    If there are only scalars or the maximum category of the scalars
+    is higher than the maximum category of the arrays,
+    the data types are combined with :func:`promote_types`
+    to produce the return value.
+
+    Otherwise, `min_scalar_type` is called on each array, and
+    the resulting data types are all combined with :func:`promote_types`
+    to produce the return value.
+
+    The set of int values is not a subset of the uint values for types
+    with the same number of bits, something not reflected in
+    :func:`min_scalar_type`, but handled as a special case in `result_type`.
+
+    Examples
+    --------
+    >>> np.result_type(3, np.arange(7, dtype='i1'))
+    dtype('int8')
+
+    >>> np.result_type('i4', 'c8')
+    dtype('complex128')
+
+    >>> np.result_type(3.0, -2)
+    dtype('float64')
+
+    """
+    return _multiarray_umath.result_type(*arrays_and_dtypes)
+
+
+def _dot_dispatcher(a, b, out=None):
+    return (a, b, out)
+
+
+@array_function_dispatch(_dot_dispatcher)
+def dot(a, b, out=None):
+    """
+    Dot product of two arrays. Specifically,
+
+    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
+      (without complex conjugation).
+
+    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
+      but using :func:`matmul` or ``a @ b`` is preferred.
+
+    - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
+      and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
+
+    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
+      the last axis of `a` and `b`.
+
+    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
+      sum product over the last axis of `a` and the second-to-last axis of `b`::
+
+        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
+
+    Parameters
+    ----------
+    a : array_like
+        First argument.
+    b : array_like
+        Second argument.
+    out : ndarray, optional
+        Output argument. This must have the exact kind that would be returned
+        if it was not used. In particular, it must have the right type, must be
+        C-contiguous, and its dtype must be the dtype that would be returned
+        for `dot(a,b)`. This is a performance feature. Therefore, if these
+        conditions are not met, an exception is raised, instead of attempting
+        to be flexible.
+
+    Returns
+    -------
+    output : ndarray
+        Returns the dot product of `a` and `b`.  If `a` and `b` are both
+        scalars or both 1-D arrays then a scalar is returned; otherwise
+        an array is returned.
+        If `out` is given, then it is returned.
+
+    Raises
+    ------
+    ValueError
+        If the last dimension of `a` is not the same size as
+        the second-to-last dimension of `b`.
+
+    See Also
+    --------
+    vdot : Complex-conjugating dot product.
+    tensordot : Sum products over arbitrary axes.
+    einsum : Einstein summation convention.
+    matmul : '@' operator as method with out parameter.
+
+    Examples
+    --------
+    >>> np.dot(3, 4)
+    12
+
+    Neither argument is complex-conjugated:
+
+    >>> np.dot([2j, 3j], [2j, 3j])
+    (-13+0j)
+
+    For 2-D arrays it is the matrix product:
+
+    >>> a = [[1, 0], [0, 1]]
+    >>> b = [[4, 1], [2, 2]]
+    >>> np.dot(a, b)
+    array([[4, 1],
+           [2, 2]])
+
+    >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
+    >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
+    >>> np.dot(a, b)[2,3,2,1,2,2]
+    499128
+    >>> sum(a[2,3,2,:] * b[1,2,:,2])
+    499128
+
+    """
+    return _multiarray_umath.dot(a, b, out)
+
+
+def _vdot_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_vdot_dispatcher)
+def vdot(a, b):
+    """
+    Return the dot product of two vectors.
+
+    The vdot(`a`, `b`) function handles complex numbers differently than
+    dot(`a`, `b`).  If the first argument is complex the complex conjugate
+    of the first argument is used for the calculation of the dot product.
+
+    Note that `vdot` handles multidimensional arrays differently than `dot`:
+    it does *not* perform a matrix product, but flattens input arguments
+    to 1-D vectors first. Consequently, it should only be used for vectors.
+
+    Parameters
+    ----------
+    a : array_like
+        If `a` is complex the complex conjugate is taken before calculation
+        of the dot product.
+    b : array_like
+        Second argument to the dot product.
+
+    Returns
+    -------
+    output : ndarray
+        Dot product of `a` and `b`.  Can be an int, float, or
+        complex depending on the types of `a` and `b`.
+
+    See Also
+    --------
+    dot : Return the dot product without using the complex conjugate of the
+          first argument.
+
+    Examples
+    --------
+    >>> a = np.array([1+2j,3+4j])
+    >>> b = np.array([5+6j,7+8j])
+    >>> np.vdot(a, b)
+    (70-8j)
+    >>> np.vdot(b, a)
+    (70+8j)
+
+    Note that higher-dimensional arrays are flattened!
+
+    >>> a = np.array([[1, 4], [5, 6]])
+    >>> b = np.array([[4, 1], [2, 2]])
+    >>> np.vdot(a, b)
+    30
+    >>> np.vdot(b, a)
+    30
+    >>> 1*4 + 4*1 + 5*2 + 6*2
+    30
+
+    """
+    return _multiarray_umath.vdot(a, b)
+
+
+def _is_busday_dispatcher(
+        dates, weekmask=None, holidays=None, busdaycal=None, out=None):
+    return (dates, weekmask, holidays, out)
+
+
+@array_function_dispatch(_is_busday_dispatcher)
+def is_busday(dates, weekmask=None, holidays=None, busdaycal=None,
+              out=None):
+    """
+    is_busday(dates, weekmask='1111100', holidays=None, busdaycal=None, out=None)
+
+    Calculates which of the given dates are valid days, and which are not.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    dates : array_like of datetime64[D]
+        The array of dates to process.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of bool, optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of bool
+        An array with the same shape as ``dates``, containing True for
+        each valid day, and False for each invalid day.
+
+    See Also
+    --------
+    busdaycalendar: An object that specifies a custom set of valid days.
+    busday_offset : Applies an offset counted in valid days.
+    busday_count : Counts how many valid days are in a half-open date range.
+
+    Examples
+    --------
+    >>> # The weekdays are Friday, Saturday, and Monday
+    ... np.is_busday(['2011-07-01', '2011-07-02', '2011-07-18'],
+    ...                 holidays=['2011-07-01', '2011-07-04', '2011-07-17'])
+    array([False, False,  True], dtype='bool')
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.is_busday(dates, **kwargs)
+
+
+def _busday_offset_dispatcher(dates, offsets, roll=None, weekmask=None,
+                              holidays=None, busdaycal=None, out=None):
+    return (dates, offsets, weekmask, holidays, out)
+
+
+@array_function_dispatch(_busday_offset_dispatcher)
+def busday_offset(dates, offsets, roll='raise', weekmask=None,
+                  holidays=None, busdaycal=None, out=None):
+    """
+    busday_offset(dates, offsets, roll='raise', weekmask='1111100', holidays=None, busdaycal=None, out=None)
+
+    First adjusts the date to fall on a valid day according to
+    the ``roll`` rule, then applies offsets to the given dates
+    counted in valid days.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    dates : array_like of datetime64[D]
+        The array of dates to process.
+    offsets : array_like of int
+        The array of offsets, which is broadcast with ``dates``.
+    roll : {'raise', 'nat', 'forward', 'following', 'backward', 'preceding', 'modifiedfollowing', 'modifiedpreceding'}, optional
+        How to treat dates that do not fall on a valid day. The default
+        is 'raise'.
+
+          * 'raise' means to raise an exception for an invalid day.
+          * 'nat' means to return a NaT (not-a-time) for an invalid day.
+          * 'forward' and 'following' mean to take the first valid day
+            later in time.
+          * 'backward' and 'preceding' mean to take the first valid day
+            earlier in time.
+          * 'modifiedfollowing' means to take the first valid day
+            later in time unless it is across a Month boundary, in which
+            case to take the first valid day earlier in time.
+          * 'modifiedpreceding' means to take the first valid day
+            earlier in time unless it is across a Month boundary, in which
+            case to take the first valid day later in time.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of datetime64[D], optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of datetime64[D]
+        An array with a shape from broadcasting ``dates`` and ``offsets``
+        together, containing the dates with offsets applied.
+
+    See Also
+    --------
+    busdaycalendar: An object that specifies a custom set of valid days.
+    is_busday : Returns a boolean array indicating valid days.
+    busday_count : Counts how many valid days are in a half-open date range.
+
+    Examples
+    --------
+    >>> # First business day in October 2011 (not accounting for holidays)
+    ... np.busday_offset('2011-10', 0, roll='forward')
+    numpy.datetime64('2011-10-03','D')
+    >>> # Last business day in February 2012 (not accounting for holidays)
+    ... np.busday_offset('2012-03', -1, roll='forward')
+    numpy.datetime64('2012-02-29','D')
+    >>> # Third Wednesday in January 2011
+    ... np.busday_offset('2011-01', 2, roll='forward', weekmask='Wed')
+    numpy.datetime64('2011-01-19','D')
+    >>> # 2012 Mother's Day in Canada and the U.S.
+    ... np.busday_offset('2012-05', 1, roll='forward', weekmask='Sun')
+    numpy.datetime64('2012-05-13','D')
+
+    >>> # First business day on or after a date
+    ... np.busday_offset('2011-03-20', 0, roll='forward')
+    numpy.datetime64('2011-03-21','D')
+    >>> np.busday_offset('2011-03-22', 0, roll='forward')
+    numpy.datetime64('2011-03-22','D')
+    >>> # First business day after a date
+    ... np.busday_offset('2011-03-20', 1, roll='backward')
+    numpy.datetime64('2011-03-21','D')
+    >>> np.busday_offset('2011-03-22', 1, roll='backward')
+    numpy.datetime64('2011-03-23','D')
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.busday_offset(dates, offsets, roll, **kwargs)
+
+
+def _busday_count_dispatcher(begindates, enddates, weekmask=None,
+                             holidays=None, busdaycal=None, out=None):
+    return (begindates, enddates, weekmask, holidays, out)
+
+
+@array_function_dispatch(_busday_count_dispatcher)
+def busday_count(begindates, enddates, weekmask=None, holidays=None,
+                 busdaycal=None, out=None):
+    """
+    busday_count(begindates, enddates, weekmask='1111100', holidays=[], busdaycal=None, out=None)
+
+    Counts the number of valid days between `begindates` and
+    `enddates`, not including the day of `enddates`.
+
+    If ``enddates`` specifies a date value that is earlier than the
+    corresponding ``begindates`` date value, the count will be negative.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    begindates : array_like of datetime64[D]
+        The array of the first dates for counting.
+    enddates : array_like of datetime64[D]
+        The array of the end dates for counting, which are excluded
+        from the count themselves.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of int, optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of int
+        An array with a shape from broadcasting ``begindates`` and ``enddates``
+        together, containing the number of valid days between
+        the begin and end dates.
+
+    See Also
+    --------
+    busdaycalendar: An object that specifies a custom set of valid days.
+    is_busday : Returns a boolean array indicating valid days.
+    busday_offset : Applies an offset counted in valid days.
+
+    Examples
+    --------
+    >>> # Number of weekdays in January 2011
+    ... np.busday_count('2011-01', '2011-02')
+    21
+    >>> # Number of weekdays in 2011
+    ...  np.busday_count('2011', '2012')
+    260
+    >>> # Number of Saturdays in 2011
+    ... np.busday_count('2011', '2012', weekmask='Sat')
+    53
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.busday_count(begindates, enddates, **kwargs)
+
+
+def _datetime_as_string_dispatcher(
+        arr, unit=None, timezone=None, casting=None):
+    return (arr,)
+
+
+@array_function_dispatch(_datetime_as_string_dispatcher)
+def datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind'):
+    """
+    Convert an array of datetimes into an array of strings.
+
+    Parameters
+    ----------
+    arr : array_like of datetime64
+        The array of UTC timestamps to format.
+    unit : str
+        One of None, 'auto', or a :ref:`datetime unit <arrays.dtypes.dateunits>`.
+    timezone : {'naive', 'UTC', 'local'} or tzinfo
+        Timezone information to use when displaying the datetime. If 'UTC', end
+        with a Z to indicate UTC time. If 'local', convert to the local timezone
+        first, and suffix with a +-#### timezone offset. If a tzinfo object,
+        then do as with 'local', but use the specified timezone.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}
+        Casting to allow when changing between datetime units.
+
+    Returns
+    -------
+    str_arr : ndarray
+        An array of strings the same shape as `arr`.
+
+    Examples
+    --------
+    >>> d = np.arange('2002-10-27T04:30', 4*60, 60, dtype='M8[m]')
+    >>> d
+    array(['2002-10-27T04:30', '2002-10-27T05:30', '2002-10-27T06:30',
+           '2002-10-27T07:30'], dtype='datetime64[m]')
+
+    Setting the timezone to UTC shows the same information, but with a Z suffix
+
+    >>> np.datetime_as_string(d, timezone='UTC')
+    array(['2002-10-27T04:30Z', '2002-10-27T05:30Z', '2002-10-27T06:30Z',
+           '2002-10-27T07:30Z'], dtype='<U35')
+
+    Note that we picked datetimes that cross a DST boundary. Passing in a
+    ``pytz`` timezone object will print the appropriate offset
+
+    >>> np.datetime_as_string(d, timezone=pytz.timezone('US/Eastern'))
+    array(['2002-10-27T00:30-0400', '2002-10-27T01:30-0400',
+           '2002-10-27T01:30-0500', '2002-10-27T02:30-0500'], dtype='<U39')
+
+    Passing in a unit will change the precision
+
+    >>> np.datetime_as_string(d, unit='h')
+    array(['2002-10-27T04', '2002-10-27T05', '2002-10-27T06', '2002-10-27T07'],
+          dtype='<U32')
+    >>> np.datetime_as_string(d, unit='s')
+    array(['2002-10-27T04:30:00', '2002-10-27T05:30:00', '2002-10-27T06:30:00',
+           '2002-10-27T07:30:00'], dtype='<U38')
+
+    'casting' can be used to specify whether precision can be changed
+
+    >>> np.datetime_as_string(d, unit='h', casting='safe')
+    TypeError: Cannot create a datetime string as units 'h' from a NumPy
+    datetime with units 'm' according to the rule 'safe'
+    """
+    return _multiarray_umath.datetime_as_string(arr, unit, timezone, casting)
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index fed3c0a9d..265c3636f 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -6,6 +6,7 @@ try:
     import collections.abc as collections_abc
 except ImportError:
     import collections as collections_abc
+import functools
 import itertools
 import operator
 import sys
@@ -27,6 +28,7 @@ from .multiarray import (
 if sys.version_info[0] < 3:
     from .multiarray import newbuffer, getbuffer
 
+from . import overrides
 from . import umath
 from .umath import (multiply, invert, sin, UFUNC_BUFSIZE_DEFAULT,
                     ERR_IGNORE, ERR_WARN, ERR_RAISE, ERR_CALL, ERR_PRINT,
@@ -40,7 +42,13 @@ ufunc = type(sin)
 newaxis = None
 
 if sys.version_info[0] >= 3:
-    import pickle
+    if sys.version_info[1] in (6, 7):
+        try:
+            import pickle5 as pickle
+        except ImportError:
+            import pickle
+    else:
+        import pickle
     basestring = str
     import builtins
 else:
@@ -48,6 +56,10 @@ else:
     import __builtin__ as builtins
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 def loads(*args, **kwargs):
     # NumPy 1.15.0, 2017-12-10
     warnings.warn(
@@ -91,6 +103,11 @@ class ComplexWarning(RuntimeWarning):
     pass
 
 
+def _zeros_like_dispatcher(a, dtype=None, order=None, subok=None):
+    return (a,)
+
+
+@array_function_dispatch(_zeros_like_dispatcher)
 def zeros_like(a, dtype=None, order='K', subok=True):
     """
     Return an array of zeros with the same shape and type as a given array.
@@ -205,6 +222,11 @@ def ones(shape, dtype=None, order='C'):
     return a
 
 
+def _ones_like_dispatcher(a, dtype=None, order=None, subok=None):
+    return (a,)
+
+
+@array_function_dispatch(_ones_like_dispatcher)
 def ones_like(a, dtype=None, order='K', subok=True):
     """
     Return an array of ones with the same shape and type as a given array.
@@ -311,6 +333,11 @@ def full(shape, fill_value, dtype=None, order='C'):
     return a
 
 
+def _full_like_dispatcher(a, fill_value, dtype=None, order=None, subok=None):
+    return (a,)
+
+
+@array_function_dispatch(_full_like_dispatcher)
 def full_like(a, fill_value, dtype=None, order='K', subok=True):
     """
     Return a full array with the same shape and type as a given array.
@@ -368,6 +395,11 @@ def full_like(a, fill_value, dtype=None, order='K', subok=True):
     return res
 
 
+def _count_nonzero_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_count_nonzero_dispatcher)
 def count_nonzero(a, axis=None):
     """
     Counts the number of non-zero values in the array ``a``.
@@ -555,7 +587,7 @@ def asanyarray(a, dtype=None, order=None):
 
 def ascontiguousarray(a, dtype=None):
     """
-    Return a contiguous array in memory (C order).
+    Return a contiguous array (ndim >= 1) in memory (C order).
 
     Parameters
     ----------
@@ -586,13 +618,16 @@ def ascontiguousarray(a, dtype=None):
     >>> x.flags['C_CONTIGUOUS']
     True
 
+    Note: This function returns an array with at least one-dimension (1-d) 
+    so it will not preserve 0-d arrays.  
+
     """
     return array(a, dtype, copy=False, order='C', ndmin=1)
 
 
 def asfortranarray(a, dtype=None):
     """
-    Return an array laid out in Fortran order in memory.
+    Return an array (ndim >= 1) laid out in Fortran order in memory.
 
     Parameters
     ----------
@@ -623,6 +658,9 @@ def asfortranarray(a, dtype=None):
     >>> y.flags['F_CONTIGUOUS']
     True
 
+    Note: This function returns an array with at least one-dimension (1-d) 
+    so it will not preserve 0-d arrays.  
+
     """
     return array(a, dtype, copy=False, order='F', ndmin=1)
 
@@ -787,6 +825,11 @@ def isfortran(a):
     return a.flags.fnc
 
 
+def _argwhere_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_argwhere_dispatcher)
 def argwhere(a):
     """
     Find the indices of array elements that are non-zero, grouped by element.
@@ -828,6 +871,11 @@ def argwhere(a):
     return transpose(nonzero(a))
 
 
+def _flatnonzero_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_flatnonzero_dispatcher)
 def flatnonzero(a):
     """
     Return indices that are non-zero in the flattened version of a.
@@ -879,6 +927,11 @@ def _mode_from_name(mode):
     return mode
 
 
+def _correlate_dispatcher(a, v, mode=None):
+    return (a, v)
+
+
+@array_function_dispatch(_correlate_dispatcher)
 def correlate(a, v, mode='valid'):
     """
     Cross-correlation of two 1-dimensional sequences.
@@ -947,6 +1000,11 @@ def correlate(a, v, mode='valid'):
     return multiarray.correlate2(a, v, mode)
 
 
+def _convolve_dispatcher(a, v, mode=None):
+    return (a, v)
+
+
+@array_function_dispatch(_convolve_dispatcher)
 def convolve(a, v, mode='full'):
     """
     Returns the discrete, linear convolution of two one-dimensional sequences.
@@ -1046,6 +1104,11 @@ def convolve(a, v, mode='full'):
     return multiarray.correlate(a, v[::-1], mode)
 
 
+def _outer_dispatcher(a, b, out=None):
+    return (a, b, out)
+
+
+@array_function_dispatch(_outer_dispatcher)
 def outer(a, b, out=None):
     """
     Compute the outer product of two vectors.
@@ -1130,6 +1193,11 @@ def outer(a, b, out=None):
     return multiply(a.ravel()[:, newaxis], b.ravel()[newaxis, :], out)
 
 
+def _tensordot_dispatcher(a, b, axes=None):
+    return (a, b)
+
+
+@array_function_dispatch(_tensordot_dispatcher)
 def tensordot(a, b, axes=2):
     """
     Compute tensor dot product along specified axes for arrays >= 1-D.
@@ -1316,6 +1384,11 @@ def tensordot(a, b, axes=2):
     return res.reshape(olda + oldb)
 
 
+def _roll_dispatcher(a, shift, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_roll_dispatcher)
 def roll(a, shift, axis=None):
     """
     Roll array elements along a given axis.
@@ -1405,6 +1478,11 @@ def roll(a, shift, axis=None):
         return result
 
 
+def _rollaxis_dispatcher(a, axis, start=None):
+    return (a,)
+
+
+@array_function_dispatch(_rollaxis_dispatcher)
 def rollaxis(a, axis, start=0):
     """
     Roll the specified axis backwards, until it lies in a given position.
@@ -1525,6 +1603,11 @@ def normalize_axis_tuple(axis, ndim, argname=None, allow_duplicate=False):
     return axis
 
 
+def _moveaxis_dispatcher(a, source, destination):
+    return (a,)
+
+
+@array_function_dispatch(_moveaxis_dispatcher)
 def moveaxis(a, source, destination):
     """
     Move axes of an array to new positions.
@@ -1601,6 +1684,11 @@ def _move_axis_to_0(a, axis):
     return moveaxis(a, axis, 0)
 
 
+def _cross_dispatcher(a, b, axisa=None, axisb=None, axisc=None, axis=None):
+    return (a, b)
+
+
+@array_function_dispatch(_cross_dispatcher)
 def cross(a, b, axisa=-1, axisb=-1, axisc=-1, axis=None):
     """
     Return the cross product of two (arrays of) vectors.
@@ -1928,6 +2016,10 @@ def fromfunction(function, shape, **kwargs):
     return function(*args, **kwargs)
 
 
+def _frombuffer(buf, dtype, shape, order):
+    return frombuffer(buf, dtype=dtype).reshape(shape, order=order)
+
+
 def isscalar(num):
     """
     Returns True if the type of `num` is a scalar type.
@@ -2240,6 +2332,11 @@ def identity(n, dtype=None):
     return eye(n, dtype=dtype)
 
 
+def _allclose_dispatcher(a, b, rtol=None, atol=None, equal_nan=None):
+    return (a, b)
+
+
+@array_function_dispatch(_allclose_dispatcher)
 def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
     """
     Returns True if two arrays are element-wise equal within a tolerance.
@@ -2311,6 +2408,11 @@ def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
     return bool(res)
 
 
+def _isclose_dispatcher(a, b, rtol=None, atol=None, equal_nan=None):
+    return (a, b)
+
+
+@array_function_dispatch(_isclose_dispatcher)
 def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
     """
     Returns a boolean array where two arrays are element-wise equal within a
@@ -2426,6 +2528,11 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
         return cond[()]  # Flatten 0d arrays to scalars
 
 
+def _array_equal_dispatcher(a1, a2):
+    return (a1, a2)
+
+
+@array_function_dispatch(_array_equal_dispatcher)
 def array_equal(a1, a2):
     """
     True if two arrays have the same shape and elements, False otherwise.
@@ -2468,6 +2575,11 @@ def array_equal(a1, a2):
     return bool(asarray(a1 == a2).all())
 
 
+def _array_equiv_dispatcher(a1, a2):
+    return (a1, a2)
+
+
+@array_function_dispatch(_array_equiv_dispatcher)
 def array_equiv(a1, a2):
     """
     Returns True if input arrays are shape consistent and all elements equal.
diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py
index 3ff9ceef0..2fb841f7c 100644
--- a/numpy/core/numerictypes.py
+++ b/numpy/core/numerictypes.py
@@ -116,8 +116,8 @@ from ._type_aliases import (
     _concrete_types,
     _concrete_typeinfo,
     _bits_of,
-    _kind_to_stem,
 )
+from ._dtype import _kind_name
 
 # we don't export these for import *, but we do want them accessible
 # as numerictypes.bool, etc.
@@ -181,8 +181,7 @@ def maximum_sctype(t):
     if g is None:
         return t
     t = g
-    bits = _bits_of(t)
-    base = _kind_to_stem[dtype(t).kind]
+    base = _kind_name(dtype(t))
     if base in sctypes:
         return sctypes[base][-1]
     else:
diff --git a/numpy/core/overrides.py b/numpy/core/overrides.py
index 17e3d475f..85a8c32bb 100644
--- a/numpy/core/overrides.py
+++ b/numpy/core/overrides.py
@@ -2,8 +2,11 @@
 
 TODO: rewrite this in C for performance.
 """
+import collections
 import functools
-from numpy.core.multiarray import ndarray
+
+from numpy.core._multiarray_umath import ndarray
+from numpy.compat._inspect import getargspec
 
 
 _NDARRAY_ARRAY_FUNCTION = ndarray.__array_function__
@@ -68,8 +71,8 @@ def array_function_implementation_or_override(
         Function that implements the operation on NumPy array without
         overrides when called like ``implementation(*args, **kwargs)``.
     public_api : function
-        Function exposed by NumPy's public API riginally called like
-        ``public_api(*args, **kwargs`` on which arguments are now being
+        Function exposed by NumPy's public API originally called like
+        ``public_api(*args, **kwargs)`` on which arguments are now being
         checked.
     relevant_args : iterable
         Iterable of arguments to check for __array_function__ methods.
@@ -102,18 +105,59 @@ def array_function_implementation_or_override(
         if result is not NotImplemented:
             return result
 
-    raise TypeError('no implementation found for {} on types that implement '
+    func_name = '{}.{}'.format(public_api.__module__, public_api.__name__)
+    raise TypeError("no implementation found for '{}' on types that implement "
                     '__array_function__: {}'
-                    .format(public_api, list(map(type, overloaded_args))))
+                    .format(func_name, list(map(type, overloaded_args))))
+
+
+ArgSpec = collections.namedtuple('ArgSpec', 'args varargs keywords defaults')
+
+
+def verify_matching_signatures(implementation, dispatcher):
+    """Verify that a dispatcher function has the right signature."""
+    implementation_spec = ArgSpec(*getargspec(implementation))
+    dispatcher_spec = ArgSpec(*getargspec(dispatcher))
+
+    if (implementation_spec.args != dispatcher_spec.args or
+            implementation_spec.varargs != dispatcher_spec.varargs or
+            implementation_spec.keywords != dispatcher_spec.keywords or
+            (bool(implementation_spec.defaults) !=
+             bool(dispatcher_spec.defaults)) or
+            (implementation_spec.defaults is not None and
+             len(implementation_spec.defaults) !=
+             len(dispatcher_spec.defaults))):
+        raise RuntimeError('implementation and dispatcher for %s have '
+                           'different function signatures' % implementation)
 
+    if implementation_spec.defaults is not None:
+        if dispatcher_spec.defaults != (None,) * len(dispatcher_spec.defaults):
+            raise RuntimeError('dispatcher functions can only use None for '
+                               'default argument values')
 
-def array_function_dispatch(dispatcher):
+
+def array_function_dispatch(dispatcher, module=None, verify=True):
     """Decorator for adding dispatch with the __array_function__ protocol."""
     def decorator(implementation):
+        # TODO: only do this check when the appropriate flag is enabled or for
+        # a dev install. We want this check for testing but don't want to
+        # slow down all numpy imports.
+        if verify:
+            verify_matching_signatures(implementation, dispatcher)
+
         @functools.wraps(implementation)
         def public_api(*args, **kwargs):
             relevant_args = dispatcher(*args, **kwargs)
             return array_function_implementation_or_override(
                 implementation, public_api, relevant_args, args, kwargs)
+
+        if module is not None:
+            public_api.__module__ = module
+
+        # TODO: remove this when we drop Python 2 support (functools.wraps
+        # adds __wrapped__ automatically in later versions)
+        public_api.__wrapped__ = implementation
+
         return public_api
+
     return decorator
diff --git a/numpy/core/records.py b/numpy/core/records.py
index a483871ba..1b596e4de 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -42,7 +42,7 @@ import warnings
 
 from . import numeric as sb
 from . import numerictypes as nt
-from numpy.compat import isfileobj, bytes, long, unicode
+from numpy.compat import isfileobj, bytes, long, unicode, os_fspath
 from .arrayprint import get_printoptions
 
 # All of the functions allow formats to be a dtype
@@ -737,9 +737,9 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
              names=None, titles=None, aligned=False, byteorder=None):
     """Create an array from binary file data
 
-    If file is a string then that file is opened, else it is assumed
-    to be a file object. The file object must support random access
-    (i.e. it must have tell and seek methods).
+    If file is a string or a path-like object then that file is opened,
+    else it is assumed to be a file object. The file object must
+    support random access (i.e. it must have tell and seek methods).
 
     >>> from tempfile import TemporaryFile
     >>> a = np.empty(10,dtype='f8,i4,a5')
@@ -763,10 +763,14 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
     elif isinstance(shape, (int, long)):
         shape = (shape,)
 
-    name = 0
-    if isinstance(fd, str):
+    if isfileobj(fd):
+        # file already opened
+        name = 0
+    else:
+        # open file
+        fd = open(os_fspath(fd), 'rb')
         name = 1
-        fd = open(fd, 'rb')
+
     if (offset > 0):
         fd.seek(offset, 1)
     size = get_remaining_size(fd)
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index bea9ff392..a4429cee2 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -731,12 +731,15 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'lowlevel_strided_loops.h'),
             join('src', 'common', 'mem_overlap.h'),
             join('src', 'common', 'npy_config.h'),
+            join('src', 'common', 'npy_ctypes.h'),
             join('src', 'common', 'npy_extint128.h'),
+            join('src', 'common', 'npy_import.h'),
             join('src', 'common', 'npy_longdouble.h'),
             join('src', 'common', 'templ_common.h.src'),
             join('src', 'common', 'ucsnarrow.h'),
             join('src', 'common', 'ufunc_override.h'),
             join('src', 'common', 'umathmodule.h'),
+            join('src', 'common', 'numpyos.h'),
             ]
 
     common_src = [
@@ -746,6 +749,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'templ_common.h.src'),
             join('src', 'common', 'ucsnarrow.c'),
             join('src', 'common', 'ufunc_override.c'),
+            join('src', 'common', 'numpyos.c'),
             ]
 
     blas_info = get_info('blas_opt', 0)
@@ -785,7 +789,6 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'multiarraymodule.h'),
             join('src', 'multiarray', 'nditer_impl.h'),
             join('src', 'multiarray', 'number.h'),
-            join('src', 'multiarray', 'numpyos.h'),
             join('src', 'multiarray', 'refcount.h'),
             join('src', 'multiarray', 'scalartypes.h'),
             join('src', 'multiarray', 'sequence.h'),
@@ -851,7 +854,6 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'nditer_constr.c'),
             join('src', 'multiarray', 'nditer_pywrap.c'),
             join('src', 'multiarray', 'number.c'),
-            join('src', 'multiarray', 'numpyos.c'),
             join('src', 'multiarray', 'refcount.c'),
             join('src', 'multiarray', 'sequence.c'),
             join('src', 'multiarray', 'shape.c'),
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index e637dbc20..f837df112 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -41,7 +41,8 @@ C_ABI_VERSION = 0x01000009
 # 0x0000000b - 1.13.x
 # 0x0000000c - 1.14.x
 # 0x0000000c - 1.15.x
-C_API_VERSION = 0x0000000c
+# 0x0000000d - 1.16.x
+C_API_VERSION = 0x0000000d
 
 class MismatchCAPIWarning(Warning):
     pass
diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index 52717abda..3edf0824e 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -3,11 +3,26 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['atleast_1d', 'atleast_2d', 'atleast_3d', 'block', 'hstack',
            'stack', 'vstack']
 
+import functools
+import operator
+import types
+import warnings
 
 from . import numeric as _nx
+from . import overrides
 from .numeric import array, asanyarray, newaxis
 from .multiarray import normalize_axis_index
 
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
+def _atleast_1d_dispatcher(*arys):
+    return arys
+
+
+@array_function_dispatch(_atleast_1d_dispatcher)
 def atleast_1d(*arys):
     """
     Convert inputs to arrays with at least one dimension.
@@ -60,6 +75,12 @@ def atleast_1d(*arys):
     else:
         return res
 
+
+def _atleast_2d_dispatcher(*arys):
+    return arys
+
+
+@array_function_dispatch(_atleast_2d_dispatcher)
 def atleast_2d(*arys):
     """
     View inputs as arrays with at least two dimensions.
@@ -112,6 +133,12 @@ def atleast_2d(*arys):
     else:
         return res
 
+
+def _atleast_3d_dispatcher(*arys):
+    return arys
+
+
+@array_function_dispatch(_atleast_3d_dispatcher)
 def atleast_3d(*arys):
     """
     View inputs as arrays with at least three dimensions.
@@ -179,6 +206,22 @@ def atleast_3d(*arys):
         return res
 
 
+def _arrays_for_stack_dispatcher(arrays, stacklevel=4):
+    if not hasattr(arrays, '__getitem__') and hasattr(arrays, '__iter__'):
+        warnings.warn('arrays to stack must be passed as a "sequence" type '
+                      'such as list or tuple. Support for non-sequence '
+                      'iterables such as generators is deprecated as of '
+                      'NumPy 1.16 and will raise an error in the future.',
+                      FutureWarning, stacklevel=stacklevel)
+        return ()
+    return arrays
+
+
+def _vhstack_dispatcher(tup):
+    return _arrays_for_stack_dispatcher(tup)
+
+
+@array_function_dispatch(_vhstack_dispatcher)
 def vstack(tup):
     """
     Stack arrays in sequence vertically (row wise).
@@ -233,6 +276,8 @@ def vstack(tup):
     """
     return _nx.concatenate([atleast_2d(_m) for _m in tup], 0)
 
+
+@array_function_dispatch(_vhstack_dispatcher)
 def hstack(tup):
     """
     Stack arrays in sequence horizontally (column wise).
@@ -288,6 +333,15 @@ def hstack(tup):
         return _nx.concatenate(arrs, 1)
 
 
+def _stack_dispatcher(arrays, axis=None, out=None):
+    arrays = _arrays_for_stack_dispatcher(arrays, stacklevel=6)
+    for a in arrays:
+        yield a
+    if out is not None:
+        yield out
+
+
+@array_function_dispatch(_stack_dispatcher)
 def stack(arrays, axis=0, out=None):
     """
     Join a sequence of arrays along a new axis.
@@ -394,6 +448,10 @@ def _block_check_depths_match(arrays, parent_index=[]):
         refer to it, and the last index along the empty axis will be `None`.
     max_arr_ndim : int
         The maximum of the ndims of the arrays nested in `arrays`.
+    final_size: int
+        The number of elements in the final array. This is used the motivate
+        the choice of algorithm used using benchmarking wisdom.
+
     """
     if type(arrays) is tuple:
         # not strictly necessary, but saves us from:
@@ -412,8 +470,9 @@ def _block_check_depths_match(arrays, parent_index=[]):
         idxs_ndims = (_block_check_depths_match(arr, parent_index + [i])
                       for i, arr in enumerate(arrays))
 
-        first_index, max_arr_ndim = next(idxs_ndims)
-        for index, ndim in idxs_ndims:
+        first_index, max_arr_ndim, final_size = next(idxs_ndims)
+        for index, ndim, size in idxs_ndims:
+            final_size += size
             if ndim > max_arr_ndim:
                 max_arr_ndim = ndim
             if len(index) != len(first_index):
@@ -428,13 +487,15 @@ def _block_check_depths_match(arrays, parent_index=[]):
             # propagate our flag that indicates an empty list at the bottom
             if index[-1] is None:
                 first_index = index
-        return first_index, max_arr_ndim
+
+        return first_index, max_arr_ndim, final_size
     elif type(arrays) is list and len(arrays) == 0:
         # We've 'bottomed out' on an empty list
-        return parent_index + [None], 0
+        return parent_index + [None], 0, 0
     else:
         # We've 'bottomed out' - arrays is either a scalar or an array
-        return parent_index, _nx.ndim(arrays)
+        size = _nx.size(arrays)
+        return parent_index, _nx.ndim(arrays), size
 
 
 def _atleast_nd(a, ndim):
@@ -443,9 +504,132 @@ def _atleast_nd(a, ndim):
     return array(a, ndmin=ndim, copy=False, subok=True)
 
 
+def _accumulate(values):
+    # Helper function because Python 2.7 doesn't have
+    # itertools.accumulate
+    value = 0
+    accumulated = []
+    for v in values:
+        value += v
+        accumulated.append(value)
+    return accumulated
+
+
+def _concatenate_shapes(shapes, axis):
+    """Given array shapes, return the resulting shape and slices prefixes.
+
+    These help in nested concatation.
+    Returns
+    -------
+    shape: tuple of int
+        This tuple satisfies:
+        ```
+        shape, _ = _concatenate_shapes([arr.shape for shape in arrs], axis)
+        shape == concatenate(arrs, axis).shape
+        ```
+
+    slice_prefixes: tuple of (slice(start, end), )
+        For a list of arrays being concatenated, this returns the slice
+        in the larger array at axis that needs to be sliced into.
+
+        For example, the following holds:
+        ```
+        ret = concatenate([a, b, c], axis)
+        _, (sl_a, sl_b, sl_c) = concatenate_slices([a, b, c], axis)
+
+        ret[(slice(None),) * axis + sl_a] == a
+        ret[(slice(None),) * axis + sl_b] == b
+        ret[(slice(None),) * axis + sl_c] == c
+        ```
+
+        Thses are called slice prefixes since they are used in the recursive
+        blocking algorithm to compute the left-most slices during the
+        recursion. Therefore, they must be prepended to rest of the slice
+        that was computed deeper in the recusion.
+
+        These are returned as tuples to ensure that they can quickly be added
+        to existing slice tuple without creating a new tuple everytime.
+
+    """
+    # Cache a result that will be reused.
+    shape_at_axis = [shape[axis] for shape in shapes]
+
+    # Take a shape, any shape
+    first_shape = shapes[0]
+    first_shape_pre = first_shape[:axis]
+    first_shape_post = first_shape[axis+1:]
+
+    if any(shape[:axis] != first_shape_pre or
+           shape[axis+1:] != first_shape_post for shape in shapes):
+        raise ValueError(
+            'Mismatched array shapes in block along axis {}.'.format(axis))
+
+    shape = (first_shape_pre + (sum(shape_at_axis),) + first_shape[axis+1:])
+
+    offsets_at_axis = _accumulate(shape_at_axis)
+    slice_prefixes = [(slice(start, end),)
+                      for start, end in zip([0] + offsets_at_axis,
+                                            offsets_at_axis)]
+    return shape, slice_prefixes
+
+
+def _block_info_recursion(arrays, max_depth, result_ndim, depth=0):
+    """
+    Returns the shape of the final array, along with a list
+    of slices and a list of arrays that can be used for assignment inside the
+    new array
+
+    Parameters
+    ----------
+    arrays : nested list of arrays
+        The arrays to check
+    max_depth : list of int
+        The number of nested lists
+    result_ndim: int
+        The number of dimensions in thefinal array.
+
+    Returns
+    -------
+    shape : tuple of int
+        The shape that the final array will take on.
+    slices: list of tuple of slices
+        The slices into the full array required for assignment. These are
+        required to be prepended with ``(Ellipsis, )`` to obtain to correct
+        final index.
+    arrays: list of ndarray
+        The data to assign to each slice of the full array
+
+    """
+    if depth < max_depth:
+        shapes, slices, arrays = zip(
+            *[_block_info_recursion(arr, max_depth, result_ndim, depth+1)
+              for arr in arrays])
+
+        axis = result_ndim - max_depth + depth
+        shape, slice_prefixes = _concatenate_shapes(shapes, axis)
+
+        # Prepend the slice prefix and flatten the slices
+        slices = [slice_prefix + the_slice
+                  for slice_prefix, inner_slices in zip(slice_prefixes, slices)
+                  for the_slice in inner_slices]
+
+        # Flatten the array list
+        arrays = functools.reduce(operator.add, arrays)
+
+        return shape, slices, arrays
+    else:
+        # We've 'bottomed out' - arrays is either a scalar or an array
+        # type(arrays) is not list
+        # Return the slice and the array inside a list to be consistent with
+        # the recursive case.
+        arr = _atleast_nd(arrays, result_ndim)
+        return arr.shape, [()], [arr]
+
+
 def _block(arrays, max_depth, result_ndim, depth=0):
     """
-    Internal implementation of block. `arrays` is the argument passed to
+    Internal implementation of block based on repeated concatenation.
+    `arrays` is the argument passed to
     block. `max_depth` is the depth of nested lists within `arrays` and
     `result_ndim` is the greatest of the dimensions of the arrays in
     `arrays` and the depth of the lists in `arrays` (see block docstring
@@ -461,6 +645,19 @@ def _block(arrays, max_depth, result_ndim, depth=0):
         return _atleast_nd(arrays, result_ndim)
 
 
+def _block_dispatcher(arrays):
+    # Use type(...) is list to match the behavior of np.block(), which special
+    # cases list specifically rather than allowing for generic iterables or
+    # tuple. Also, we know that list.__array_function__ will never exist.
+    if type(arrays) is list:
+        for subarrays in arrays:
+            for subarray in _block_dispatcher(subarrays):
+                yield subarray
+    else:
+        yield arrays
+
+
+@array_function_dispatch(_block_dispatcher)
 def block(arrays):
     """
     Assemble an nd-array from nested lists of blocks.
@@ -609,7 +806,38 @@ def block(arrays):
 
 
     """
-    bottom_index, arr_ndim = _block_check_depths_match(arrays)
+    arrays, list_ndim, result_ndim, final_size = _block_setup(arrays)
+
+    # It was found through benchmarking that making an array of final size
+    # around 256x256 was faster by straight concatenation on a
+    # i7-7700HQ processor and dual channel ram 2400MHz.
+    # It didn't seem to matter heavily on the dtype used.
+    #
+    # A 2D array using repeated concatenation requires 2 copies of the array.
+    #
+    # The fastest algorithm will depend on the ratio of CPU power to memory
+    # speed.
+    # One can monitor the results of the benchmark
+    # https://pv.github.io/numpy-bench/#bench_shape_base.Block2D.time_block2d
+    # to tune this parameter until a C version of the `_block_info_recursion`
+    # algorithm is implemented which would likely be faster than the python
+    # version.
+    if list_ndim * final_size > (2 * 512 * 512):
+        return _block_slicing(arrays, list_ndim, result_ndim)
+    else:
+        return _block_concatenate(arrays, list_ndim, result_ndim)
+
+
+# Theses helper functions are mostly used for testing.
+# They allow us to write tests that directly call `_block_slicing`
+# or `_block_concatenate` wtihout blocking large arrays to forse the wisdom
+# to trigger the desired path.
+def _block_setup(arrays):
+    """
+    Returns
+    (`arrays`, list_ndim, result_ndim, final_size)
+    """
+    bottom_index, arr_ndim, final_size = _block_check_depths_match(arrays)
     list_ndim = len(bottom_index)
     if bottom_index and bottom_index[-1] is None:
         raise ValueError(
@@ -617,4 +845,35 @@ def block(arrays):
                 _block_format_index(bottom_index)
             )
         )
-    return _block(arrays, list_ndim, max(arr_ndim, list_ndim))
+    result_ndim = max(arr_ndim, list_ndim)
+    return arrays, list_ndim, result_ndim, final_size
+
+
+def _block_slicing(arrays, list_ndim, result_ndim):
+    shape, slices, arrays = _block_info_recursion(
+        arrays, list_ndim, result_ndim)
+    dtype = _nx.result_type(*[arr.dtype for arr in arrays])
+
+    # Test preferring F only in the case that all input arrays are F
+    F_order = all(arr.flags['F_CONTIGUOUS'] for arr in arrays)
+    C_order =  all(arr.flags['C_CONTIGUOUS'] for arr in arrays)
+    order = 'F' if F_order and not C_order else 'C'
+    result = _nx.empty(shape=shape, dtype=dtype, order=order)
+    # Note: In a c implementation, the function
+    # PyArray_CreateMultiSortedStridePerm could be used for more advanced
+    # guessing of the desired order.
+
+    for the_slice, arr in zip(slices, arrays):
+        result[(Ellipsis,) + the_slice] = arr
+    return result
+
+
+def _block_concatenate(arrays, list_ndim, result_ndim):
+    result = _block(arrays, list_ndim, result_ndim)
+    if list_ndim == 0:
+        # Catch an edge case where _block returns a view because
+        # `arrays` is a single numpy array and not a list of numpy arrays.
+        # This might copy scalars or lists twice, but this isn't a likely
+        # usecase for those interested in performance
+        result = result.copy()
+    return result
diff --git a/numpy/core/src/common/npy_config.h b/numpy/core/src/common/npy_config.h
index 673ea1d94..eedfbe364 100644
--- a/numpy/core/src/common/npy_config.h
+++ b/numpy/core/src/common/npy_config.h
@@ -30,7 +30,7 @@
 
 #endif
 
-#if defined(_MSC_VER) && (_MSC_VER == 1900)
+#if defined(_MSC_VER) && (_MSC_VER >= 1900)
 
 #undef HAVE_CASIN
 #undef HAVE_CASINF
@@ -44,6 +44,18 @@
 #undef HAVE_CATANH
 #undef HAVE_CATANHF
 #undef HAVE_CATANHL
+#undef HAVE_CSQRT
+#undef HAVE_CSQRTF
+#undef HAVE_CSQRTL
+#undef HAVE_CLOG
+#undef HAVE_CLOGF
+#undef HAVE_CLOGL
+#undef HAVE_CACOS
+#undef HAVE_CACOSF
+#undef HAVE_CACOSL
+#undef HAVE_CACOSH
+#undef HAVE_CACOSHF
+#undef HAVE_CACOSHL
 
 #endif
 
diff --git a/numpy/core/src/common/npy_ctypes.h b/numpy/core/src/common/npy_ctypes.h
new file mode 100644
index 000000000..f26db9e05
--- /dev/null
+++ b/numpy/core/src/common/npy_ctypes.h
@@ -0,0 +1,49 @@
+#ifndef NPY_CTYPES_H
+#define NPY_CTYPES_H
+
+#include <Python.h>
+
+#include "npy_import.h"
+
+/*
+ * Check if a python type is a ctypes class.
+ *
+ * Works like the Py<type>_Check functions, returning true if the argument
+ * looks like a ctypes object.
+ *
+ * This entire function is just a wrapper around the Python function of the
+ * same name.
+ */
+NPY_INLINE static int
+npy_ctypes_check(PyTypeObject *obj)
+{
+    static PyObject *py_func = NULL;
+    PyObject *ret_obj;
+    int ret;
+
+    npy_cache_import("numpy.core._internal", "npy_ctypes_check", &py_func);
+    if (py_func == NULL) {
+        goto fail;
+    }
+
+    ret_obj = PyObject_CallFunctionObjArgs(py_func, (PyObject *)obj, NULL);
+    if (ret_obj == NULL) {
+        goto fail;
+    }
+
+    ret = PyObject_IsTrue(ret_obj);
+    if (ret == -1) {
+        goto fail;
+    }
+
+    return ret;
+
+fail:
+    /* If the above fails, then we should just assume that the type is not from
+     * ctypes
+     */
+    PyErr_Clear();
+    return 0;
+}
+
+#endif
diff --git a/numpy/core/src/common/npy_longdouble.c b/numpy/core/src/common/npy_longdouble.c
index 508fbceac..561f4b825 100644
--- a/numpy/core/src/common/npy_longdouble.c
+++ b/numpy/core/src/common/npy_longdouble.c
@@ -1,17 +1,11 @@
 #include <Python.h>
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+
 #include "numpy/ndarraytypes.h"
 #include "numpy/npy_math.h"
-
-/* This is a backport of Py_SETREF */
-#define NPY_SETREF(op, op2)                      \
-    do {                                        \
-        PyObject *_py_tmp = (PyObject *)(op);   \
-        (op) = (op2);                           \
-        Py_DECREF(_py_tmp);                     \
-    } while (0)
-
+#include "npy_pycompat.h"
 
 /*
  * Heavily derived from PyLong_FromDouble
@@ -66,7 +60,7 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
         npy_ulonglong chunk = (npy_ulonglong)frac;
         PyObject *l_chunk;
         /* v = v << chunk_size */
-        NPY_SETREF(v, PyNumber_Lshift(v, l_chunk_size));
+        Py_SETREF(v, PyNumber_Lshift(v, l_chunk_size));
         if (v == NULL) {
             goto done;
         }
@@ -77,7 +71,7 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
             goto done;
         }
         /* v = v | chunk */
-        NPY_SETREF(v, PyNumber_Or(v, l_chunk));
+        Py_SETREF(v, PyNumber_Or(v, l_chunk));
         Py_DECREF(l_chunk);
         if (v == NULL) {
             goto done;
@@ -90,7 +84,7 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
 
     /* v = -v */
     if (neg) {
-        NPY_SETREF(v, PyNumber_Negative(v));
+        Py_SETREF(v, PyNumber_Negative(v));
         if (v == NULL) {
             goto done;
         }
diff --git a/numpy/core/src/multiarray/numpyos.c b/numpy/core/src/common/numpyos.c
index 52dcbf3c8..d60b1ca17 100644
--- a/numpy/core/src/multiarray/numpyos.c
+++ b/numpy/core/src/common/numpyos.c
@@ -769,3 +769,31 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value)
     }
     return r;
 }
+
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOLL
+    return strtoll(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoi64(str, endptr, base);
+#else
+    /* ok on 64 bit posix */
+    return PyOS_strtol(str, endptr, base);
+#endif
+}
+
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOULL
+    return strtoull(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoui64(str, endptr, base);
+#else
+    /* ok on 64 bit posix */
+    return PyOS_strtoul(str, endptr, base);
+#endif
+}
+
+
diff --git a/numpy/core/src/multiarray/numpyos.h b/numpy/core/src/common/numpyos.h
index 7ca795a6f..4deed8400 100644
--- a/numpy/core/src/multiarray/numpyos.h
+++ b/numpy/core/src/common/numpyos.h
@@ -31,4 +31,11 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value);
 NPY_NO_EXPORT int
 NumPyOS_ascii_isspace(int c);
 
+/* Convert a string to an int in an arbitrary base */
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base);
+
+/* Convert a string to an int in an arbitrary base */
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base);
 #endif
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 368f5ded7..936a30426 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -471,7 +471,7 @@ array_dealloc(PyArrayObject *self)
 {
     PyArrayObject_fields *fa = (PyArrayObject_fields *)self;
 
-    _array_dealloc_buffer_info(self);
+    _dealloc_cached_buffer_info((PyObject*)self);
 
     if (fa->weakreflist != NULL) {
         PyObject_ClearWeakRefs((PyObject *)self);
@@ -1218,37 +1218,6 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op)
     }
 }
 
-/* This is a copy of _PyErr_ChainExceptions, with:
- *  - a minimal implementation for python 2
- *  - __cause__ used instead of __context__
- */
-NPY_NO_EXPORT void
-PyArray_ChainExceptionsCause(PyObject *exc, PyObject *val, PyObject *tb)
-{
-    if (exc == NULL)
-        return;
-
-    if (PyErr_Occurred()) {
-        /* only py3 supports this anyway */
-        #ifdef NPY_PY3K
-            PyObject *exc2, *val2, *tb2;
-            PyErr_Fetch(&exc2, &val2, &tb2);
-            PyErr_NormalizeException(&exc, &val, &tb);
-            if (tb != NULL) {
-                PyException_SetTraceback(val, tb);
-                Py_DECREF(tb);
-            }
-            Py_DECREF(exc);
-            PyErr_NormalizeException(&exc2, &val2, &tb2);
-            PyException_SetCause(val2, val);
-            PyErr_Restore(exc2, val2, tb2);
-        #endif
-    }
-    else {
-        PyErr_Restore(exc, val, tb);
-    }
-}
-
 /*
  * Silence the current error and emit a deprecation warning instead.
  *
@@ -1260,7 +1229,7 @@ DEPRECATE_silence_error(const char *msg) {
     PyObject *exc, *val, *tb;
     PyErr_Fetch(&exc, &val, &tb);
     if (DEPRECATE(msg) < 0) {
-        PyArray_ChainExceptionsCause(exc, val, tb);
+        npy_PyErr_ChainExceptionsCause(exc, val, tb);
         return -1;
     }
     Py_XDECREF(exc);
@@ -1377,7 +1346,7 @@ fail:
     /*
      * Reraise the original exception, possibly chaining with a new one.
      */
-    PyArray_ChainExceptionsCause(exc, val, tb);
+    npy_PyErr_ChainExceptionsCause(exc, val, tb);
     return NULL;
 }
 
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 0e69cfc07..d3aa1bd92 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -2,7 +2,8 @@
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
 #include "structmember.h"
-
+#include <limits.h>
+#include <assert.h>
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
@@ -34,8 +35,7 @@
 
 #include "cblasfuncs.h"
 #include "npy_cblas.h"
-#include <limits.h>
-#include <assert.h>
+#include "buffer.h"
 
 /* check for sequences, but ignore the types numpy considers scalars */
 static NPY_INLINE npy_bool
@@ -150,32 +150,6 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
 
 /**end repeat**/
 
-static npy_longlong
-npy_strtoll(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOLL
-    return strtoll(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoi64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtol(str, endptr, base);
-#endif
-}
-
-static npy_ulonglong
-npy_strtoull(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOULL
-    return strtoull(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoui64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtoul(str, endptr, base);
-#endif
-}
-
 /*
  *****************************************************************************
  **                         GETITEM AND SETITEM                             **
@@ -947,6 +921,7 @@ VOID_setitem(PyObject *op, void *input, void *vap)
             memset(ip + view.len, 0, itemsize - view.len);
         }
         PyBuffer_Release(&view);
+        _dealloc_cached_buffer_info(op);
     }
 #else
     {
@@ -1796,8 +1771,8 @@ BOOL_scan(FILE *fp, npy_bool *ip, void *NPY_UNUSED(ignore),
  * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *         npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *         npy_datetime, npy_timedelta#
- * #func = (PyOS_strtol, PyOS_strtoul)*4, npy_strtoll, npy_strtoull,
- *         npy_strtoll*2#
+ * #func = (PyOS_strtol, PyOS_strtoul)*4, NumPyOS_strtoll, NumPyOS_strtoull,
+ *         NumPyOS_strtoll*2#
  * #btype = (npy_long, npy_ulong)*4, npy_longlong, npy_ulonglong,
  *          npy_longlong*2#
  */
diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c
index d60e739b7..2f66d7f2f 100644
--- a/numpy/core/src/multiarray/buffer.c
+++ b/numpy/core/src/multiarray/buffer.c
@@ -922,7 +922,7 @@ fail:
  */
 
 NPY_NO_EXPORT void
-_array_dealloc_buffer_info(PyArrayObject *self)
+_dealloc_cached_buffer_info(PyObject *self)
 {
     int reset_error_state = 0;
     PyObject *ptype, *pvalue, *ptraceback;
@@ -936,7 +936,7 @@ _array_dealloc_buffer_info(PyArrayObject *self)
         PyErr_Fetch(&ptype, &pvalue, &ptraceback);
     }
 
-    _buffer_clear_info((PyObject*)self);
+    _buffer_clear_info(self);
 
     if (reset_error_state) {
         PyErr_Restore(ptype, pvalue, ptraceback);
@@ -1027,8 +1027,11 @@ _descriptor_from_pep3118_format(char *s)
     Py_DECREF(str);
     Py_DECREF(_numpy_internal);
     if (descr == NULL) {
+        PyObject *exc, *val, *tb;
+        PyErr_Fetch(&exc, &val, &tb);
         PyErr_Format(PyExc_ValueError,
                      "'%s' is not a valid PEP 3118 buffer format string", buf);
+        npy_PyErr_ChainExceptionsCause(exc, val, tb);
         free(buf);
         return NULL;
     }
diff --git a/numpy/core/src/multiarray/buffer.h b/numpy/core/src/multiarray/buffer.h
index d5da8f440..fae413c85 100644
--- a/numpy/core/src/multiarray/buffer.h
+++ b/numpy/core/src/multiarray/buffer.h
@@ -4,7 +4,7 @@
 extern NPY_NO_EXPORT PyBufferProcs array_as_buffer;
 
 NPY_NO_EXPORT void
-_array_dealloc_buffer_info(PyArrayObject *self);
+_dealloc_cached_buffer_info(PyObject *self);
 
 NPY_NO_EXPORT PyArray_Descr*
 _descriptor_from_pep3118_format(char *s);
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 5b4611e8a..65a290770 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -312,6 +312,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
             PyErr_Clear();
             dtype = _descriptor_from_pep3118_format(buffer_view.format);
             PyBuffer_Release(&buffer_view);
+            _dealloc_cached_buffer_info(obj);
             if (dtype) {
                 goto promote_types;
             }
@@ -323,6 +324,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
             dtype = PyArray_DescrNewFromType(NPY_VOID);
             dtype->elsize = buffer_view.itemsize;
             PyBuffer_Release(&buffer_view);
+            _dealloc_cached_buffer_info(obj);
             goto promote_types;
         }
         else {
@@ -636,6 +638,14 @@ _IsWriteable(PyArrayObject *ap)
         return NPY_FALSE;
     }
     PyBuffer_Release(&view);
+    /*
+     * The first call to PyObject_GetBuffer stores a reference to a struct
+     * _buffer_info_t (from buffer.c, with format, ndim, strides and shape) in
+     * a static dictionary, with id(base) as the key. Usually we release it
+     * after the call to PyBuffer_Release, via a call to
+     * _dealloc_cached_buffer_info, but in this case leave it in the cache to
+     * speed up future calls to _IsWriteable.
+     */
 #else
     if (PyObject_AsWriteBuffer(base, &dummy, &n) < 0) {
         PyErr_Clear();
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index 1c27f8394..e8380e3bc 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -1156,7 +1156,27 @@ arr_unravel_index(PyObject *self, PyObject *args, PyObject *kwds)
     int i, ret_ndim;
     npy_intp ret_dims[NPY_MAXDIMS], ret_strides[NPY_MAXDIMS];
 
-    char *kwlist[] = {"indices", "dims", "order", NULL};
+    char *kwlist[] = {"indices", "shape", "order", NULL};
+
+    /* Continue to support the older "dims" argument in place
+     * of the "shape" argument. Issue an appropriate warning
+     * if "dims" is detected in keywords, then replace it with
+     * the new "shape" argument and continue processing as usual */
+
+
+    if (kwds) {
+        PyObject *dims_item, *shape_item;
+        dims_item = PyDict_GetItemString(kwds, "dims");
+        shape_item = PyDict_GetItemString(kwds, "shape");
+        if (dims_item != NULL && shape_item == NULL) {
+            if (DEPRECATE("'shape' argument should be"
+                          " used instead of 'dims'") < 0) {
+                return NULL;
+            }
+            PyDict_SetItemString(kwds, "shape", dims_item);
+            PyDict_DelItemString(kwds, "dims");
+        }
+    }
 
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&|O&:unravel_index",
                     kwlist,
diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index 7e92e5991..cef3c27ed 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -16,6 +16,7 @@
 
 #include "conversion_utils.h"
 #include "alloc.h"
+#include "buffer.h"
 
 static int
 PyArray_PyIntAsInt_ErrMsg(PyObject *o, const char * msg) NPY_GCC_NONNULL(2);
@@ -185,6 +186,7 @@ PyArray_BufferConverter(PyObject *obj, PyArray_Chunk *buf)
      * sticks around after the release.
      */
     PyBuffer_Release(&view);
+    _dealloc_cached_buffer_info(obj);
 
     /* Point to the base of the buffer object if present */
     if (PyMemoryView_Check(obj)) {
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index aaaaeee82..23a8dcea2 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -11,7 +11,7 @@
 
 #include "npy_config.h"
 
-#include "npy_import.h"
+#include "npy_ctypes.h"
 #include "npy_pycompat.h"
 #include "multiarraymodule.h"
 
@@ -743,12 +743,14 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
                 d[i] = buffer_view.shape[i];
             }
             PyBuffer_Release(&buffer_view);
+            _dealloc_cached_buffer_info(obj);
             return 0;
         }
         else if (PyObject_GetBuffer(obj, &buffer_view, PyBUF_SIMPLE) == 0) {
             d[0] = buffer_view.len;
             *maxndim = 1;
             PyBuffer_Release(&buffer_view);
+            _dealloc_cached_buffer_info(obj);
             return 0;
         }
         else {
@@ -1381,15 +1383,7 @@ _array_from_buffer_3118(PyObject *memoryview)
          * Note that even if the above are fixed in master, we have to drop the
          * early patch versions of python to actually make use of the fixes.
          */
-
-        int is_ctypes = _is_from_ctypes(view->obj);
-        if (is_ctypes < 0) {
-            /* This error is not useful */
-            PyErr_WriteUnraisable(view->obj);
-            is_ctypes = 0;
-        }
-
-        if (!is_ctypes) {
+        if (!npy_ctypes_check(Py_TYPE(view->obj))) {
             /* This object has no excuse for a broken PEP3118 buffer */
             PyErr_Format(
                     PyExc_RuntimeError,
@@ -2471,6 +2465,7 @@ PyArray_FromInterface(PyObject *origin)
          * sticks around after the release.
          */
         PyBuffer_Release(&view);
+        _dealloc_cached_buffer_info(base);
 #else
         res = PyObject_AsWriteBuffer(base, (void **)&data, &buffer_len);
         if (res < 0) {
@@ -3726,6 +3721,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
      * sticks around after the release.
      */
     PyBuffer_Release(&view);
+    _dealloc_cached_buffer_info(buf);
 #else
     if (PyObject_AsWriteBuffer(buf, (void *)&data, &ts) == -1) {
         writeable = 0;
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index 7f837901c..a8550d958 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -2845,6 +2845,16 @@ convert_pyobject_to_timedelta(PyArray_DatetimeMetaData *meta, PyObject *obj,
         *out = NPY_DATETIME_NAT;
         return 0;
     }
+    else if (PyArray_IsScalar(obj, Integer)) {
+        /* Use the default unit if none was specified */
+        if (meta->base == NPY_FR_ERROR) {
+            meta->base = NPY_DATETIME_DEFAULTUNIT;
+            meta->num = 1;
+        }
+
+        *out = PyLong_AsLongLong(obj);
+        return 0;
+    }
     else {
         PyErr_SetString(PyExc_ValueError,
                 "Could not convert object to NumPy timedelta");
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 439980877..b9be3c09f 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -10,7 +10,7 @@
 #include "numpy/arrayscalars.h"
 
 #include "npy_config.h"
-
+#include "npy_ctypes.h"
 #include "npy_pycompat.h"
 
 #include "_datetime.h"
@@ -19,6 +19,7 @@
 #include "descriptor.h"
 #include "alloc.h"
 #include "assert.h"
+#include "buffer.h"
 
 /*
  * offset:    A starting offset.
@@ -54,79 +55,46 @@ Borrowed_PyMapping_GetItemString(PyObject *o, char *key)
     return ret;
 }
 
-/*
- * Creates a dtype object from ctypes inputs.
- *
- * Returns a new reference to a dtype object, or NULL
- * if this is not possible. When it returns NULL, it does
- * not set a Python exception.
- */
 static PyArray_Descr *
-_arraydescr_fromctypes(PyObject *obj)
+_arraydescr_from_ctypes_type(PyTypeObject *type)
 {
-    PyObject *dtypedescr;
-    PyArray_Descr *newdescr;
-    int ret;
+    PyObject *_numpy_dtype_ctypes;
+    PyObject *res;
 
-    /* Understand basic ctypes */
-    dtypedescr = PyObject_GetAttrString(obj, "_type_");
-    PyErr_Clear();
-    if (dtypedescr) {
-        ret = PyArray_DescrConverter(dtypedescr, &newdescr);
-        Py_DECREF(dtypedescr);
-        if (ret == NPY_SUCCEED) {
-            PyObject *length;
-            /* Check for ctypes arrays */
-            length = PyObject_GetAttrString(obj, "_length_");
-            PyErr_Clear();
-            if (length) {
-                /* derived type */
-                PyObject *newtup;
-                PyArray_Descr *derived;
-                newtup = Py_BuildValue("N(N)", newdescr, length);
-                ret = PyArray_DescrConverter(newtup, &derived);
-                Py_DECREF(newtup);
-                if (ret == NPY_SUCCEED) {
-                    return derived;
-                }
-                PyErr_Clear();
-                return NULL;
-            }
-            return newdescr;
-        }
-        PyErr_Clear();
+    /* Call the python function of the same name. */
+    _numpy_dtype_ctypes = PyImport_ImportModule("numpy.core._dtype_ctypes");
+    if (_numpy_dtype_ctypes == NULL) {
         return NULL;
     }
-    /* Understand ctypes structures --
-       bit-fields are not supported
-       automatically aligns */
-    dtypedescr = PyObject_GetAttrString(obj, "_fields_");
-    PyErr_Clear();
-    if (dtypedescr) {
-        ret = PyArray_DescrAlignConverter(dtypedescr, &newdescr);
-        Py_DECREF(dtypedescr);
-        if (ret == NPY_SUCCEED) {
-            return newdescr;
-        }
-        PyErr_Clear();
+    res = PyObject_CallMethod(_numpy_dtype_ctypes, "dtype_from_ctypes_type", "O", (PyObject *)type);
+    Py_DECREF(_numpy_dtype_ctypes);
+    if (res == NULL) {
+        return NULL;
     }
 
-    return NULL;
+    /*
+     * sanity check that dtype_from_ctypes_type returned the right type,
+     * since getting it wrong would give segfaults.
+     */
+    if (!PyObject_TypeCheck(res, &PyArrayDescr_Type)) {
+        Py_DECREF(res);
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    return (PyArray_Descr *)res;
 }
 
 /*
- * This function creates a dtype object when:
- *  - The object has a "dtype" attribute, and it can be converted
- *    to a dtype object.
- *  - The object is a ctypes type object, including array
- *    and structure types.
+ * This function creates a dtype object when the object has a "dtype" attribute,
+ * and it can be converted to a dtype object.
  *
  * Returns a new reference to a dtype object, or NULL
  * if this is not possible. When it returns NULL, it does
  * not set a Python exception.
  */
 NPY_NO_EXPORT PyArray_Descr *
-_arraydescr_fromobj(PyObject *obj)
+_arraydescr_from_dtype_attr(PyObject *obj)
 {
     PyObject *dtypedescr;
     PyArray_Descr *newdescr = NULL;
@@ -135,15 +103,18 @@ _arraydescr_fromobj(PyObject *obj)
     /* For arbitrary objects that have a "dtype" attribute */
     dtypedescr = PyObject_GetAttrString(obj, "dtype");
     PyErr_Clear();
-    if (dtypedescr != NULL) {
-        ret = PyArray_DescrConverter(dtypedescr, &newdescr);
-        Py_DECREF(dtypedescr);
-        if (ret == NPY_SUCCEED) {
-            return newdescr;
-        }
+    if (dtypedescr == NULL) {
+        return NULL;
+    }
+
+    ret = PyArray_DescrConverter(dtypedescr, &newdescr);
+    Py_DECREF(dtypedescr);
+    if (ret != NPY_SUCCEED) {
         PyErr_Clear();
+        return NULL;
     }
-    return _arraydescr_fromctypes(obj);
+
+    return newdescr;
 }
 
 /*
@@ -1423,10 +1394,20 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
             check_num = NPY_VOID;
         }
         else {
-            *at = _arraydescr_fromobj(obj);
+            *at = _arraydescr_from_dtype_attr(obj);
             if (*at) {
                 return NPY_SUCCEED;
             }
+
+            /*
+             * Note: this comes after _arraydescr_from_dtype_attr because the ctypes
+             * type might override the dtype if numpy does not otherwise
+             * support it.
+             */
+            if (npy_ctypes_check((PyTypeObject *)obj)) {
+                *at = _arraydescr_from_ctypes_type((PyTypeObject *)obj);
+                return *at ? NPY_SUCCEED : NPY_FAIL;
+            }
         }
         goto finish;
     }
@@ -1596,13 +1577,23 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
         goto fail;
     }
     else {
-        *at = _arraydescr_fromobj(obj);
+        *at = _arraydescr_from_dtype_attr(obj);
         if (*at) {
             return NPY_SUCCEED;
         }
         if (PyErr_Occurred()) {
             return NPY_FAIL;
         }
+
+        /*
+         * Note: this comes after _arraydescr_from_dtype_attr because the ctypes
+         * type might override the dtype if numpy does not otherwise
+         * support it.
+         */
+        if (npy_ctypes_check(Py_TYPE(obj))) {
+            *at = _arraydescr_from_ctypes_type(Py_TYPE(obj));
+            return *at ? NPY_SUCCEED : NPY_FAIL;
+        }
         goto fail;
     }
     if (PyErr_Occurred()) {
@@ -1770,6 +1761,7 @@ arraydescr_dealloc(PyArray_Descr *self)
         Py_INCREF(self);
         return;
     }
+    _dealloc_cached_buffer_info((PyObject*)self);
     Py_XDECREF(self->typeobj);
     Py_XDECREF(self->names);
     Py_XDECREF(self->fields);
diff --git a/numpy/core/src/multiarray/descriptor.h b/numpy/core/src/multiarray/descriptor.h
index 5a3e4b15f..a5f3b8cdf 100644
--- a/numpy/core/src/multiarray/descriptor.h
+++ b/numpy/core/src/multiarray/descriptor.h
@@ -8,7 +8,7 @@ NPY_NO_EXPORT PyObject *
 array_set_typeDict(PyObject *NPY_UNUSED(ignored), PyObject *args);
 
 NPY_NO_EXPORT PyArray_Descr *
-_arraydescr_fromobj(PyObject *obj);
+_arraydescr_from_dtype_attr(PyObject *obj);
 
 
 NPY_NO_EXPORT int
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 97d899ce0..2b29d4f8c 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -51,6 +51,20 @@
 #endif
 /**********************************************/
 
+#if NPY_DT_DBG_TRACING
+/*
+ * Thin wrapper around print that ignores exceptions
+ */
+static void
+_safe_print(PyObject *obj)
+{
+    if (PyObject_Print(obj, stdout, 0) < 0) {
+        PyErr_Clear();
+        printf("<error during print>");
+    }
+}
+#endif
+
 /*
  * Returns a transfer function which DECREFs any references in src_type.
  *
@@ -1042,9 +1056,9 @@ get_nbo_cast_datetime_transfer_function(int aligned,
 
 #if NPY_DT_DBG_TRACING
     printf("Dtype transfer from ");
-    PyObject_Print((PyObject *)src_dtype, stdout, 0);
+    _safe_print((PyObject *)src_dtype);
     printf(" to ");
-    PyObject_Print((PyObject *)dst_dtype, stdout, 0);
+    _safe_print((PyObject *)dst_dtype);
     printf("\n");
     printf("has conversion fraction %lld/%lld\n", num, denom);
 #endif
@@ -1089,9 +1103,9 @@ get_nbo_datetime_to_string_transfer_function(int aligned,
 
 #if NPY_DT_DBG_TRACING
     printf("Dtype transfer from ");
-    PyObject_Print((PyObject *)src_dtype, stdout, 0);
+    _safe_print((PyObject *)src_dtype);
     printf(" to ");
-    PyObject_Print((PyObject *)dst_dtype, stdout, 0);
+    _safe_print((PyObject *)dst_dtype);
     printf("\n");
 #endif
 
@@ -1211,9 +1225,9 @@ get_nbo_string_to_datetime_transfer_function(int aligned,
 
 #if NPY_DT_DBG_TRACING
     printf("Dtype transfer from ");
-    PyObject_Print((PyObject *)src_dtype, stdout, 0);
+    _safe_print((PyObject *)src_dtype);
     printf(" to ");
-    PyObject_Print((PyObject *)dst_dtype, stdout, 0);
+    _safe_print((PyObject *)dst_dtype);
     printf("\n");
 #endif
 
@@ -3421,9 +3435,13 @@ PyArray_GetDTypeTransferFunction(int aligned,
 
 #if NPY_DT_DBG_TRACING
     printf("Calculating dtype transfer from ");
-    PyObject_Print((PyObject *)src_dtype, stdout, 0);
+    if (PyObject_Print((PyObject *)src_dtype, stdout, 0) < 0) {
+        return NPY_FAIL;
+    }
     printf(" to ");
-    PyObject_Print((PyObject *)dst_dtype, stdout, 0);
+    if (PyObject_Print((PyObject *)dst_dtype, stdout, 0) < 0) {
+        return NPY_FAIL;
+    }
     printf("\n");
 #endif
 
diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c
index cae4273ff..24962da8a 100644
--- a/numpy/core/src/multiarray/getset.c
+++ b/numpy/core/src/multiarray/getset.c
@@ -20,6 +20,7 @@
 #include "arrayobject.h"
 #include "mem_overlap.h"
 #include "alloc.h"
+#include "buffer.h"
 
 /*******************  array attribute get and set routines ******************/
 
@@ -143,6 +144,7 @@ array_strides_set(PyArrayObject *self, PyObject *obj)
         offset = PyArray_BYTES(self) - (char *)view.buf;
         numbytes = view.len + offset;
         PyBuffer_Release(&view);
+        _dealloc_cached_buffer_info((PyObject*)new);
     }
 #else
     if (PyArray_BASE(new) &&
@@ -376,6 +378,7 @@ array_data_set(PyArrayObject *self, PyObject *op)
      * sticks around after the release.
      */
     PyBuffer_Release(&view);
+    _dealloc_cached_buffer_info(op);
 #else
     if (PyObject_AsWriteBuffer(op, &buf, &buf_len) < 0) {
         PyErr_Clear();
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index de54ca1b3..a7c6b14f4 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -45,7 +45,7 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
 
     indices = NULL;
     self = (PyArrayObject *)PyArray_CheckAxis(self0, &axis,
-                                    NPY_ARRAY_CARRAY);
+                                    NPY_ARRAY_CARRAY_RO);
     if (self == NULL) {
         return NULL;
     }
diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c
index 3e3248f53..a3bc8e742 100644
--- a/numpy/core/src/multiarray/iterators.c
+++ b/numpy/core/src/multiarray/iterators.c
@@ -92,114 +92,6 @@ parse_index_entry(PyObject *op, npy_intp *step_size,
 }
 
 
-/*
- * Parses an index that has no fancy indexing. Populates
- * out_dimensions, out_strides, and out_offset.
- */
-NPY_NO_EXPORT int
-parse_index(PyArrayObject *self, PyObject *op,
-            npy_intp *out_dimensions,
-            npy_intp *out_strides,
-            npy_intp *out_offset,
-            int check_index)
-{
-    int i, j, n;
-    int nd_old, nd_new, n_add, n_ellipsis;
-    npy_intp n_steps, start, offset, step_size;
-    PyObject *op1 = NULL;
-    int is_slice;
-
-    if (PySlice_Check(op) || op == Py_Ellipsis || op == Py_None) {
-        n = 1;
-        op1 = op;
-        Py_INCREF(op);
-        /* this relies on the fact that n==1 for loop below */
-        is_slice = 1;
-    }
-    else {
-        if (!PySequence_Check(op)) {
-            PyErr_SetString(PyExc_IndexError,
-                            "index must be either an int "
-                            "or a sequence");
-            return -1;
-        }
-        n = PySequence_Length(op);
-        is_slice = 0;
-    }
-
-    nd_old = nd_new = 0;
-
-    offset = 0;
-    for (i = 0; i < n; i++) {
-        if (!is_slice) {
-            op1 = PySequence_GetItem(op, i);
-            if (op1 == NULL) {
-                return -1;
-            }
-        }
-        start = parse_index_entry(op1, &step_size, &n_steps,
-                                  nd_old < PyArray_NDIM(self) ?
-                                  PyArray_DIMS(self)[nd_old] : 0,
-                                  nd_old, check_index ?
-                                  nd_old < PyArray_NDIM(self) : 0);
-        Py_DECREF(op1);
-        if (start == -1) {
-            break;
-        }
-        if (n_steps == NEWAXIS_INDEX) {
-            out_dimensions[nd_new] = 1;
-            out_strides[nd_new] = 0;
-            nd_new++;
-        }
-        else if (n_steps == ELLIPSIS_INDEX) {
-            for (j = i + 1, n_ellipsis = 0; j < n; j++) {
-                op1 = PySequence_GetItem(op, j);
-                if (op1 == Py_None) {
-                    n_ellipsis++;
-                }
-                Py_DECREF(op1);
-            }
-            n_add = PyArray_NDIM(self)-(n-i-n_ellipsis-1+nd_old);
-            if (n_add < 0) {
-                PyErr_SetString(PyExc_IndexError, "too many indices");
-                return -1;
-            }
-            for (j = 0; j < n_add; j++) {
-                out_dimensions[nd_new] = PyArray_DIMS(self)[nd_old];
-                out_strides[nd_new] = PyArray_STRIDES(self)[nd_old];
-                nd_new++; nd_old++;
-            }
-        }
-        else {
-            if (nd_old >= PyArray_NDIM(self)) {
-                PyErr_SetString(PyExc_IndexError, "too many indices");
-                return -1;
-            }
-            offset += PyArray_STRIDES(self)[nd_old]*start;
-            nd_old++;
-            if (n_steps != SINGLE_INDEX) {
-                out_dimensions[nd_new] = n_steps;
-                out_strides[nd_new] = step_size *
-                                            PyArray_STRIDES(self)[nd_old-1];
-                nd_new++;
-            }
-        }
-    }
-    if (i < n) {
-        return -1;
-    }
-    n_add = PyArray_NDIM(self)-nd_old;
-    for (j = 0; j < n_add; j++) {
-        out_dimensions[nd_new] = PyArray_DIMS(self)[nd_old];
-        out_strides[nd_new] = PyArray_STRIDES(self)[nd_old];
-        nd_new++;
-        nd_old++;
-    }
-    *out_offset = offset;
-    return nd_new;
-}
-
-
 /*********************** Element-wise Array Iterator ***********************/
 /*  Aided by Peter J. Verveer's  nd_image package and numpy's arraymap  ****/
 /*         and Python's array iterator                                   ***/
diff --git a/numpy/core/src/multiarray/iterators.h b/numpy/core/src/multiarray/iterators.h
index 04f57c885..376dc154a 100644
--- a/numpy/core/src/multiarray/iterators.h
+++ b/numpy/core/src/multiarray/iterators.h
@@ -1,17 +1,6 @@
 #ifndef _NPY_ARRAYITERATORS_H_
 #define _NPY_ARRAYITERATORS_H_
 
-/*
- * Parses an index that has no fancy indexing. Populates
- * out_dimensions, out_strides, and out_offset.
- */
-NPY_NO_EXPORT int
-parse_index(PyArrayObject *self, PyObject *op,
-            npy_intp *out_dimensions,
-            npy_intp *out_strides,
-            npy_intp *out_offset,
-            int check_index);
-
 NPY_NO_EXPORT PyObject
 *iter_subscript(PyArrayIterObject *, PyObject *);
 
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 038c21c92..d371ae762 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1128,8 +1128,8 @@ array_boolean_subscript(PyArrayObject *self,
                 1, &size, PyArray_STRIDES(ret), PyArray_BYTES(ret),
                 PyArray_FLAGS(self), (PyObject *)self, (PyObject *)tmp);
 
+        Py_DECREF(tmp);
         if (ret == NULL) {
-            Py_DECREF(tmp);
             return NULL;
         }
     }
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index cb63c7f74..23b0bfd24 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -356,6 +356,7 @@ PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, int offset)
     PyObject *ret = NULL;
     PyObject *safe;
     static PyObject *checkfunc = NULL;
+    int self_elsize, typed_elsize;
 
     /* check that we are not reinterpreting memory containing Objects. */
     if (_may_have_objects(PyArray_DESCR(self)) || _may_have_objects(typed)) {
@@ -373,6 +374,22 @@ PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, int offset)
         }
         Py_DECREF(safe);
     }
+    self_elsize = PyArray_ITEMSIZE(self);
+    typed_elsize = typed->elsize;
+
+    /* check that values are valid */
+    if (typed_elsize > self_elsize) {
+        PyErr_SetString(PyExc_ValueError, "new type is larger than original type");
+        return NULL;
+    }
+    if (offset < 0) {
+        PyErr_SetString(PyExc_ValueError, "offset is negative");
+        return NULL;
+    }
+    if (offset > self_elsize - typed_elsize) {
+        PyErr_SetString(PyExc_ValueError, "new type plus offset is larger than original type");
+        return NULL;
+    }
 
     ret = PyArray_NewFromDescr_int(
             Py_TYPE(self), typed,
@@ -1602,6 +1619,8 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
 
        Notice because Python does not describe a mechanism to write
        raw data to the pickle, this performs a copy to a string first
+       This issue is now adressed in protocol 5, where a buffer is serialized
+       instead of a string,
     */
 
     state = PyTuple_New(5);
@@ -1635,6 +1654,132 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
 }
 
 static PyObject *
+array_reduce_ex(PyArrayObject *self, PyObject *args)
+{
+    int protocol;
+    PyObject *ret = NULL, *numeric_mod = NULL, *from_buffer_func = NULL;
+    PyObject *buffer_tuple = NULL, *pickle_module = NULL, *pickle_class = NULL;
+    PyObject *class_args = NULL, *class_args_tuple = NULL, *unused = NULL;
+    PyObject *subclass_array_reduce = NULL;
+    PyObject *buffer = NULL, *transposed_array = NULL;
+    PyArray_Descr *descr = NULL;
+    char order;
+
+    if (PyArg_ParseTuple(args, "i", &protocol)){
+        descr = PyArray_DESCR(self);
+        if ((protocol < 5) ||
+            (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
+             !PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)) ||
+            PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
+            (PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
+             ((PyObject*)self)->ob_type != &PyArray_Type) ||
+            PyDataType_ISUNSIZED(descr)) {
+            /* The PickleBuffer class from version 5 of the pickle protocol
+             * can only be used for arrays backed by a contiguous data buffer.
+             * For all other cases we fallback to the generic array_reduce
+             * method that involves using a temporary bytes allocation. However
+             * we do not call array_reduce directly but instead lookup and call
+             * the __reduce__ method to make sure that it's possible customize
+             * pickling in sub-classes. */
+            subclass_array_reduce = PyObject_GetAttrString((PyObject *)self,
+                                                           "__reduce__");
+            return PyObject_CallObject(subclass_array_reduce, unused);
+        }
+        else if (protocol == 5){
+            ret = PyTuple_New(2);
+
+            if (ret == NULL) {
+                return NULL;
+            }
+
+            /* if the python version is below 3.8, the pickle module does not provide
+             * built-in support for protocol 5. We try importing the pickle5
+             * backport instead */
+#if PY_VERSION_HEX >= 0x03080000
+            pickle_module = PyImport_ImportModule("pickle");
+#elif PY_VERSION_HEX < 0x03080000 && PY_VERSION_HEX >= 0x03060000
+            pickle_module = PyImport_ImportModule("pickle5");
+            if (pickle_module == NULL){
+                /* for protocol 5, raise a clear ImportError if pickle5 is not found
+                 */
+                PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
+                        "requires the pickle5 module for python versions >=3.6 "
+                        "and <3.8");
+                return NULL;
+            }
+#else
+            PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
+                                               "for python versions < 3.6");
+            return NULL;
+#endif
+            if (pickle_module == NULL){
+                return NULL;
+            }
+
+            pickle_class = PyObject_GetAttrString(pickle_module,
+                                                  "PickleBuffer");
+
+            class_args_tuple = PyTuple_New(1);
+            if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
+                PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)){
+
+                /* if the array if Fortran-contiguous and not C-contiguous,
+                 * the PickleBuffer instance will hold a view on the transpose
+                 * of the initial array, that is C-contiguous. */
+                order = 'F';
+                transposed_array = PyArray_Transpose((PyArrayObject*)self, NULL);
+                PyTuple_SET_ITEM(class_args_tuple, 0, transposed_array);
+            }
+            else {
+                order = 'C';
+                PyTuple_SET_ITEM(class_args_tuple, 0, (PyObject *)self);
+                Py_INCREF(self);
+            }
+
+            class_args = Py_BuildValue("O", class_args_tuple);
+
+            buffer = PyObject_CallObject(pickle_class, class_args);
+
+            numeric_mod = PyImport_ImportModule("numpy.core.numeric");
+            if (numeric_mod == NULL) {
+                Py_DECREF(ret);
+                return NULL;
+            }
+            from_buffer_func = PyObject_GetAttrString(numeric_mod,
+                                                      "_frombuffer");
+            Py_DECREF(numeric_mod);
+
+            Py_INCREF(descr);
+
+            buffer_tuple = PyTuple_New(4);
+            PyTuple_SET_ITEM(buffer_tuple, 0, buffer);
+            PyTuple_SET_ITEM(buffer_tuple, 1, (PyObject *)descr);
+            PyTuple_SET_ITEM(buffer_tuple, 2,
+                             PyObject_GetAttrString((PyObject *)self,
+                                                    "shape"));
+            PyTuple_SET_ITEM(buffer_tuple, 3,
+                             PyUnicode_FromStringAndSize(&order,
+                                                         (Py_ssize_t)1));
+
+            PyTuple_SET_ITEM(ret, 0, from_buffer_func);
+            PyTuple_SET_ITEM(ret, 1, buffer_tuple);
+
+            return ret;
+        }
+        else {
+            PyErr_Format(PyExc_ValueError,
+                         "cannot call __reduce_ex__ with protocol >= %d",
+                         5);
+            return NULL;
+        }
+    }
+    else {
+        return NULL;
+    }
+
+}
+
+static PyObject *
 array_setstate(PyArrayObject *self, PyObject *args)
 {
     PyObject *shape;
@@ -2507,6 +2652,9 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
     {"__reduce__",
         (PyCFunction) array_reduce,
         METH_VARARGS, NULL},
+    {"__reduce_ex__",
+        (PyCFunction) array_reduce_ex,
+        METH_VARARGS, NULL},
     {"__setstate__",
         (PyCFunction) array_setstate,
         METH_VARARGS, NULL},
diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c
index 5ef6c0bbf..bc435d1ca 100644
--- a/numpy/core/src/multiarray/scalarapi.c
+++ b/numpy/core/src/multiarray/scalarapi.c
@@ -471,7 +471,7 @@ PyArray_DescrFromTypeObject(PyObject *type)
     /* Do special thing for VOID sub-types */
     if (PyType_IsSubtype((PyTypeObject *)type, &PyVoidArrType_Type)) {
         new = PyArray_DescrNewFromType(NPY_VOID);
-        conv = _arraydescr_fromobj(type);
+        conv = _arraydescr_from_dtype_attr(type);
         if (conv) {
             new->fields = conv->fields;
             Py_INCREF(new->fields);
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 6dd8b1a29..0f201b966 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -139,6 +139,7 @@ gentype_alloc(PyTypeObject *type, Py_ssize_t nitems)
 static void
 gentype_dealloc(PyObject *v)
 {
+    _dealloc_cached_buffer_info(v);
     Py_TYPE(v)->tp_free(v);
 }
 
@@ -1858,6 +1859,7 @@ gentype_reduce(PyObject *self, PyObject *NPY_UNUSED(args))
          * sticks around after the release.
          */
         PyBuffer_Release(&view);
+        _dealloc_cached_buffer_info(self);
     }
     else {
         Py_DECREF(ret);
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 3ac71e285..30820737e 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -89,11 +89,19 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
             return NULL;
         }
 
+        if (PyArray_BASE(self) != NULL
+              || (((PyArrayObject_fields *)self)->weakreflist != NULL)) {
+            PyErr_SetString(PyExc_ValueError,
+                    "cannot resize an array that "
+                    "references or is referenced\n"
+                    "by another array in this way. Use the np.resize function.");
+            return NULL;
+        }
         if (refcheck) {
 #ifdef PYPY_VERSION
             PyErr_SetString(PyExc_ValueError,
                     "cannot resize an array with refcheck=True on PyPy.\n"
-                    "Use the resize function or refcheck=False");
+                    "Use the np.resize function or refcheck=False");
             return NULL;
 #else
             refcnt = PyArray_REFCOUNT(self);
@@ -102,13 +110,12 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
         else {
             refcnt = 1;
         }
-        if ((refcnt > 2)
-                || (PyArray_BASE(self) != NULL)
-                || (((PyArrayObject_fields *)self)->weakreflist != NULL)) {
+        if (refcnt > 2) {
             PyErr_SetString(PyExc_ValueError,
                     "cannot resize an array that "
                     "references or is referenced\n"
-                    "by another array in this way.  Use the resize function");
+                    "by another array in this way.\n"
+                    "Use the np.resize function or refcheck=False");
             return NULL;
         }
 
diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src
index 8b5eef87a..d960838c8 100644
--- a/numpy/core/src/npymath/ieee754.c.src
+++ b/numpy/core/src/npymath/ieee754.c.src
@@ -568,13 +568,21 @@ int npy_get_floatstatus() {
 
 /*
  * Functions to set the floating point status word.
- * keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h
  */
 
 #if (defined(__unix__) || defined(unix)) && !defined(USG)
 #include <sys/param.h>
 #endif
 
+
+/*
+ * Define floating point status functions. We must define
+ * npy_get_floatstatus_barrier, npy_clear_floatstatus_barrier,
+ * npy_set_floatstatus_{divbyzero, overflow, underflow, invalid}
+ * for all supported platforms.
+ */
+
+
 /* Solaris --------------------------------------------------------*/
 /* --------ignoring SunOS ieee_flags approach, someone else can
 **         deal with that! */
@@ -626,117 +634,94 @@ void npy_set_floatstatus_invalid(void)
     fpsetsticky(FP_X_INV);
 }
 
+#elif defined(_AIX)
+#include <float.h>
+#include <fpxcp.h>
 
-#elif defined(__GLIBC__) || defined(__APPLE__) || \
-      defined(__CYGWIN__) || defined(__MINGW32__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version >= 502114))
-#  include <fenv.h>
-
-int npy_get_floatstatus_barrier(char* param)
+int npy_get_floatstatus_barrier(char *param)
 {
-    int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
-                                FE_UNDERFLOW | FE_INVALID);
+    int fpstatus = fp_read_flag();
     /*
      * By using a volatile, the compiler cannot reorder this call
      */
     if (param != NULL) {
         volatile char NPY_UNUSED(c) = *(char*)param;
     }
-
-    return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
-           ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
-           ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
-           ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
+    return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
+           ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
+           ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
+           ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
 int npy_clear_floatstatus_barrier(char * param)
 {
-    /* testing float status is 50-100 times faster than clearing on x86 */
     int fpstatus = npy_get_floatstatus_barrier(param);
-    if (fpstatus != 0) {
-        feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
-                      FE_UNDERFLOW | FE_INVALID);
-    }
+    fp_swap_flag(0);
 
     return fpstatus;
 }
 
-
 void npy_set_floatstatus_divbyzero(void)
 {
-    feraiseexcept(FE_DIVBYZERO);
+    fp_raise_xcp(FP_DIV_BY_ZERO);
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    feraiseexcept(FE_OVERFLOW);
+    fp_raise_xcp(FP_OVERFLOW);
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    feraiseexcept(FE_UNDERFLOW);
+    fp_raise_xcp(FP_UNDERFLOW);
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    feraiseexcept(FE_INVALID);
-}
-
-#elif defined(_AIX)
-#include <float.h>
-#include <fpxcp.h>
-
-int npy_get_floatstatus_barrier(char *param)
-{
-    int fpstatus = fp_read_flag();
-    /*
-     * By using a volatile, the compiler cannot reorder this call
-     */
-    if (param != NULL) {
-        volatile char NPY_UNUSED(c) = *(char*)param;
-    }
-    return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
-           ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
-           ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
-           ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
+    fp_raise_xcp(FP_INVALID);
 }
 
-int npy_clear_floatstatus_barrier(char * param)
-{
-    int fpstatus = npy_get_floatstatus_barrier(param);
-    fp_swap_flag(0);
+#elif defined(_MSC_VER) || (defined(__osf__) && defined(__alpha))
 
-    return fpstatus;
-}
+/*
+ * By using a volatile floating point value,
+ * the compiler is forced to actually do the requested
+ * operations because of potential concurrency.
+ *
+ * We shouldn't write multiple values to a single
+ * global here, because that would cause
+ * a race condition.
+ */
+static volatile double _npy_floatstatus_x,
+    _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
+    _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
 
 void npy_set_floatstatus_divbyzero(void)
 {
-    fp_raise_xcp(FP_DIV_BY_ZERO);
+    _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    fp_raise_xcp(FP_OVERFLOW);
+    _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    fp_raise_xcp(FP_UNDERFLOW);
+    _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    fp_raise_xcp(FP_INVALID);
+    _npy_floatstatus_inf = NPY_INFINITY;
+    _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
 }
 
-#else
-
 /* MS Windows -----------------------------------------------------*/
 #if defined(_MSC_VER)
 
 #include <float.h>
 
-
 int npy_get_floatstatus_barrier(char *param)
 {
     /*
@@ -796,53 +781,61 @@ int npy_clear_floatstatus_barrier(char *param)
     return fpstatus;
 }
 
+#endif
+/* End of defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) */
+
 #else
+/* General GCC code, should work on most platforms */
+#  include <fenv.h>
 
-int npy_get_floatstatus_barrier(char *NPY_UNUSED(param))
+int npy_get_floatstatus_barrier(char* param)
 {
-    return 0;
+    int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
+                                FE_UNDERFLOW | FE_INVALID);
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
+
+    return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
+           ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
+           ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
+           ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus_barrier(char *param)
+int npy_clear_floatstatus_barrier(char * param)
 {
+    /* testing float status is 50-100 times faster than clearing on x86 */
     int fpstatus = npy_get_floatstatus_barrier(param);
-    return 0;
-}
+    if (fpstatus != 0) {
+        feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
+                      FE_UNDERFLOW | FE_INVALID);
+    }
 
-#endif
+    return fpstatus;
+}
 
-/*
- * By using a volatile floating point value,
- * the compiler is forced to actually do the requested
- * operations because of potential concurrency.
- *
- * We shouldn't write multiple values to a single
- * global here, because that would cause
- * a race condition.
- */
-static volatile double _npy_floatstatus_x,
-    _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
-    _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
 
 void npy_set_floatstatus_divbyzero(void)
 {
-    _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
+    feraiseexcept(FE_DIVBYZERO);
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
+    feraiseexcept(FE_OVERFLOW);
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
+    feraiseexcept(FE_UNDERFLOW);
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    _npy_floatstatus_inf = NPY_INFINITY;
-    _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
+    feraiseexcept(FE_INVALID);
 }
 
 #endif
diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src
index fcbdbe330..8cb74f177 100644
--- a/numpy/core/src/umath/_umath_tests.c.src
+++ b/numpy/core/src/umath/_umath_tests.c.src
@@ -128,6 +128,8 @@ static void
 /**end repeat**/
 
 char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
+/* for use with matrix_multiply code, but different signature */
+char *matmul_signature = "(m?,n),(n,p?)->(m?,p?)";
 
 /**begin repeat
 
@@ -195,6 +197,45 @@ static void
 
 /**end repeat**/
 
+char *cross1d_signature = "(3),(3)->(3)";
+
+/**begin repeat
+
+   #TYPE=LONG,DOUBLE#
+   #typ=npy_long, npy_double#
+*/
+
+/*
+ *  This implements the cross product:
+ *        out[n, 0] = in1[n, 1]*in2[n, 2] - in1[n, 2]*in2[n, 1]
+ *        out[n, 1] = in1[n, 2]*in2[n, 0] - in1[n, 0]*in2[n, 2]
+ *        out[n, 2] = in1[n, 0]*in2[n, 1] - in1[n, 1]*in2[n, 0]
+ */
+static void
+@TYPE@_cross1d(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_3
+    npy_intp is1=steps[0], is2=steps[1], os = steps[2];
+    BEGIN_OUTER_LOOP_3
+        @typ@ i1_x = *(@typ@ *)(args[0] + 0*is1);
+        @typ@ i1_y = *(@typ@ *)(args[0] + 1*is1);
+        @typ@ i1_z = *(@typ@ *)(args[0] + 2*is1);
+
+        @typ@ i2_x = *(@typ@ *)(args[1] + 0*is2);
+        @typ@ i2_y = *(@typ@ *)(args[1] + 1*is2);
+        @typ@ i2_z = *(@typ@ *)(args[1] + 2*is2);
+        char *op = args[2];
+
+        *(@typ@ *)op = i1_y * i2_z - i1_z * i2_y;
+        op += os;
+        *(@typ@ *)op = i1_z * i2_x - i1_x * i2_z;
+        op += os;
+        *(@typ@ *)op = i1_x * i2_y - i1_y * i2_x;
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
 char *euclidean_pdist_signature = "(n,d)->(p)";
 
 /**begin repeat
@@ -285,17 +326,39 @@ static void
 
 /**end repeat**/
 
+/*  The following lines were generated using a slightly modified
+    version of code_generators/generate_umath.py and adding these
+    lines to defdict:
+
+defdict = {
+'inner1d' :
+    Ufunc(2, 1, None_,
+        r'''inner on the last dimension and broadcast on the rest \n"
+        "     \"(i),(i)->()\" \n''',
+        TD('ld'),
+        ),
+'innerwt' :
+    Ufunc(3, 1, None_,
+        r'''inner1d with a weight argument \n"
+        "     \"(i),(i),(i)->()\" \n''',
+        TD('ld'),
+        ),
+}
+
+*/
 
 static PyUFuncGenericFunction inner1d_functions[] = { LONG_inner1d, DOUBLE_inner1d };
-static void * inner1d_data[] = { (void *)NULL, (void *)NULL };
+static void *inner1d_data[] = { (void *)NULL, (void *)NULL };
 static char inner1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction innerwt_functions[] = { LONG_innerwt, DOUBLE_innerwt };
-static void * innerwt_data[] = { (void *)NULL, (void *)NULL };
+static void *innerwt_data[] = { (void *)NULL, (void *)NULL };
 static char innerwt_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multiply, FLOAT_matrix_multiply, DOUBLE_matrix_multiply };
 static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL };
 static char matrix_multiply_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG,  NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,  NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
-
+static PyUFuncGenericFunction cross1d_functions[] = { LONG_cross1d, DOUBLE_cross1d };
+static void *cross1d_data[] = { (void *)NULL, (void *)NULL };
+static char cross1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction euclidean_pdist_functions[] =
                             { FLOAT_euclidean_pdist, DOUBLE_euclidean_pdist };
 static void *eucldiean_pdist_data[] = { (void *)NULL, (void *)NULL };
@@ -303,7 +366,7 @@ static char euclidean_pdist_signatures[] = { NPY_FLOAT, NPY_FLOAT,
                                              NPY_DOUBLE, NPY_DOUBLE };
 
 static PyUFuncGenericFunction cumsum_functions[] = { LONG_cumsum, DOUBLE_cumsum };
-static void * cumsum_data[] = { (void *)NULL, (void *)NULL };
+static void *cumsum_data[] = { (void *)NULL, (void *)NULL };
 static char cumsum_signatures[] = { NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE };
 
 
@@ -346,6 +409,17 @@ addUfuncs(PyObject *dictionary) {
     }
     PyDict_SetItemString(dictionary, "matrix_multiply", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(matrix_multiply_functions,
+                    matrix_multiply_data, matrix_multiply_signatures,
+                    3, 2, 1, PyUFunc_None, "matmul",
+                    "matmul on last two dimensions, with some being optional\n"
+                    "     \"(m?,n),(n,p?)->(m?,p?)\" \n",
+                    0, matmul_signature);
+    if (f == NULL) {
+        return -1;
+    }
+    PyDict_SetItemString(dictionary, "matmul", f);
+    Py_DECREF(f);
     f = PyUFunc_FromFuncAndDataAndSignature(euclidean_pdist_functions,
                     eucldiean_pdist_data, euclidean_pdist_signatures,
                     2, 1, 1, PyUFunc_None, "euclidean_pdist",
@@ -376,6 +450,16 @@ addUfuncs(PyObject *dictionary) {
     }
     PyDict_SetItemString(dictionary, "inner1d_no_doc", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(cross1d_functions, cross1d_data,
+                    cross1d_signatures, 2, 2, 1, PyUFunc_None, "cross1d",
+                    "cross product on the last dimension and broadcast on the rest \n"\
+                    "     \"(3),(3)->(3)\" \n",
+                    0, cross1d_signature);
+    if (f == NULL) {
+        return -1;
+    }
+    PyDict_SetItemString(dictionary, "cross1d", f);
+    Py_DECREF(f);
 
     return 0;
 }
@@ -385,9 +469,10 @@ static PyObject *
 UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
 {
     int nin, nout, i;
-    PyObject *signature, *sig_str;
-    PyUFuncObject *f = NULL;
-    PyObject *core_num_dims = NULL, *core_dim_ixs = NULL;
+    PyObject *signature=NULL, *sig_str=NULL;
+    PyUFuncObject *f=NULL;
+    PyObject *core_num_dims=NULL, *core_dim_ixs=NULL;
+    PyObject *core_dim_flags=NULL, *core_dim_sizes=NULL;
     int core_enabled;
     int core_num_ixs = 0;
 
@@ -442,7 +527,7 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
             goto fail;
         }
         for (i = 0; i < core_num_ixs; i++) {
-            PyObject * val = PyLong_FromLong(f->core_dim_ixs[i]);
+            PyObject *val = PyLong_FromLong(f->core_dim_ixs[i]);
             PyTuple_SET_ITEM(core_dim_ixs, i, val);
         }
     }
@@ -450,13 +535,44 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
         Py_INCREF(Py_None);
         core_dim_ixs = Py_None;
     }
+    if (f->core_dim_flags != NULL) {
+        core_dim_flags = PyTuple_New(f->core_num_dim_ix);
+        if (core_dim_flags == NULL) {
+            goto fail;
+        }
+        for (i = 0; i < f->core_num_dim_ix; i++) {
+            PyObject *val = PyLong_FromLong(f->core_dim_flags[i]);
+            PyTuple_SET_ITEM(core_dim_flags, i, val);
+        }
+    }
+    else {
+        Py_INCREF(Py_None);
+        core_dim_flags = Py_None;
+    }
+    if (f->core_dim_sizes != NULL) {
+        core_dim_sizes = PyTuple_New(f->core_num_dim_ix);
+        if (core_dim_sizes == NULL) {
+            goto fail;
+        }
+        for (i = 0; i < f->core_num_dim_ix; i++) {
+            PyObject *val = PyLong_FromLong(f->core_dim_sizes[i]);
+            PyTuple_SET_ITEM(core_dim_sizes, i, val);
+        }
+    }
+    else {
+        Py_INCREF(Py_None);
+        core_dim_sizes = Py_None;
+    }
     Py_DECREF(f);
-    return Py_BuildValue("iOO", core_enabled, core_num_dims, core_dim_ixs);
+    return Py_BuildValue("iOOOO", core_enabled, core_num_dims,
+                         core_dim_ixs, core_dim_flags, core_dim_sizes);
 
 fail:
     Py_XDECREF(f);
     Py_XDECREF(core_num_dims);
     Py_XDECREF(core_dim_ixs);
+    Py_XDECREF(core_dim_flags);
+    Py_XDECREF(core_dim_sizes);
     return NULL;
 }
 
@@ -464,8 +580,8 @@ static PyMethodDef UMath_TestsMethods[] = {
     {"test_signature",  UMath_Tests_test_signature, METH_VARARGS,
      "Test signature parsing of ufunc. \n"
      "Arguments: nin nout signature \n"
-     "If fails, it returns NULL. Otherwise it will returns 0 for scalar ufunc "
-     "and 1 for generalized ufunc. \n",
+     "If fails, it returns NULL. Otherwise it returns a tuple of ufunc "
+     "internals. \n",
      },
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };
@@ -504,6 +620,7 @@ PyMODINIT_FUNC init_umath_tests(void) {
     if (m == NULL) {
         return RETVAL(NULL);
     }
+
     import_array();
     import_ufunc();
 
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index e62942efd..f96e621b8 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1591,6 +1591,34 @@ TIMEDELTA_mm_d_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *
     }
 }
 
+NPY_NO_EXPORT void
+TIMEDELTA_mm_m_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    BINARY_LOOP {
+        const npy_timedelta in1 = *(npy_timedelta *)ip1;
+        const npy_timedelta in2 = *(npy_timedelta *)ip2;
+        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
+            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
+        }
+        else {
+            if (in2 == 0) {
+                npy_set_floatstatus_divbyzero();
+                *((npy_timedelta *)op1) = 0;
+            }
+            else {
+                /* handle mixed case the way Python does */
+                const npy_timedelta rem = in1 % in2;
+                if ((in1 > 0) == (in2 > 0) || rem == 0) {
+                    *((npy_timedelta *)op1) = rem;
+                }
+                else {
+                    *((npy_timedelta *)op1) = rem + in2;
+                }
+            }
+        }
+    }
+}
+
 /*
  *****************************************************************************
  **                             FLOAT LOOPS                                 **
@@ -1833,10 +1861,7 @@ NPY_NO_EXPORT void
         if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) {
             BINARY_REDUCE_LOOP(@type@) {
                 const @type@ in2 = *(@type@ *)ip2;
-                io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2;
-            }
-            if (npy_isnan(io1)) {
-                npy_set_floatstatus_invalid();
+                io1 = (npy_isnan(io1) || io1 @OP@ in2) ? io1 : in2;
             }
             *((@type@ *)iop1) = io1;
         }
@@ -1845,13 +1870,11 @@ NPY_NO_EXPORT void
         BINARY_LOOP {
             @type@ in1 = *(@type@ *)ip1;
             const @type@ in2 = *(@type@ *)ip2;
-            in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2;
-            if (npy_isnan(in1)) {
-                npy_set_floatstatus_invalid();
-            }
+            in1 = (npy_isnan(in1) || in1 @OP@ in2) ? in1 : in2;
             *((@type@ *)op1) = in1;
         }
     }
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
@@ -1866,7 +1889,7 @@ NPY_NO_EXPORT void
     if (IS_BINARY_REDUCE) {
         BINARY_REDUCE_LOOP(@type@) {
             const @type@ in2 = *(@type@ *)ip2;
-            io1 = (io1 @OP@ in2 || npy_isnan(in2)) ? io1 : in2;
+            io1 = (npy_isnan(in2) || io1 @OP@ in2) ? io1 : in2;
         }
         *((@type@ *)iop1) = io1;
     }
@@ -1874,7 +1897,7 @@ NPY_NO_EXPORT void
         BINARY_LOOP {
             const @type@ in1 = *(@type@ *)ip1;
             const @type@ in2 = *(@type@ *)ip2;
-            *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
+            *((@type@ *)op1) = (npy_isnan(in2) || in1 @OP@ in2) ? in1 : in2;
         }
     }
     npy_clear_floatstatus_barrier((char*)dimensions);
@@ -2195,14 +2218,11 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
 {
     /*  */
     BINARY_LOOP {
-        npy_half in1 = *(npy_half *)ip1;
+        const npy_half in1 = *(npy_half *)ip1;
         const npy_half in2 = *(npy_half *)ip2;
-        in1 = (@OP@(in1, in2) || npy_half_isnan(in1)) ? in1 : in2;
-        if (npy_half_isnan(in1)) {
-            npy_set_floatstatus_invalid();
-        } 
-        *((npy_half *)op1) = in1;
+        *((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in1)) ? in1 : in2;
     }
+    /* npy_half_isnan will never set floatstatus_invalid, so do not clear */
 }
 /**end repeat**/
 
@@ -2219,7 +2239,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
         const npy_half in2 = *(npy_half *)ip2;
         *((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in2)) ? in1 : in2;
     }
-    npy_clear_floatstatus_barrier((char*)dimensions);
+    /* npy_half_isnan will never set floatstatus_invalid, so do not clear */
 }
 /**end repeat**/
 
@@ -2761,16 +2781,14 @@ NPY_NO_EXPORT void
         @ftype@ in1i = ((@ftype@ *)ip1)[1];
         const @ftype@ in2r = ((@ftype@ *)ip2)[0];
         const @ftype@ in2i = ((@ftype@ *)ip2)[1];
-        if ( !(@OP@(in1r, in1i, in2r, in2i) || npy_isnan(in1r) || npy_isnan(in1i))) {
+        if ( !(npy_isnan(in1r) || npy_isnan(in1i) || @OP@(in1r, in1i, in2r, in2i))) {
             in1r = in2r;
             in1i = in2i;
         }
-        if (npy_isnan(in1r) || npy_isnan(in1i)) { 
-            npy_set_floatstatus_invalid();
-        }
         ((@ftype@ *)op1)[0] = in1r;
         ((@ftype@ *)op1)[1] = in1i;
     }
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
@@ -2786,7 +2804,7 @@ NPY_NO_EXPORT void
         const @ftype@ in1i = ((@ftype@ *)ip1)[1];
         const @ftype@ in2r = ((@ftype@ *)ip2)[0];
         const @ftype@ in2i = ((@ftype@ *)ip2)[1];
-        if (@OP@(in1r, in1i, in2r, in2i) || npy_isnan(in2r) || npy_isnan(in2i)) {
+        if (npy_isnan(in2r) || npy_isnan(in2i) || @OP@(in1r, in1i, in2r, in2i)) {
             ((@ftype@ *)op1)[0] = in1r;
             ((@ftype@ *)op1)[1] = in1i;
         }
@@ -2795,6 +2813,7 @@ NPY_NO_EXPORT void
             ((@ftype@ *)op1)[1] = in2i;
         }
     }
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index 5c2b2c22c..9b6327308 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -473,6 +473,9 @@ TIMEDELTA_md_m_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *
 NPY_NO_EXPORT void
 TIMEDELTA_mm_d_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 
+NPY_NO_EXPORT void
+TIMEDELTA_mm_m_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
 /* Special case equivalents to above functions */
 
 #define TIMEDELTA_mq_m_true_divide TIMEDELTA_mq_m_divide
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 5c0568c12..da0713b2b 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -17,8 +17,6 @@
 
 #include "lowlevel_strided_loops.h"
 #include "numpy/npy_common.h"
-/* for NO_FLOATING_POINT_SUPPORT */
-#include "numpy/ufuncobject.h"
 #include "numpy/npy_math.h"
 #ifdef NPY_HAVE_SSE2_INTRINSICS
 #include <emmintrin.h>
@@ -132,7 +130,6 @@ abs_ptrdiff(char *a, char *b)
  * #func = sqrt, absolute, negative, minimum, maximum#
  * #check = IS_BLOCKABLE_UNARY*3, IS_BLOCKABLE_REDUCE*2 #
  * #name = unary*3, unary_reduce*2#
- * #minmax = 0*3, 1*2#
  */
 
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
@@ -146,9 +143,6 @@ sse2_@func@_@TYPE@(@type@ *, @type@ *, const npy_intp n);
 static NPY_INLINE int
 run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
 {
-#if @minmax@ && (defined NO_FLOATING_POINT_SUPPORT)
-    return 0;
-#else
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
     if (@check@(sizeof(@type@), 16)) {
         sse2_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0]);
@@ -156,7 +150,6 @@ run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps
     }
 #endif
     return 0;
-#endif
 }
 
 /**end repeat1**/
@@ -1021,7 +1014,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
 {
     const npy_intp stride = 16 / (npy_intp)sizeof(@type@);
     LOOP_BLOCK_ALIGN_VAR(ip, @type@, 16) {
-        *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
+        *op = (npy_isnan(*op) || *op @OP@ ip[i]) ? *op : ip[i];
     }
     assert(n < (stride) || npy_is_aligned(&ip[i], 16));
     if (i + 3 * stride <= n) {
@@ -1045,15 +1038,13 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
         }
         else {
             @type@ tmp = sse2_horizontal_@VOP@_@vtype@(c1);
-            *op  = (*op @OP@ tmp || npy_isnan(*op)) ? *op : tmp;
+            *op  = (npy_isnan(*op) || *op @OP@ tmp) ? *op : tmp;
         }
     }
     LOOP_BLOCKED_END {
-        *op  = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
-    }
-    if (npy_isnan(*op)) {
-        npy_set_floatstatus_invalid();
+        *op  = (npy_isnan(*op) || *op @OP@ ip[i]) ? *op : ip[i];
     }
+    npy_clear_floatstatus_barrier((char*)op);
 }
 /**end repeat1**/
 
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 459b0a594..8fb731fb7 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -46,6 +46,7 @@
 #include "npy_import.h"
 #include "extobj.h"
 #include "common.h"
+#include "numpyos.h"
 
 /********** PRINTF DEBUG TRACING **************/
 #define NPY_UF_DBG_TRACING 0
@@ -480,7 +481,27 @@ _is_alnum_underscore(char ch)
 }
 
 /*
- * Return the ending position of a variable name
+ * Convert a string into a number
+ */
+static npy_intp
+_get_size(const char* str)
+{
+    char *stop;
+    npy_longlong size = NumPyOS_strtoll(str, &stop, 10);
+
+    if (stop == str || _is_alpha_underscore(*stop)) {
+        /* not a well formed number */
+         return -1;
+    }
+    if (size >= NPY_MAX_INTP || size <= NPY_MIN_INTP) {
+        /* len(str) too long */
+        return -1;
+    }
+    return size;
+ }
+
+/*
+ * Return the ending position of a variable name including optional modifier
  */
 static int
 _get_end_of_name(const char* str, int offset)
@@ -489,6 +510,9 @@ _get_end_of_name(const char* str, int offset)
     while (_is_alnum_underscore(str[ret])) {
         ret++;
     }
+    if (str[ret] == '?') {
+        ret ++;
+    }
     return ret;
 }
 
@@ -530,7 +554,6 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
                         "_parse_signature with NULL signature");
         return -1;
     }
-
     len = strlen(signature);
     ufunc->core_signature = PyArray_malloc(sizeof(char) * (len+1));
     if (ufunc->core_signature) {
@@ -546,13 +569,22 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
     ufunc->core_enabled = 1;
     ufunc->core_num_dim_ix = 0;
     ufunc->core_num_dims = PyArray_malloc(sizeof(int) * ufunc->nargs);
-    ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len); /* shrink this later */
     ufunc->core_offsets = PyArray_malloc(sizeof(int) * ufunc->nargs);
-    if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL
-        || ufunc->core_offsets == NULL) {
+    /* The next three items will be shrunk later */
+    ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len);
+    ufunc->core_dim_sizes = PyArray_malloc(sizeof(npy_intp) * len);
+    ufunc->core_dim_flags = PyArray_malloc(sizeof(npy_uint32) * len);
+
+    if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL ||
+        ufunc->core_offsets == NULL ||
+        ufunc->core_dim_sizes == NULL ||
+        ufunc->core_dim_flags == NULL) {
         PyErr_NoMemory();
         goto fail;
     }
+    for (i = 0; i < len; i++) {
+        ufunc->core_dim_flags[i] = 0;
+    }
 
     i = _next_non_white_space(signature, 0);
     while (signature[i] != '\0') {
@@ -577,26 +609,70 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
         i = _next_non_white_space(signature, i + 1);
         while (signature[i] != ')') {
             /* loop over core dimensions */
-            int j = 0;
-            if (!_is_alpha_underscore(signature[i])) {
-                parse_error = "expect dimension name";
+            int ix, i_end;
+            npy_intp frozen_size;
+            npy_bool can_ignore;
+
+            if (signature[i] == '\0') {
+                parse_error = "unexpected end of signature string";
                 goto fail;
             }
-            while (j < ufunc->core_num_dim_ix) {
-                if (_is_same_name(signature+i, var_names[j])) {
+            /*
+             * Is this a variable or a fixed size dimension?
+             */
+            if (_is_alpha_underscore(signature[i])) {
+                frozen_size = -1;
+            }
+            else {
+                frozen_size = (npy_intp)_get_size(signature + i);
+                if (frozen_size <= 0) {
+                    parse_error = "expect dimension name or non-zero frozen size";
+                    goto fail;
+                }
+            }
+            /* Is this dimension flexible? */
+            i_end = _get_end_of_name(signature, i);
+            can_ignore = (i_end > 0 && signature[i_end - 1] == '?');
+            /*
+             * Determine whether we already saw this dimension name,
+             * get its index, and set its properties
+             */
+            for(ix = 0; ix < ufunc->core_num_dim_ix; ix++) {
+                if (frozen_size > 0 ?
+                    frozen_size == ufunc->core_dim_sizes[ix] :
+                    _is_same_name(signature + i, var_names[ix])) {
                     break;
                 }
-                j++;
             }
-            if (j >= ufunc->core_num_dim_ix) {
-                var_names[j] = signature+i;
+            /*
+             * If a new dimension, store its properties; if old, check consistency.
+             */
+            if (ix == ufunc->core_num_dim_ix) {
                 ufunc->core_num_dim_ix++;
+                var_names[ix] = signature + i;
+                ufunc->core_dim_sizes[ix] = frozen_size;
+                if (frozen_size < 0) {
+                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_SIZE_INFERRED;
+                }
+                if (can_ignore) {
+                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_CAN_IGNORE;
+                }
+            } else {
+                if (can_ignore && !(ufunc->core_dim_flags[ix] &
+                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
+                    parse_error = "? cannot be used, name already seen without ?";
+                    goto fail;
+                }
+                if (!can_ignore && (ufunc->core_dim_flags[ix] &
+                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
+                    parse_error = "? must be used, name already seen with ?";
+                    goto fail;
+                }
             }
-            ufunc->core_dim_ixs[cur_core_dim] = j;
+            ufunc->core_dim_ixs[cur_core_dim] = ix;
             cur_core_dim++;
             nd++;
-            i = _get_end_of_name(signature, i);
-            i = _next_non_white_space(signature, i);
+            i = _next_non_white_space(signature, i_end);
             if (signature[i] != ',' && signature[i] != ')') {
                 parse_error = "expect ',' or ')'";
                 goto fail;
@@ -633,7 +709,14 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
         goto fail;
     }
     ufunc->core_dim_ixs = PyArray_realloc(ufunc->core_dim_ixs,
-            sizeof(int)*cur_core_dim);
+            sizeof(int) * cur_core_dim);
+    ufunc->core_dim_sizes = PyArray_realloc(
+            ufunc->core_dim_sizes,
+            sizeof(npy_intp) * ufunc->core_num_dim_ix);
+    ufunc->core_dim_flags = PyArray_realloc(
+            ufunc->core_dim_flags,
+            sizeof(npy_uint32) * ufunc->core_num_dim_ix);
+
     /* check for trivial core-signature, e.g. "(),()->()" */
     if (cur_core_dim == 0) {
         ufunc->core_enabled = 0;
@@ -664,7 +747,7 @@ _set_out_array(PyObject *obj, PyArrayObject **store)
         /* Translate None to NULL */
         return 0;
     }
-    if PyArray_Check(obj) {
+    if (PyArray_Check(obj)) {
         /* If it's an array, store it */
         if (PyArray_FailUnlessWriteable((PyArrayObject *)obj,
                                         "output array") < 0) {
@@ -1935,6 +2018,72 @@ fail:
 }
 
 /*
+ * Validate that operands have enough dimensions, accounting for
+ * possible flexible dimensions that may be absent.
+ */
+static int
+_validate_num_dims(PyUFuncObject *ufunc, PyArrayObject **op,
+                   npy_uint32 *core_dim_flags,
+                   int *op_core_num_dims) {
+    int i, j;
+    int nin = ufunc->nin;
+    int nop = ufunc->nargs;
+
+    for (i = 0; i < nop; i++) {
+        if (op[i] != NULL) {
+            int op_ndim = PyArray_NDIM(op[i]);
+
+            if (op_ndim < op_core_num_dims[i]) {
+                int core_offset = ufunc->core_offsets[i];
+                /* We've too few, but some dimensions might be flexible */
+                for (j = core_offset;
+                     j < core_offset + ufunc->core_num_dims[i]; j++) {
+                    int core_dim_index = ufunc->core_dim_ixs[j];
+                    if ((core_dim_flags[core_dim_index] &
+                         UFUNC_CORE_DIM_CAN_IGNORE)) {
+                        int i1, j1, k;
+                        /*
+                         * Found a dimension that can be ignored. Flag that
+                         * it is missing, and unflag that it can be ignored,
+                         * since we are doing so already.
+                         */
+                        core_dim_flags[core_dim_index] |= UFUNC_CORE_DIM_MISSING;
+                        core_dim_flags[core_dim_index] ^= UFUNC_CORE_DIM_CAN_IGNORE;
+                        /*
+                         * Reduce the number of core dimensions for all
+                         * operands that use this one (including ours),
+                         * and check whether we're now OK.
+                         */
+                        for (i1 = 0, k=0; i1 < nop; i1++) {
+                            for (j1 = 0; j1 < ufunc->core_num_dims[i1]; j1++) {
+                                if (ufunc->core_dim_ixs[k++] == core_dim_index) {
+                                    op_core_num_dims[i1]--;
+                                }
+                            }
+                        }
+                        if (op_ndim == op_core_num_dims[i]) {
+                            break;
+                        }
+                    }
+                }
+                if (op_ndim < op_core_num_dims[i]) {
+                    PyErr_Format(PyExc_ValueError,
+                         "%s: %s operand %d does not have enough "
+                         "dimensions (has %d, gufunc core with "
+                         "signature %s requires %d)",
+                         ufunc_get_name_cstr(ufunc),
+                         i < nin ? "Input" : "Output",
+                         i < nin ? i : i - nin, PyArray_NDIM(op[i]),
+                         ufunc->core_signature, op_core_num_dims[i]);
+                    return -1;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/*
  * Check whether any of the outputs of a gufunc has core dimensions.
  */
 static int
@@ -2007,7 +2156,7 @@ _check_keepdims_support(PyUFuncObject *ufunc) {
  * Returns 0 on success, and -1 on failure
  */
 static int
-_parse_axes_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axes,
+_parse_axes_arg(PyUFuncObject *ufunc, int op_core_num_dims[], PyObject *axes,
                 PyArrayObject **op, int broadcast_ndim, int **remap_axis) {
     int nin = ufunc->nin;
     int nop = ufunc->nargs;
@@ -2037,7 +2186,7 @@ _parse_axes_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axes,
         PyObject *op_axes_tuple, *axis_item;
         int axis, op_axis;
 
-        op_ncore = core_num_dims[iop];
+        op_ncore = op_core_num_dims[iop];
         if (op[iop] != NULL) {
             op_ndim = PyArray_NDIM(op[iop]);
             op_nbroadcast = op_ndim - op_ncore;
@@ -2191,57 +2340,72 @@ _parse_axis_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axis,
  *
  * Returns 0 on success, and -1 on failure
  *
- * The behavior has been changed in NumPy 1.10.0, and the following
+ * The behavior has been changed in NumPy 1.16.0, and the following
  * requirements must be fulfilled or an error will be raised:
  *  * Arguments, both input and output, must have at least as many
  *    dimensions as the corresponding number of core dimensions. In
- *    previous versions, 1's were prepended to the shape as needed.
+ *    versions before 1.10, 1's were prepended to the shape as needed.
  *  * Core dimensions with same labels must have exactly matching sizes.
- *    In previous versions, core dimensions of size 1 would broadcast
+ *    In versions before 1.10, core dimensions of size 1 would broadcast
  *    against other core dimensions with the same label.
  *  * All core dimensions must have their size specified by a passed in
- *    input or output argument. In previous versions, core dimensions in
+ *    input or output argument. In versions before 1.10, core dimensions in
  *    an output argument that were not specified in an input argument,
  *    and whose size could not be inferred from a passed in output
  *    argument, would have their size set to 1.
+ *  * Core dimensions may be fixed, new in NumPy 1.16
  */
 static int
 _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
-                   npy_intp* core_dim_sizes, int **remap_axis) {
+                   int *op_core_num_dims, npy_uint32 *core_dim_flags,
+                   npy_intp *core_dim_sizes, int **remap_axis) {
     int i;
     int nin = ufunc->nin;
     int nout = ufunc->nout;
     int nop = nin + nout;
 
-    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
-        core_dim_sizes[i] = -1;
-    }
     for (i = 0; i < nop; ++i) {
         if (op[i] != NULL) {
             int idim;
             int dim_offset = ufunc->core_offsets[i];
-            int num_dims = ufunc->core_num_dims[i];
-            int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
+            int core_start_dim = PyArray_NDIM(op[i]) - op_core_num_dims[i];
+            int dim_delta = 0;
+
+            /* checked before this routine gets called */
+            assert(core_start_dim >= 0);
+
             /*
              * Make sure every core dimension exactly matches all other core
-             * dimensions with the same label.
+             * dimensions with the same label. Note that flexible dimensions
+             * may have been removed at this point, if so, they are marked
+             * with UFUNC_CORE_DIM_MISSING.
              */
-            for (idim = 0; idim < num_dims; ++idim) {
-                int core_dim_index = ufunc->core_dim_ixs[dim_offset+idim];
-                npy_intp op_dim_size = PyArray_DIM(
-                    op[i], REMAP_AXIS(i, core_start_dim+idim));
-
-                if (core_dim_sizes[core_dim_index] == -1) {
+            for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+                int core_index = dim_offset + idim;
+                int core_dim_index = ufunc->core_dim_ixs[core_index];
+                npy_intp core_dim_size = core_dim_sizes[core_dim_index];
+                npy_intp op_dim_size;
+
+                /* can only happen if flexible; dimension missing altogether */
+                if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) {
+                    op_dim_size = 1;
+                    dim_delta++; /* for indexing in dimensions */
+                }
+                else {
+                    op_dim_size = PyArray_DIM(op[i],
+                             REMAP_AXIS(i, core_start_dim + idim - dim_delta));
+                }
+                if (core_dim_sizes[core_dim_index] < 0) {
                     core_dim_sizes[core_dim_index] = op_dim_size;
                 }
-                else if (op_dim_size != core_dim_sizes[core_dim_index]) {
+                else if (op_dim_size != core_dim_size) {
                     PyErr_Format(PyExc_ValueError,
                             "%s: %s operand %d has a mismatch in its "
                             "core dimension %d, with gufunc "
                             "signature %s (size %zd is different "
                             "from %zd)",
                             ufunc_get_name_cstr(ufunc), i < nin ? "Input" : "Output",
-                            i < nin ? i : i - nin, idim,
+                            i < nin ? i : i - nin, idim - dim_delta,
                             ufunc->core_signature, op_dim_size,
                             core_dim_sizes[core_dim_index]);
                     return -1;
@@ -2253,39 +2417,29 @@ _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
     /*
      * Make sure no core dimension is unspecified.
      */
-    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
-        if (core_dim_sizes[i] == -1) {
-            break;
-        }
-    }
-    if (i != ufunc->core_num_dim_ix) {
-        /*
-         * There is at least one core dimension missing, find in which
-         * operand it comes up first (it has to be an output operand).
-         */
-        const int missing_core_dim = i;
-        int out_op;
-        for (out_op = nin; out_op < nop; ++out_op) {
-            int first_idx = ufunc->core_offsets[out_op];
-            int last_idx = first_idx + ufunc->core_num_dims[out_op];
-            for (i = first_idx; i < last_idx; ++i) {
-                if (ufunc->core_dim_ixs[i] == missing_core_dim) {
-                    break;
-                }
-            }
-            if (i < last_idx) {
-                /* Change index offsets for error message */
-                out_op -= nin;
-                i -= first_idx;
-                break;
+    for (i = nin; i < nop; ++i) {
+        int idim;
+        int dim_offset = ufunc->core_offsets[i];
+
+        for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+            int core_dim_index = ufunc->core_dim_ixs[dim_offset + idim];
+
+            /* check all cases where the size has not yet been set */
+            if (core_dim_sizes[core_dim_index] < 0) {
+                /*
+                 * Oops, this dimension was never specified
+                 * (can only happen if output op not given)
+                 */
+                PyErr_Format(PyExc_ValueError,
+                        "%s: Output operand %d has core dimension %d "
+                        "unspecified, with gufunc signature %s",
+                        ufunc_get_name_cstr(ufunc), i - nin, idim,
+                        ufunc->core_signature);
+                return -1;
             }
         }
-        PyErr_Format(PyExc_ValueError,
-                     "%s: Output operand %d has core dimension %d "
-                     "unspecified, with gufunc signature %s",
-                     ufunc_get_name_cstr(ufunc), out_op, i, ufunc->core_signature);
-        return -1;
     }
+
     return 0;
 }
 
@@ -2324,6 +2478,26 @@ _get_identity(PyUFuncObject *ufunc, npy_bool *reorderable) {
     }
 }
 
+/*
+ * Copy over parts of the ufunc structure that may need to be
+ * changed during execution.  Returns 0 on success; -1 otherwise.
+ */
+static int
+_initialize_variable_parts(PyUFuncObject *ufunc,
+                           int op_core_num_dims[],
+                           npy_intp core_dim_sizes[],
+                           npy_uint32 core_dim_flags[]) {
+    int i;
+
+    for (i = 0; i < ufunc->nargs; i++) {
+        op_core_num_dims[i] = ufunc->core_num_dims[i];
+    }
+    for (i = 0; i < ufunc->core_num_dim_ix; i++) {
+        core_dim_sizes[i] = ufunc->core_dim_sizes[i];
+        core_dim_flags[i] = ufunc->core_dim_flags[i];
+    }
+    return 0;
+}
 
 static int
 PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
@@ -2340,10 +2514,10 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
 
     /* Use remapped axes for generalized ufunc */
     int broadcast_ndim, iter_ndim;
-    int core_num_dims_array[NPY_MAXARGS];
-    int *core_num_dims;
+    int op_core_num_dims[NPY_MAXARGS];
     int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
     int *op_axes[NPY_MAXARGS];
+    npy_uint32 core_dim_flags[NPY_MAXARGS];
 
     npy_uint32 op_flags[NPY_MAXARGS];
     npy_intp iter_shape[NPY_MAXARGS];
@@ -2398,6 +2572,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         dtypes[i] = NULL;
         arr_prep[i] = NULL;
     }
+    /* Initialize possibly variable parts to the values from the ufunc */
+    retval = _initialize_variable_parts(ufunc, op_core_num_dims,
+                                        core_dim_sizes, core_dim_flags);
+    if (retval < 0) {
+        goto fail;
+    }
 
     NPY_UF_DBG_PRINT("Getting arguments\n");
 
@@ -2429,41 +2609,28 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         }
     }
     /*
-     * If keepdims is set and true, signal all dimensions will be the same.
+     * If keepdims is set and true, which means all input dimensions are
+     * the same, signal that all output dimensions will be the same too.
      */
     if (keepdims == 1) {
-        int num_dims = ufunc->core_num_dims[0];
-        for (i = 0; i < nop; ++i) {
-            core_num_dims_array[i] = num_dims;
+        int num_dims = op_core_num_dims[0];
+        for (i = nin; i < nop; ++i) {
+            op_core_num_dims[i] = num_dims;
         }
-        core_num_dims = core_num_dims_array;
     }
     else {
         /* keepdims was not set or was false; no adjustment necessary */
-        core_num_dims = ufunc->core_num_dims;
         keepdims = 0;
     }
     /*
      * Check that operands have the minimum dimensions required.
      * (Just checks core; broadcast dimensions are tested by the iterator.)
      */
-    for (i = 0; i < nop; i++) {
-        if (op[i] != NULL && PyArray_NDIM(op[i]) < core_num_dims[i]) {
-            PyErr_Format(PyExc_ValueError,
-                         "%s: %s operand %d does not have enough "
-                         "dimensions (has %d, gufunc core with "
-                         "signature %s requires %d)",
-                         ufunc_name,
-                         i < nin ? "Input" : "Output",
-                         i < nin ? i : i - nin,
-                         PyArray_NDIM(op[i]),
-                         ufunc->core_signature,
-                         core_num_dims[i]);
-            retval = -1;
-            goto fail;
-        }
+    retval = _validate_num_dims(ufunc, op, core_dim_flags,
+                                op_core_num_dims);
+    if (retval < 0) {
+        goto fail;
     }
-
     /*
      * Figure out the number of iteration dimensions, which
      * is the broadcast result of all the input non-core
@@ -2471,30 +2638,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
      */
     broadcast_ndim = 0;
     for (i = 0; i < nin; ++i) {
-        int n = PyArray_NDIM(op[i]) - core_num_dims[i];
+        int n = PyArray_NDIM(op[i]) - op_core_num_dims[i];
         if (n > broadcast_ndim) {
             broadcast_ndim = n;
         }
     }
 
-    /*
-     * Figure out the number of iterator creation dimensions,
-     * which is the broadcast dimensions + all the core dimensions of
-     * the outputs, so that the iterator can allocate those output
-     * dimensions following the rules of order='F', for example.
-     */
-    iter_ndim = broadcast_ndim;
-    for (i = nin; i < nop; ++i) {
-        iter_ndim += core_num_dims[i];
-    }
-    if (iter_ndim > NPY_MAXDIMS) {
-        PyErr_Format(PyExc_ValueError,
-                    "too many dimensions for generalized ufunc %s",
-                    ufunc_name);
-        retval = -1;
-        goto fail;
-    }
-
     /* Possibly remap axes. */
     if (axes != NULL || axis != NULL) {
         remap_axis = PyArray_malloc(sizeof(remap_axis[0]) * nop);
@@ -2508,11 +2657,11 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
             remap_axis[i] = remap_axis_memory + i * NPY_MAXDIMS;
         }
         if (axis) {
-            retval = _parse_axis_arg(ufunc, core_num_dims, axis, op,
+            retval = _parse_axis_arg(ufunc, op_core_num_dims, axis, op,
                                      broadcast_ndim, remap_axis);
         }
         else {
-            retval = _parse_axes_arg(ufunc, core_num_dims, axes, op,
+            retval = _parse_axes_arg(ufunc, op_core_num_dims, axes, op,
                                      broadcast_ndim, remap_axis);
         }
         if(retval < 0) {
@@ -2521,10 +2670,28 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     }
 
     /* Collect the lengths of the labelled core dimensions */
-    retval = _get_coredim_sizes(ufunc, op, core_dim_sizes, remap_axis);
+    retval = _get_coredim_sizes(ufunc, op, op_core_num_dims, core_dim_flags,
+                                core_dim_sizes, remap_axis);
     if(retval < 0) {
         goto fail;
     }
+    /*
+     * Figure out the number of iterator creation dimensions,
+     * which is the broadcast dimensions + all the core dimensions of
+     * the outputs, so that the iterator can allocate those output
+     * dimensions following the rules of order='F', for example.
+     */
+    iter_ndim = broadcast_ndim;
+    for (i = nin; i < nop; ++i) {
+        iter_ndim += op_core_num_dims[i];
+    }
+    if (iter_ndim > NPY_MAXDIMS) {
+        PyErr_Format(PyExc_ValueError,
+                    "too many dimensions for generalized ufunc %s",
+                    ufunc_name);
+        retval = -1;
+        goto fail;
+    }
 
     /* Fill in the initial part of 'iter_shape' */
     for (idim = 0; idim < broadcast_ndim; ++idim) {
@@ -2537,11 +2704,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         int n;
 
         if (op[i]) {
-            /*
-             * Note that n may be negative if broadcasting
-             * extends into the core dimensions.
-             */
-            n = PyArray_NDIM(op[i]) - core_num_dims[i];
+            n = PyArray_NDIM(op[i]) - op_core_num_dims[i];
         }
         else {
             n = broadcast_ndim;
@@ -2565,24 +2728,49 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         /* Except for when it belongs to this output */
         if (i >= nin) {
             int dim_offset = ufunc->core_offsets[i];
-            int num_dims = core_num_dims[i];
+            int num_removed = 0;
             /*
              * Fill in 'iter_shape' and 'op_axes' for the core dimensions
              * of this output. Here, we have to be careful: if keepdims
-             * was used, then this axis is not a real core dimension,
-             * but is being added back for broadcasting, so its size is 1.
+             * was used, then the axes are not real core dimensions, but
+             * are being added back for broadcasting, so their size is 1.
+             * If the axis was removed, we should skip altogether.
              */
-            for (idim = 0; idim < num_dims; ++idim) {
-                iter_shape[j] = keepdims ? 1 : core_dim_sizes[
-                                        ufunc->core_dim_ixs[dim_offset + idim]];
-                op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim);
-                ++j;
+            if (keepdims) {
+                for (idim = 0; idim < op_core_num_dims[i]; ++idim) {
+                    iter_shape[j] = 1;
+                    op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim);
+                    ++j;
+                }
+            }
+            else {
+                for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+                    int core_index = dim_offset + idim;
+                    int core_dim_index = ufunc->core_dim_ixs[core_index];
+                    if ((core_dim_flags[core_dim_index] &
+                         UFUNC_CORE_DIM_MISSING)) {
+                        /* skip it */
+                        num_removed++;
+                        continue;
+                    }
+                    iter_shape[j] = core_dim_sizes[ufunc->core_dim_ixs[core_index]];
+                    op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim - num_removed);
+                    ++j;
+                }
             }
         }
 
         op_axes[i] = op_axes_arrays[i];
     }
 
+#if NPY_UF_DBG_TRACING
+    printf("iter shapes:");
+    for (j=0; j < iter_ndim; j++) {
+        printf(" %ld", iter_shape[j]);
+    }
+    printf("\n");
+#endif
+
     /* Get the buffersize and errormask */
     if (_get_bufsize_errmask(extobj, ufunc_name, &buffersize, &errormask) < 0) {
         retval = -1;
@@ -2705,8 +2893,6 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     /* Copy the strides after the first nop */
     idim = nop;
     for (i = 0; i < nop; ++i) {
-        int num_dims = ufunc->core_num_dims[i];
-        int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
         /*
          * Need to use the arrays in the iterator, not op, because
          * a copy with a different-sized type may have been made.
@@ -2714,20 +2900,31 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         PyArrayObject *arr = NpyIter_GetOperandArray(iter)[i];
         npy_intp *shape = PyArray_SHAPE(arr);
         npy_intp *strides = PyArray_STRIDES(arr);
-        for (j = 0; j < num_dims; ++j) {
-            if (core_start_dim + j >= 0) {
-                /*
-                 * Force the stride to zero when the shape is 1, so
-                 * that the broadcasting works right.
-                 */
-                int remapped_axis = REMAP_AXIS(i, core_start_dim + j);
+        /*
+         * Could be negative if flexible dims are used, but not for
+         * keepdims, since those dimensions are allocated in arr.
+         */
+        int core_start_dim = PyArray_NDIM(arr) - op_core_num_dims[i];
+        int num_removed = 0;
+        int dim_offset = ufunc->core_offsets[i];
+
+        for (j = 0; j < ufunc->core_num_dims[i]; ++j) {
+            int core_dim_index = ufunc->core_dim_ixs[dim_offset + j];
+            /*
+             * Force zero stride when the shape is 1 (always the case for
+             * for missing dimensions), so that broadcasting works right.
+             */
+            if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) {
+                num_removed++;
+                inner_strides[idim++] = 0;
+            }
+            else {
+                int remapped_axis = REMAP_AXIS(i, core_start_dim + j - num_removed);
                 if (shape[remapped_axis] != 1) {
                     inner_strides[idim++] = strides[remapped_axis];
                 } else {
                     inner_strides[idim++] = 0;
                 }
-            } else {
-                inner_strides[idim++] = 0;
             }
         }
     }
@@ -4644,7 +4841,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
                                      int unused, const char *signature)
 {
     PyUFuncObject *ufunc;
-
     if (nin + nout > NPY_MAXARGS) {
         PyErr_Format(PyExc_ValueError,
                      "Cannot construct a ufunc with more than %d operands "
@@ -4657,11 +4853,9 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     if (ufunc == NULL) {
         return NULL;
     }
+    memset(ufunc, 0, sizeof(PyUFuncObject));
     PyObject_Init((PyObject *)ufunc, &PyUFunc_Type);
 
-    ufunc->reserved1 = 0;
-    ufunc->reserved2 = NULL;
-
     ufunc->nin = nin;
     ufunc->nout = nout;
     ufunc->nargs = nin+nout;
@@ -4671,9 +4865,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     ufunc->data = data;
     ufunc->types = types;
     ufunc->ntypes = ntypes;
-    ufunc->ptr = NULL;
-    ufunc->obj = NULL;
-    ufunc->userloops=NULL;
 
     /* Type resolution and inner loop selection functions */
     ufunc->type_resolver = &PyUFunc_DefaultTypeResolver;
@@ -4694,15 +4885,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     }
     memset(ufunc->op_flags, 0, sizeof(npy_uint32)*ufunc->nargs);
 
-    ufunc->iter_flags = 0;
-
-    /* generalized ufunc */
-    ufunc->core_enabled = 0;
-    ufunc->core_num_dim_ix = 0;
-    ufunc->core_num_dims = NULL;
-    ufunc->core_dim_ixs = NULL;
-    ufunc->core_offsets = NULL;
-    ufunc->core_signature = NULL;
     if (signature != NULL) {
         if (_parse_signature(ufunc, signature) != 0) {
             Py_DECREF(ufunc);
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 807b03512..6b042d837 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -40,6 +40,25 @@ npy_casting_to_string(NPY_CASTING casting)
             return "<unknown>";
     }
 }
+
+static int
+raise_binary_type_reso_error(PyUFuncObject *ufunc, PyArrayObject **operands) {
+    PyObject *errmsg;
+    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
+    errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
+                        "with types ", ufunc_name);
+    PyUString_ConcatAndDel(&errmsg,
+            PyObject_Repr((PyObject *)PyArray_DESCR(operands[0])));
+    PyUString_ConcatAndDel(&errmsg,
+            PyUString_FromString(" and "));
+    PyUString_ConcatAndDel(&errmsg,
+            PyObject_Repr((PyObject *)PyArray_DESCR(operands[1])));
+    PyErr_SetObject(PyExc_TypeError, errmsg);
+    Py_DECREF(errmsg);
+    return -1;
+}
+
+
 /*UFUNC_API
  *
  * Validates that the input operands can be cast to
@@ -605,7 +624,6 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
 {
     int type_num1, type_num2;
     int i;
-    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
 
     type_num1 = PyArray_DESCR(operands[0])->type_num;
     type_num2 = PyArray_DESCR(operands[1])->type_num;
@@ -661,7 +679,7 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (type_num1 == NPY_DATETIME) {
@@ -703,7 +721,7 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (PyTypeNum_ISINTEGER(type_num1) || PyTypeNum_ISBOOL(type_num1)) {
@@ -739,11 +757,11 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
             type_num1 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else {
-        goto type_reso_error;
+        return raise_binary_type_reso_error(ufunc, operands);
     }
 
     /* Check against the casting rules */
@@ -756,21 +774,6 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
     }
 
     return 0;
-
-type_reso_error: {
-        PyObject *errmsg;
-        errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
-                            "with types ", ufunc_name);
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[0])));
-        PyUString_ConcatAndDel(&errmsg,
-                PyUString_FromString(" and "));
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[1])));
-        PyErr_SetObject(PyExc_TypeError, errmsg);
-        Py_DECREF(errmsg);
-        return -1;
-    }
 }
 
 /*
@@ -793,7 +796,6 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
 {
     int type_num1, type_num2;
     int i;
-    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
 
     type_num1 = PyArray_DESCR(operands[0])->type_num;
     type_num2 = PyArray_DESCR(operands[1])->type_num;
@@ -846,7 +848,7 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (type_num1 == NPY_DATETIME) {
@@ -904,7 +906,7 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
             Py_INCREF(out_dtypes[1]);
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (PyTypeNum_ISINTEGER(type_num1) || PyTypeNum_ISBOOL(type_num1)) {
@@ -922,11 +924,11 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
             type_num1 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else {
-        goto type_reso_error;
+        return raise_binary_type_reso_error(ufunc, operands);
     }
 
     /* Check against the casting rules */
@@ -939,21 +941,6 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
     }
 
     return 0;
-
-type_reso_error: {
-        PyObject *errmsg;
-        errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
-                            "with types ", ufunc_name);
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[0])));
-        PyUString_ConcatAndDel(&errmsg,
-                PyUString_FromString(" and "));
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[1])));
-        PyErr_SetObject(PyExc_TypeError, errmsg);
-        Py_DECREF(errmsg);
-        return -1;
-    }
 }
 
 /*
@@ -973,7 +960,6 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
 {
     int type_num1, type_num2;
     int i;
-    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
 
     type_num1 = PyArray_DESCR(operands[0])->type_num;
     type_num2 = PyArray_DESCR(operands[1])->type_num;
@@ -1020,7 +1006,7 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_DOUBLE;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (PyTypeNum_ISINTEGER(type_num1) || PyTypeNum_ISBOOL(type_num1)) {
@@ -1042,7 +1028,7 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
             type_num1 = NPY_LONGLONG;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (PyTypeNum_ISFLOAT(type_num1)) {
@@ -1064,11 +1050,11 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
             type_num1 = NPY_DOUBLE;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else {
-        goto type_reso_error;
+        return raise_binary_type_reso_error(ufunc, operands);
     }
 
     /* Check against the casting rules */
@@ -1081,21 +1067,6 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
     }
 
     return 0;
-
-type_reso_error: {
-        PyObject *errmsg;
-        errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
-                            "with types ", ufunc_name);
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[0])));
-        PyUString_ConcatAndDel(&errmsg,
-                PyUString_FromString(" and "));
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[1])));
-        PyErr_SetObject(PyExc_TypeError, errmsg);
-        Py_DECREF(errmsg);
-        return -1;
-    }
 }
 
 
@@ -1115,7 +1086,6 @@ PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
 {
     int type_num1, type_num2;
     int i;
-    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
 
     type_num1 = PyArray_DESCR(operands[0])->type_num;
     type_num2 = PyArray_DESCR(operands[1])->type_num;
@@ -1183,11 +1153,11 @@ PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_DOUBLE;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else {
-        goto type_reso_error;
+        return raise_binary_type_reso_error(ufunc, operands);
     }
 
     /* Check against the casting rules */
@@ -1200,21 +1170,57 @@ PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
     }
 
     return 0;
+}
 
-type_reso_error: {
-        PyObject *errmsg;
-        errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
-                            "with types ", ufunc_name);
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[0])));
-        PyUString_ConcatAndDel(&errmsg,
-                PyUString_FromString(" and "));
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[1])));
-        PyErr_SetObject(PyExc_TypeError, errmsg);
-        Py_DECREF(errmsg);
+
+NPY_NO_EXPORT int
+PyUFunc_RemainderTypeResolver(PyUFuncObject *ufunc,
+                                NPY_CASTING casting,
+                                PyArrayObject **operands,
+                                PyObject *type_tup,
+                                PyArray_Descr **out_dtypes)
+{
+    int type_num1, type_num2;
+    int i;
+
+    type_num1 = PyArray_DESCR(operands[0])->type_num;
+    type_num2 = PyArray_DESCR(operands[1])->type_num;
+
+    /* Use the default when datetime and timedelta are not involved */
+    if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
+                    type_tup, out_dtypes);
+    }
+    if (type_num1 == NPY_TIMEDELTA) {
+        if (type_num2 == NPY_TIMEDELTA) {
+            out_dtypes[0] = PyArray_PromoteTypes(PyArray_DESCR(operands[0]),
+                                                PyArray_DESCR(operands[1]));
+            if (out_dtypes[0] == NULL) {
+                return -1;
+            }
+            out_dtypes[1] = out_dtypes[0];
+            Py_INCREF(out_dtypes[1]);
+            out_dtypes[2] = out_dtypes[0];
+            Py_INCREF(out_dtypes[2]);
+        }
+        else {
+            return raise_binary_type_reso_error(ufunc, operands);
+        }
+    }
+    else {
+        return raise_binary_type_reso_error(ufunc, operands);
+    }
+
+    /* Check against the casting rules */
+    if (PyUFunc_ValidateCasting(ufunc, casting, operands, out_dtypes) < 0) {
+        for (i = 0; i < 3; ++i) {
+            Py_DECREF(out_dtypes[i]);
+            out_dtypes[i] = NULL;
+        }
         return -1;
     }
+
+    return 0;
 }
 
 
@@ -1275,7 +1281,7 @@ PyUFunc_MixedDivisionTypeResolver(PyUFuncObject *ufunc,
                                   PyObject *type_tup,
                                   PyArray_Descr **out_dtypes)
 {
- /* Depreciation checks needed only on python 2 */
+ /* Deprecation checks needed only on python 2 */
 #if !defined(NPY_PY3K)
     int type_num1, type_num2;
 
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index fa9f1dbfa..bb4823d24 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -92,6 +92,13 @@ PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
                              PyObject *type_tup,
                              PyArray_Descr **out_dtypes);
 
+NPY_NO_EXPORT int
+PyUFunc_RemainderTypeResolver(PyUFuncObject *ufunc,
+                              NPY_CASTING casting,
+                              PyArrayObject **operands,
+                              PyObject *type_tup,
+                              PyArray_Descr **out_dtypes);
+
 /*
  * Does a linear search for the best inner loop of the ufunc.
  *
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index c4918f955..b2ce0402a 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -1,6 +1,5 @@
 from __future__ import division, absolute_import, print_function
 
-import pickle
 
 import numpy
 import numpy as np
@@ -8,7 +7,9 @@ import datetime
 import pytest
 from numpy.testing import (
     assert_, assert_equal, assert_raises, assert_warns, suppress_warnings,
+    assert_raises_regex,
     )
+from numpy.core.numeric import pickle
 
 # Use pytz to test out various time zones if available
 try:
@@ -257,6 +258,21 @@ class TestDateTime(object):
         arr = np.array([dt, dt]).astype('datetime64')
         assert_equal(arr.dtype, np.dtype('M8[us]'))
 
+    @pytest.mark.parametrize("unit", [
+    # test all date / time units and use
+    # "generic" to select generic unit
+    ("Y"), ("M"), ("W"), ("D"), ("h"), ("m"),
+    ("s"), ("ms"), ("us"), ("ns"), ("ps"),
+    ("fs"), ("as"), ("generic") ])
+    def test_timedelta_np_int_construction(self, unit):
+        # regression test for gh-7617
+        if unit != "generic":
+            assert_equal(np.timedelta64(np.int64(123), unit),
+                         np.timedelta64(123, unit))
+        else:
+            assert_equal(np.timedelta64(np.int64(123)),
+                         np.timedelta64(123))
+
     def test_timedelta_scalar_construction(self):
         # Construct with different units
         assert_equal(np.timedelta64(7, 'D'),
@@ -623,14 +639,17 @@ class TestDateTime(object):
 
     def test_pickle(self):
         # Check that pickle roundtripping works
-        dt = np.dtype('M8[7D]')
-        assert_equal(pickle.loads(pickle.dumps(dt)), dt)
-        dt = np.dtype('M8[W]')
-        assert_equal(pickle.loads(pickle.dumps(dt)), dt)
-        scalar = np.datetime64('2016-01-01T00:00:00.000000000')
-        assert_equal(pickle.loads(pickle.dumps(scalar)), scalar)
-        delta = scalar - np.datetime64('2015-01-01T00:00:00.000000000')
-        assert_equal(pickle.loads(pickle.dumps(delta)), delta)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            dt = np.dtype('M8[7D]')
+            assert_equal(pickle.loads(pickle.dumps(dt, protocol=proto)), dt)
+            dt = np.dtype('M8[W]')
+            assert_equal(pickle.loads(pickle.dumps(dt, protocol=proto)), dt)
+            scalar = np.datetime64('2016-01-01T00:00:00.000000000')
+            assert_equal(pickle.loads(pickle.dumps(scalar, protocol=proto)),
+                         scalar)
+            delta = scalar - np.datetime64('2015-01-01T00:00:00.000000000')
+            assert_equal(pickle.loads(pickle.dumps(delta, protocol=proto)),
+                         delta)
 
         # Check that loading pickles from 1.6 works
         pkl = b"cnumpy\ndtype\np0\n(S'M8'\np1\nI0\nI1\ntp2\nRp3\n" + \
@@ -1608,6 +1627,76 @@ class TestDateTime(object):
         assert_raises(TypeError, np.arange, np.timedelta64(0, 'Y'),
                                 np.timedelta64(5, 'D'))
 
+    @pytest.mark.parametrize("val1, val2, expected", [
+        # case from gh-12092
+        (np.timedelta64(7, 's'),
+         np.timedelta64(3, 's'),
+         np.timedelta64(1, 's')),
+        # negative value cases
+        (np.timedelta64(3, 's'),
+         np.timedelta64(-2, 's'),
+         np.timedelta64(-1, 's')),
+        (np.timedelta64(-3, 's'),
+         np.timedelta64(2, 's'),
+         np.timedelta64(1, 's')),
+        # larger value cases
+        (np.timedelta64(17, 's'),
+         np.timedelta64(22, 's'),
+         np.timedelta64(17, 's')),
+        (np.timedelta64(22, 's'),
+         np.timedelta64(17, 's'),
+         np.timedelta64(5, 's')),
+        # different units
+        (np.timedelta64(1, 'm'),
+         np.timedelta64(57, 's'),
+         np.timedelta64(3, 's')),
+        (np.timedelta64(1, 'us'),
+         np.timedelta64(727, 'ns'),
+         np.timedelta64(273, 'ns')),
+        # NaT is propagated
+        (np.timedelta64('NaT'),
+         np.timedelta64(50, 'ns'),
+         np.timedelta64('NaT')),
+        # Y % M works
+        (np.timedelta64(2, 'Y'),
+         np.timedelta64(22, 'M'),
+         np.timedelta64(2, 'M')),
+        ])
+    def test_timedelta_modulus(self, val1, val2, expected):
+        assert_equal(val1 % val2, expected)
+
+    @pytest.mark.parametrize("val1, val2", [
+        # years and months sometimes can't be unambiguously
+        # divided for modulus operation
+        (np.timedelta64(7, 'Y'),
+         np.timedelta64(3, 's')),
+        (np.timedelta64(7, 'M'),
+         np.timedelta64(1, 'D')),
+        ])
+    def test_timedelta_modulus_error(self, val1, val2):
+        with assert_raises_regex(TypeError, "common metadata divisor"):
+            val1 % val2
+
+    def test_timedelta_modulus_div_by_zero(self):
+        with assert_warns(RuntimeWarning):
+            actual = np.timedelta64(10, 's') % np.timedelta64(0, 's')
+            assert_equal(actual, np.timedelta64(0, 's'))
+
+    @pytest.mark.parametrize("val1, val2", [
+        # cases where one operand is not
+        # timedelta64
+        (np.timedelta64(7, 'Y'),
+         15,),
+        (7.5,
+         np.timedelta64(1, 'D')),
+        ])
+    def test_timedelta_modulus_type_resolution(self, val1, val2):
+        # NOTE: some of the operations may be supported
+        # in the future
+        with assert_raises_regex(TypeError,
+                                 "remainder cannot use operands with types"):
+            val1 % val2
+
     def test_timedelta_arange_no_dtype(self):
         d = np.array(5, dtype="m8[D]")
         assert_equal(np.arange(d, d + 1), d)
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index fc4dc952a..ecb51f72d 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -1,6 +1,5 @@
 from __future__ import division, absolute_import, print_function
 
-import pickle
 import sys
 import operator
 import pytest
@@ -9,6 +8,7 @@ import ctypes
 import numpy as np
 from numpy.core._rational_tests import rational
 from numpy.testing import assert_, assert_equal, assert_raises
+from numpy.core.numeric import pickle
 
 def assert_dtype_equal(a, b):
     assert_equal(a, b)
@@ -620,6 +620,25 @@ class TestString(object):
         # Pull request #4722
         np.array(["", ""]).astype(object)
 
+    def test_void_subclass_unsized(self):
+        dt = np.dtype(np.record)
+        assert_equal(repr(dt), "dtype('V')")
+        assert_equal(str(dt), '|V0')
+        assert_equal(dt.name, 'record')
+
+    def test_void_subclass_sized(self):
+        dt = np.dtype((np.record, 2))
+        assert_equal(repr(dt), "dtype('V2')")
+        assert_equal(str(dt), '|V2')
+        assert_equal(dt.name, 'record16')
+
+    def test_void_subclass_fields(self):
+        dt = np.dtype((np.record, [('a', '<u2')]))
+        assert_equal(repr(dt), "dtype((numpy.record, [('a', '<u2')]))")
+        assert_equal(str(dt), "(numpy.record, [('a', '<u2')])")
+        assert_equal(dt.name, 'record16')
+
+
 class TestDtypeAttributeDeletion(object):
 
     def test_dtype_non_writable_attributes_deletion(self):
@@ -775,6 +794,36 @@ class TestFromCTypes(object):
         ], align=True)
         self.check(PaddedStruct, expected)
 
+    def test_bit_fields(self):
+        class BitfieldStruct(ctypes.Structure):
+            _fields_ = [
+                ('a', ctypes.c_uint8, 7),
+                ('b', ctypes.c_uint8, 1)
+            ]
+        assert_raises(TypeError, np.dtype, BitfieldStruct)
+        assert_raises(TypeError, np.dtype, BitfieldStruct())
+
+    def test_pointer(self):
+        p_uint8 = ctypes.POINTER(ctypes.c_uint8)
+        assert_raises(TypeError, np.dtype, p_uint8)
+
+    @pytest.mark.xfail(
+        reason="Unions are not implemented",
+        raises=NotImplementedError)
+    def test_union(self):
+        class Union(ctypes.Union):
+            _fields_ = [
+                ('a', ctypes.c_uint8),
+                ('b', ctypes.c_uint16),
+            ]
+        expected = np.dtype(dict(
+            names=['a', 'b'],
+            formats=[np.uint8, np.uint16],
+            offsets=[0, 0],
+            itemsize=2
+        ))
+        self.check(Union, expected)
+
     @pytest.mark.xfail(reason="_pack_ is ignored - see gh-11651")
     def test_packed_structure(self):
         class PackedStructure(ctypes.Structure):
diff --git a/numpy/core/tests/test_getlimits.py b/numpy/core/tests/test_getlimits.py
index ca8093c62..2f6648183 100644
--- a/numpy/core/tests/test_getlimits.py
+++ b/numpy/core/tests/test_getlimits.py
@@ -7,10 +7,7 @@ import numpy as np
 from numpy.core import finfo, iinfo
 from numpy import half, single, double, longdouble
 from numpy.testing import assert_equal, assert_, assert_raises
-from numpy.core.getlimits import (
-    _discovered_machar, _float16_ma, _float32_ma, _float64_ma, _float128_ma,
-    _float80_ma
-    )
+from numpy.core.getlimits import _discovered_machar, _float_ma
 
 ##################################################
 
@@ -101,9 +98,9 @@ def assert_ma_equal(discovered, ma_like):
 
 def test_known_types():
     # Test we are correctly compiling parameters for known types
-    for ftype, ma_like in ((np.float16, _float16_ma),
-                           (np.float32, _float32_ma),
-                           (np.float64, _float64_ma)):
+    for ftype, ma_like in ((np.float16, _float_ma[16]),
+                           (np.float32, _float_ma[32]),
+                           (np.float64, _float_ma[64])):
         assert_ma_equal(_discovered_machar(ftype), ma_like)
     # Suppress warning for broken discovery of double double on PPC
     with np.errstate(all='ignore'):
@@ -111,10 +108,10 @@ def test_known_types():
     bytes = np.dtype(np.longdouble).itemsize
     if (ld_ma.it, ld_ma.maxexp) == (63, 16384) and bytes in (12, 16):
         # 80-bit extended precision
-        assert_ma_equal(ld_ma, _float80_ma)
+        assert_ma_equal(ld_ma, _float_ma[80])
     elif (ld_ma.it, ld_ma.maxexp) == (112, 16384) and bytes == 16:
         # IEE 754 128-bit
-        assert_ma_equal(ld_ma, _float128_ma)
+        assert_ma_equal(ld_ma, _float_ma[128])
 
 
 def test_plausible_finfo():
diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py
index d715569f8..2f2d069e5 100644
--- a/numpy/core/tests/test_half.py
+++ b/numpy/core/tests/test_half.py
@@ -301,21 +301,19 @@ class TestHalf(object):
         assert_equal(np.copysign(b, a), [2, 5, 1, 4, 3])
 
         assert_equal(np.maximum(a, b), [0, 5, 2, 4, 3])
-        with suppress_warnings() as sup:
-            sup.record(RuntimeWarning)
-            x = np.maximum(b, c)
-            assert_(np.isnan(x[3]))
-        assert_equal(len(sup.log), 1)
+
+        x = np.maximum(b, c)
+        assert_(np.isnan(x[3]))
         x[3] = 0
         assert_equal(x, [0, 5, 1, 0, 6])
+
         assert_equal(np.minimum(a, b), [-2, 1, 1, 4, 2])
-        with suppress_warnings() as sup:
-            sup.record(RuntimeWarning)
-            x = np.minimum(b, c)
-            assert_(np.isnan(x[3]))
-        assert_equal(len(sup.log), 1)
+
+        x = np.minimum(b, c)
+        assert_(np.isnan(x[3]))
         x[3] = 0
         assert_equal(x, [-2, -1, -np.inf, 0, 3])
+
         assert_equal(np.fmax(a, b), [0, 5, 2, 4, 3])
         assert_equal(np.fmax(b, c), [0, 5, 1, 4, 6])
         assert_equal(np.fmin(a, b), [-2, 1, 1, 4, 2])
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index f22ecdb79..4b2a38990 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -20,6 +20,9 @@ import gc
 import weakref
 import pytest
 from contextlib import contextmanager
+
+from numpy.core.numeric import pickle
+
 if sys.version_info[0] >= 3:
     import builtins
 else:
@@ -1371,13 +1374,28 @@ class TestZeroSizeFlexible(object):
             assert_equal(zs.view((dt, 1)).shape, (0,))
 
     def test_pickle(self):
-        import pickle
-        for dt in [bytes, np.void, unicode]:
-            zs = self._zeros(10, dt)
-            p = pickle.dumps(zs)
-            zs2 = pickle.loads(p)
-
-            assert_equal(zs.dtype, zs2.dtype)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            for dt in [bytes, np.void, unicode]:
+                zs = self._zeros(10, dt)
+                p = pickle.dumps(zs, protocol=proto)
+                zs2 = pickle.loads(p)
+
+                assert_equal(zs.dtype, zs2.dtype)
+
+    @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5,
+                        reason="requires pickle protocol 5")
+    def test_pickle_with_buffercallback(self):
+        array = np.arange(10)
+        buffers = []
+        bytes_string = pickle.dumps(array, buffer_callback=buffers.append,
+                                    protocol=5)
+        array_from_buffer = pickle.loads(bytes_string, buffers=buffers)
+        # when using pickle protocol 5 with buffer callbacks,
+        # array_from_buffer is reconstructed from a buffer holding a view
+        # to the initial array's data, so modifying an element in array
+        # should modify it in array_from_buffer too.
+        array[0] = -1
+        assert array_from_buffer[0] == -1, array_from_buffer[0]
 
 
 class TestMethods(object):
@@ -3548,21 +3566,106 @@ class TestSubscripting(object):
 
 
 class TestPickling(object):
+    def test_highest_available_pickle_protocol(self):
+        try:
+            import pickle5
+        except ImportError:
+            pickle5 = None
+
+        if sys.version_info[:2] >= (3, 8) or pickle5 is not None:
+            assert pickle.HIGHEST_PROTOCOL >= 5
+        else:
+            assert pickle.HIGHEST_PROTOCOL < 5
+
+    @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL >= 5,
+                        reason=('this tests the error messages when trying to'
+                                'protocol 5 although it is not available'))
+    def test_correct_protocol5_error_message(self):
+        array = np.arange(10)
+        f = io.BytesIO()
+
+        if sys.version_info[:2] in ((3, 6), (3, 7)):
+            # For the specific case of python3.6 and 3.7, raise a clear import
+            # error about the pickle5 backport when trying to use protocol=5
+            # without the pickle5 package
+            with pytest.raises(ImportError):
+                array.__reduce_ex__(5)
+
+        elif sys.version_info[:2] < (3, 6):
+            # when calling __reduce_ex__ explicitly with protocol=5 on python
+            # raise a ValueError saying that protocol 5 is not available for
+            # this python version
+            with pytest.raises(ValueError):
+                array.__reduce_ex__(5)
+
+    def test_record_array_with_object_dtype(self):
+        my_object = object()
+
+        arr_with_object = np.array(
+                [(my_object, 1, 2.0)],
+                dtype=[('a', object), ('b', int), ('c', float)])
+        arr_without_object = np.array(
+                [('xxx', 1, 2.0)],
+                dtype=[('a', str), ('b', int), ('c', float)])
+
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            depickled_arr_with_object = pickle.loads(
+                    pickle.dumps(arr_with_object, protocol=proto))
+            depickled_arr_without_object = pickle.loads(
+                    pickle.dumps(arr_without_object, protocol=proto))
+
+            assert_equal(arr_with_object.dtype,
+                         depickled_arr_with_object.dtype)
+            assert_equal(arr_without_object.dtype,
+                         depickled_arr_without_object.dtype)
+
+    @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5,
+                        reason="requires pickle protocol 5")
+    def test_f_contiguous_array(self):
+        f_contiguous_array = np.array([[1, 2, 3], [4, 5, 6]], order='F')
+        buffers = []
+
+        # When using pickle protocol 5, Fortran-contiguous arrays can be
+        # serialized using out-of-band buffers
+        bytes_string = pickle.dumps(f_contiguous_array, protocol=5,
+                                    buffer_callback=buffers.append)
+
+        assert len(buffers) > 0
+
+        depickled_f_contiguous_array = pickle.loads(bytes_string,
+                                                    buffers=buffers)
+
+        assert_equal(f_contiguous_array, depickled_f_contiguous_array)
+
+    def test_non_contiguous_array(self):
+        non_contiguous_array = np.arange(12).reshape(3, 4)[:, :2]
+        assert not non_contiguous_array.flags.c_contiguous
+        assert not non_contiguous_array.flags.f_contiguous
+
+        # make sure non-contiguous arrays can be pickled-depickled
+        # using any protocol
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            depickled_non_contiguous_array = pickle.loads(
+                    pickle.dumps(non_contiguous_array, protocol=proto))
+
+            assert_equal(non_contiguous_array, depickled_non_contiguous_array)
+
     def test_roundtrip(self):
-        import pickle
-        carray = np.array([[2, 9], [7, 0], [3, 8]])
-        DATA = [
-            carray,
-            np.transpose(carray),
-            np.array([('xxx', 1, 2.0)], dtype=[('a', (str, 3)), ('b', int),
-                                               ('c', float)])
-        ]
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            carray = np.array([[2, 9], [7, 0], [3, 8]])
+            DATA = [
+                carray,
+                np.transpose(carray),
+                np.array([('xxx', 1, 2.0)], dtype=[('a', (str, 3)), ('b', int),
+                                                   ('c', float)])
+            ]
 
-        for a in DATA:
-            assert_equal(a, pickle.loads(a.dumps()), err_msg="%r" % a)
+            for a in DATA:
+                assert_equal(
+                        a, pickle.loads(pickle.dumps(a, protocol=proto)),
+                        err_msg="%r" % a)
 
     def _loads(self, obj):
-        import pickle
         if sys.version_info[0] >= 3:
             return pickle.loads(obj, encoding='latin1')
         else:
@@ -4726,6 +4829,12 @@ class TestResize(object):
         x_view.resize((0, 10))
         x_view.resize((0, 100))
 
+    def test_check_weakref(self):
+        x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
+        xref = weakref.ref(x)
+        assert_raises(ValueError, x.resize, (5, 1))
+        del xref  # avoid pyflakes unused variable warning.
+
 
 class TestRecord(object):
     def test_field_rename(self):
@@ -6666,6 +6775,18 @@ class TestNewBufferProtocol(object):
             ValueError, "format string",
             np.array, m)
 
+    def test_error_message(self):
+        # wchar has no corresponding numpy type - if this changes in future, we
+        # need a better way to construct an invalid memoryview format.
+        t = ctypes.c_wchar * 4
+        with assert_raises(ValueError) as cm:
+            np.array(t())
+
+        exc = cm.exception
+        if sys.version_info.major > 2:
+            with assert_raises_regex(ValueError, "Unknown .* specifier 'u'"):
+                raise exc.__cause__
+
     def test_ctypes_integer_via_memoryview(self):
         # gh-11150, due to bpo-10746
         for c_integer in {ctypes.c_int, ctypes.c_long, ctypes.c_longlong}:
@@ -7646,3 +7767,19 @@ def test_uintalignment_and_alignment():
     # check that copy code doesn't complain in debug mode
     dst = np.zeros((2,2), dtype='c8')
     dst[:,1] = src[:,1]  # assert in lowlevel_strided_loops fails?
+
+def test_getfield():
+    a = np.arange(32, dtype='uint16')
+    if sys.byteorder == 'little':
+        i = 0
+        j = 1
+    else:
+        i = 1
+        j = 0
+    b = a.getfield('int8', i)
+    assert_equal(b, a)
+    b = a.getfield('int8', j)
+    assert_equal(b, 0)
+    pytest.raises(ValueError, a.getfield, 'uint8', -1)
+    pytest.raises(ValueError, a.getfield, 'uint8', 16)
+    pytest.raises(ValueError, a.getfield, 'uint64', 0)
diff --git a/numpy/core/tests/test_numerictypes.py b/numpy/core/tests/test_numerictypes.py
index 70871774f..27e4fdeec 100644
--- a/numpy/core/tests/test_numerictypes.py
+++ b/numpy/core/tests/test_numerictypes.py
@@ -475,6 +475,21 @@ class Test_sctype2char(object):
     def test_non_type(self):
         assert_raises(ValueError, np.sctype2char, 1)
 
+@pytest.mark.parametrize("rep, expected", [
+    (np.int32, True),
+    (list, False),
+    (1.1, False),
+    (str, True),
+    (np.dtype(np.float64), True),
+    (np.dtype((np.int16, (3, 4))), True),
+    (np.dtype([('a', np.int8)]), True),
+    ])
+def test_issctype(rep, expected):
+    # ensure proper identification of scalar
+    # data-types by issctype()
+    actual = np.issctype(rep)
+    assert_equal(actual, expected)
+
 
 @pytest.mark.skipif(sys.flags.optimize > 1,
                     reason="no docstrings present to inspect when PYTHONOPTIMIZE/Py_OptimizeFlag > 1")
diff --git a/numpy/core/tests/test_overrides.py b/numpy/core/tests/test_overrides.py
index 7f6157a5b..ee6d5da4a 100644
--- a/numpy/core/tests/test_overrides.py
+++ b/numpy/core/tests/test_overrides.py
@@ -1,13 +1,14 @@
 from __future__ import division, absolute_import, print_function
 
-import pickle
 import sys
 
 import numpy as np
 from numpy.testing import (
     assert_, assert_equal, assert_raises, assert_raises_regex)
 from numpy.core.overrides import (
-    get_overloaded_types_and_args, array_function_dispatch)
+    get_overloaded_types_and_args, array_function_dispatch,
+    verify_matching_signatures)
+from numpy.core.numeric import pickle
 
 
 def _get_overloaded_args(relevant_args):
@@ -15,8 +16,8 @@ def _get_overloaded_args(relevant_args):
     return args
 
 
-def _return_self(self, *args, **kwargs):
-    return self
+def _return_not_implemented(self, *args, **kwargs):
+    return NotImplemented
 
 
 class TestGetOverloadedTypesAndArgs(object):
@@ -44,7 +45,7 @@ class TestGetOverloadedTypesAndArgs(object):
     def test_ndarray_subclasses(self):
 
         class OverrideSub(np.ndarray):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class NoOverrideSub(np.ndarray):
             pass
@@ -69,7 +70,7 @@ class TestGetOverloadedTypesAndArgs(object):
     def test_ndarray_and_duck_array(self):
 
         class Other(object):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         array = np.array(1)
         other = Other()
@@ -85,10 +86,10 @@ class TestGetOverloadedTypesAndArgs(object):
     def test_ndarray_subclass_and_duck_array(self):
 
         class OverrideSub(np.ndarray):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class Other(object):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         array = np.array(1)
         subarray = np.array(1).view(OverrideSub)
@@ -102,16 +103,16 @@ class TestGetOverloadedTypesAndArgs(object):
     def test_many_duck_arrays(self):
 
         class A(object):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class B(A):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class C(A):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class D(object):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         a = A()
         b = B()
@@ -134,7 +135,7 @@ class TestNDArrayArrayFunction(object):
     def test_method(self):
 
         class SubOverride(np.ndarray):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class NoOverrideSub(np.ndarray):
             pass
@@ -167,8 +168,10 @@ def dispatched_one_arg(array):
 class TestArrayFunctionDispatch(object):
 
     def test_pickle(self):
-        roundtripped = pickle.loads(pickle.dumps(dispatched_one_arg))
-        assert_(roundtripped is dispatched_one_arg)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            roundtripped = pickle.loads(
+                    pickle.dumps(dispatched_one_arg, protocol=proto))
+            assert_(roundtripped is dispatched_one_arg)
 
     def test_name_and_docstring(self):
         assert_equal(dispatched_one_arg.__name__, 'dispatched_one_arg')
@@ -186,7 +189,8 @@ class TestArrayFunctionDispatch(object):
         assert_(obj is original)
         assert_(func is dispatched_one_arg)
         assert_equal(set(types), {MyArray})
-        assert_equal(args, (original,))
+        # assert_equal uses the overloaded np.iscomplexobj() internally
+        assert_(args == (original,))
         assert_equal(kwargs, {})
 
     def test_not_implemented(self):
@@ -200,6 +204,36 @@ class TestArrayFunctionDispatch(object):
             dispatched_one_arg(array)
 
 
+class TestVerifyMatchingSignatures(object):
+
+    def test_verify_matching_signatures(self):
+
+        verify_matching_signatures(lambda x: 0, lambda x: 0)
+        verify_matching_signatures(lambda x=None: 0, lambda x=None: 0)
+        verify_matching_signatures(lambda x=1: 0, lambda x=None: 0)
+
+        with assert_raises(RuntimeError):
+            verify_matching_signatures(lambda a: 0, lambda b: 0)
+        with assert_raises(RuntimeError):
+            verify_matching_signatures(lambda x: 0, lambda x=None: 0)
+        with assert_raises(RuntimeError):
+            verify_matching_signatures(lambda x=None: 0, lambda y=None: 0)
+        with assert_raises(RuntimeError):
+            verify_matching_signatures(lambda x=1: 0, lambda y=1: 0)
+
+    def test_array_function_dispatch(self):
+
+        with assert_raises(RuntimeError):
+            @array_function_dispatch(lambda x: (x,))
+            def f(y):
+                pass
+
+        # should not raise
+        @array_function_dispatch(lambda x: (x,), verify=False)
+        def f(y):
+            pass
+
+
 def _new_duck_type_and_implements():
     """Create a duck array type and implements functions."""
     HANDLED_FUNCTIONS = {}
@@ -262,12 +296,46 @@ class TestArrayFunctionImplementation(object):
     def test_not_implemented(self):
         MyArray, implements = _new_duck_type_and_implements()
 
-        @array_function_dispatch(lambda array: (array,))
+        @array_function_dispatch(lambda array: (array,), module='my')
         def func(array):
             return array
 
         array = np.array(1)
         assert_(func(array) is array)
 
-        with assert_raises_regex(TypeError, 'no implementation found'):
+        with assert_raises_regex(
+                TypeError, "no implementation found for 'my.func'"):
             func(MyArray())
+
+
+class TestNDArrayMethods(object):
+
+    def test_repr(self):
+        # gh-12162: should still be defined even if __array_function__ doesn't
+        # implement np.array_repr()
+
+        class MyArray(np.ndarray):
+            def __array_function__(*args, **kwargs):
+                return NotImplemented
+
+        array = np.array(1).view(MyArray)
+        assert_equal(repr(array), 'MyArray(1)')
+        assert_equal(str(array), '1')
+
+        
+class TestNumPyFunctions(object):
+
+    def test_module(self):
+        assert_equal(np.sum.__module__, 'numpy')
+        assert_equal(np.char.equal.__module__, 'numpy.char')
+        assert_equal(np.fft.fft.__module__, 'numpy.fft')
+        assert_equal(np.linalg.solve.__module__, 'numpy.linalg')
+
+    def test_override_sum(self):
+        MyArray, implements = _new_duck_type_and_implements()
+
+        @implements(np.sum)
+        def _(array):
+            return 'yes'
+
+        assert_equal(np.sum(MyArray()), 'yes')
diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py
index d7c7d16e3..af6c86b9e 100644
--- a/numpy/core/tests/test_records.py
+++ b/numpy/core/tests/test_records.py
@@ -7,17 +7,18 @@ try:
     import collections.abc as collections_abc
 except ImportError:
     import collections as collections_abc
-import pickle
 import warnings
 import textwrap
 from os import path
 import pytest
 
 import numpy as np
+from numpy.compat import Path
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_array_almost_equal,
-    assert_raises, assert_warns
+    assert_raises, assert_warns, temppath
     )
+from numpy.core.numeric import pickle
 
 
 class TestFromrecords(object):
@@ -325,6 +326,24 @@ class TestFromrecords(object):
         assert_equal(rec['f1'], [b'', b'', b''])
 
 
+@pytest.mark.skipif(Path is None, reason="No pathlib.Path")
+class TestPathUsage(object):
+    # Test that pathlib.Path can be used
+    def test_tofile_fromfile(self):
+        with temppath(suffix='.bin') as path:
+            path = Path(path)
+            a = np.empty(10, dtype='f8,i4,a5')
+            a[5] = (0.5,10,'abcde')
+            a.newbyteorder('<')
+            with path.open("wb") as fd:
+                a.tofile(fd)
+            x = np.core.records.fromfile(path,
+                                         formats='f8,i4,a5',
+                                         shape=10,
+                                         byteorder='<')
+            assert_array_equal(x, a)
+
+
 class TestRecord(object):
     def setup(self):
         self.data = np.rec.fromrecords([(1, 2, 3), (4, 5, 6)],
@@ -378,22 +397,27 @@ class TestRecord(object):
     def test_pickle_1(self):
         # Issue #1529
         a = np.array([(1, [])], dtype=[('a', np.int32), ('b', np.int32, 0)])
-        assert_equal(a, pickle.loads(pickle.dumps(a)))
-        assert_equal(a[0], pickle.loads(pickle.dumps(a[0])))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            assert_equal(a, pickle.loads(pickle.dumps(a, protocol=proto)))
+            assert_equal(a[0], pickle.loads(pickle.dumps(a[0],
+                                                         protocol=proto)))
 
     def test_pickle_2(self):
         a = self.data
-        assert_equal(a, pickle.loads(pickle.dumps(a)))
-        assert_equal(a[0], pickle.loads(pickle.dumps(a[0])))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            assert_equal(a, pickle.loads(pickle.dumps(a, protocol=proto)))
+            assert_equal(a[0], pickle.loads(pickle.dumps(a[0],
+                                                         protocol=proto)))
 
     def test_pickle_3(self):
         # Issue #7140
         a = self.data
-        pa = pickle.loads(pickle.dumps(a[0]))
-        assert_(pa.flags.c_contiguous)
-        assert_(pa.flags.f_contiguous)
-        assert_(pa.flags.writeable)
-        assert_(pa.flags.aligned)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            pa = pickle.loads(pickle.dumps(a[0], protocol=proto))
+            assert_(pa.flags.c_contiguous)
+            assert_(pa.flags.f_contiguous)
+            assert_(pa.flags.writeable)
+            assert_(pa.flags.aligned)
 
     def test_objview_record(self):
         # https://github.com/numpy/numpy/issues/2599
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index b74216418..a929b0efd 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -1,7 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
 import copy
-import pickle
 import sys
 import platform
 import gc
@@ -20,6 +19,7 @@ from numpy.testing import (
         _assert_valid_refcount, HAS_REFCOUNT,
         )
 from numpy.compat import asbytes, asunicode, long
+from numpy.core.numeric import pickle
 
 try:
     RecursionError
@@ -39,12 +39,13 @@ class TestRegression(object):
     def test_pickle_transposed(self):
         # Ticket #16
         a = np.transpose(np.array([[2, 9], [7, 0], [3, 8]]))
-        f = BytesIO()
-        pickle.dump(a, f)
-        f.seek(0)
-        b = pickle.load(f)
-        f.close()
-        assert_array_equal(a, b)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            f = BytesIO()
+            pickle.dump(a, f, protocol=proto)
+            f.seek(0)
+            b = pickle.load(f)
+            f.close()
+            assert_array_equal(a, b)
 
     def test_typeNA(self):
         # Issue gh-515 
@@ -95,12 +96,13 @@ class TestRegression(object):
 
     def test_char_dump(self):
         # Ticket #50
-        f = BytesIO()
         ca = np.char.array(np.arange(1000, 1010), itemsize=4)
-        ca.dump(f)
-        f.seek(0)
-        ca = np.load(f)
-        f.close()
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            f = BytesIO()
+            pickle.dump(ca, f, protocol=proto)
+            f.seek(0)
+            ca = np.load(f)
+            f.close()
 
     def test_noncontiguous_fill(self):
         # Ticket #58.
@@ -359,12 +361,13 @@ class TestRegression(object):
     def test_unpickle_dtype_with_object(self):
         # Implemented in r2840
         dt = np.dtype([('x', int), ('y', np.object_), ('z', 'O')])
-        f = BytesIO()
-        pickle.dump(dt, f)
-        f.seek(0)
-        dt_ = pickle.load(f)
-        f.close()
-        assert_equal(dt, dt_)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            f = BytesIO()
+            pickle.dump(dt, f, protocol=proto)
+            f.seek(0)
+            dt_ = pickle.load(f)
+            f.close()
+            assert_equal(dt, dt_)
 
     def test_mem_array_creation_invalid_specification(self):
         # Ticket #196
@@ -474,7 +477,8 @@ class TestRegression(object):
 
     def test_pickle_dtype(self):
         # Ticket #251
-        pickle.dumps(float)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            pickle.dumps(float, protocol=proto)
 
     def test_swap_real(self):
         # Ticket #265
@@ -818,8 +822,9 @@ class TestRegression(object):
         # Ticket #600
         x = np.array(["DROND", "DROND1"], dtype="U6")
         el = x[1]
-        new = pickle.loads(pickle.dumps(el))
-        assert_equal(new, el)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            new = pickle.loads(pickle.dumps(el, protocol=proto))
+            assert_equal(new, el)
 
     def test_arange_non_native_dtype(self):
         # Ticket #616
@@ -1066,11 +1071,12 @@ class TestRegression(object):
     def test_dot_alignment_sse2(self):
         # Test for ticket #551, changeset r5140
         x = np.zeros((30, 40))
-        y = pickle.loads(pickle.dumps(x))
-        # y is now typically not aligned on a 8-byte boundary
-        z = np.ones((1, y.shape[0]))
-        # This shouldn't cause a segmentation fault:
-        np.dot(z, y)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            y = pickle.loads(pickle.dumps(x, protocol=proto))
+            # y is now typically not aligned on a 8-byte boundary
+            z = np.ones((1, y.shape[0]))
+            # This shouldn't cause a segmentation fault:
+            np.dot(z, y)
 
     def test_astype_copy(self):
         # Ticket #788, changeset r5155
@@ -1280,9 +1286,12 @@ class TestRegression(object):
 
         assert_(test_record_void_scalar == test_record)
 
-        #Test pickle and unpickle of void and record scalars
-        assert_(pickle.loads(pickle.dumps(test_string)) == test_string)
-        assert_(pickle.loads(pickle.dumps(test_record)) == test_record)
+        # Test pickle and unpickle of void and record scalars
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            assert_(pickle.loads(
+                pickle.dumps(test_string, protocol=proto)) == test_string)
+            assert_(pickle.loads(
+                pickle.dumps(test_record, protocol=proto)) == test_record)
 
     def test_blasdot_uninitialized_memory(self):
         # Ticket #950
@@ -1548,10 +1557,7 @@ class TestRegression(object):
 
     def test_complex_nan_maximum(self):
         cnan = complex(0, np.nan)
-        with suppress_warnings() as sup:
-            sup.record(RuntimeWarning)
-            assert_equal(np.maximum(1, cnan), cnan)
-        assert_equal(len(sup.log), 1)
+        assert_equal(np.maximum(1, cnan), cnan)
 
     def test_subclass_int_tuple_assignment(self):
         # ticket #1563
@@ -1925,11 +1931,12 @@ class TestRegression(object):
 
     def test_pickle_bytes_overwrite(self):
         if sys.version_info[0] >= 3:
-            data = np.array([1], dtype='b')
-            data = pickle.loads(pickle.dumps(data))
-            data[0] = 0xdd
-            bytestring = "\x01  ".encode('ascii')
-            assert_equal(bytestring[0:1], '\x01'.encode('ascii'))
+            for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+                data = np.array([1], dtype='b')
+                data = pickle.loads(pickle.dumps(data, protocol=proto))
+                data[0] = 0xdd
+                bytestring = "\x01  ".encode('ascii')
+                assert_equal(bytestring[0:1], '\x01'.encode('ascii'))
 
     def test_pickle_py2_array_latin1_hack(self):
         # Check that unpickling hacks in Py3 that support
@@ -2231,10 +2238,10 @@ class TestRegression(object):
 
     def test_pickle_empty_string(self):
         # gh-3926
-
-        import pickle
-        test_string = np.string_('')
-        assert_equal(pickle.loads(pickle.dumps(test_string)), test_string)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            test_string = np.string_('')
+            assert_equal(pickle.loads(
+                pickle.dumps(test_string, protocol=proto)), test_string)
 
     def test_frompyfunc_many_args(self):
         # gh-5672
diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index 72b3451a4..b2c610da6 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -1,14 +1,17 @@
 from __future__ import division, absolute_import, print_function
 
 import warnings
+import sys
 import numpy as np
 from numpy.core import (
     array, arange, atleast_1d, atleast_2d, atleast_3d, block, vstack, hstack,
     newaxis, concatenate, stack
     )
+from numpy.core.shape_base import (_block_dispatcher, _block_setup,
+                                   _block_concatenate, _block_slicing)
 from numpy.testing import (
     assert_, assert_raises, assert_array_equal, assert_equal,
-    assert_raises_regex, assert_almost_equal
+    assert_raises_regex, assert_warns, assert_almost_equal
     )
 
 from numpy.compat import long
@@ -153,6 +156,14 @@ class TestHstack(object):
         desired = array([[1, 1], [2, 2]])
         assert_array_equal(res, desired)
 
+    def test_generator(self):
+        with assert_warns(FutureWarning):
+            hstack((np.arange(3) for _ in range(2)))
+        if sys.version_info.major > 2:
+            # map returns a list on Python 2
+            with assert_warns(FutureWarning):
+                hstack(map(lambda x: x, np.ones((3, 2))))
+
 
 class TestVstack(object):
     def test_non_iterable(self):
@@ -189,8 +200,18 @@ class TestVstack(object):
         desired = array([[1, 2], [1, 2]])
         assert_array_equal(res, desired)
 
+    def test_generator(self):
+        with assert_warns(FutureWarning):
+            vstack((np.arange(3) for _ in range(2)))
+
 
 class TestConcatenate(object):
+    def test_returns_copy(self):
+        a = np.eye(3)
+        b = np.concatenate([a])
+        b[0, 0] = 2
+        assert b[0, 0] != a[0, 0]
+
     def test_exceptions(self):
         # test axis must be in bounds
         for ndim in [1, 2, 3]:
@@ -346,7 +367,7 @@ def test_stack():
     arrays = [np.random.randn(3, 4) for _ in range(10)]
     axes = [0, 1, 2, -1, -2, -3]
     expected_shapes = [(10, 3, 4), (3, 10, 4), (3, 4, 10),
-                        (3, 4, 10), (3, 10, 4), (10, 3, 4)]
+                       (3, 4, 10), (3, 10, 4), (10, 3, 4)]
     for axis, expected_shape in zip(axes, expected_shapes):
         assert_equal(np.stack(arrays, axis).shape, expected_shape)
     # empty arrays
@@ -364,10 +385,69 @@ def test_stack():
                         stack, [np.zeros((3, 3)), np.zeros(3)], axis=1)
     assert_raises_regex(ValueError, 'must have the same shape',
                         stack, [np.arange(2), np.arange(3)])
+    # generator is deprecated
+    with assert_warns(FutureWarning):
+        result = stack((x for x in range(3)))
+    assert_array_equal(result, np.array([0, 1, 2]))
+
+
+# See for more information on how to parametrize a whole class
+# https://docs.pytest.org/en/latest/example/parametrize.html#parametrizing-test-methods-through-per-class-configuration
+def pytest_generate_tests(metafunc):
+    # called once per each test function
+    if hasattr(metafunc.cls, 'params'):
+        arglist = metafunc.cls.params
+        argnames = sorted(arglist[0])
+        metafunc.parametrize(argnames,
+                             [[funcargs[name] for name in argnames]
+                              for funcargs in arglist])
+
+
+# blocking small arrays and large arrays go through different paths.
+# the algorithm is triggered depending on the number of element
+# copies required.
+# We define a test fixture that forces most tests to go through
+# both code paths.
+# Ultimately, this should be removed if a single algorithm is found
+# to be faster for both small and large arrays.s
+def _block_force_concatenate(arrays):
+    arrays, list_ndim, result_ndim, _ = _block_setup(arrays)
+    return _block_concatenate(arrays, list_ndim, result_ndim)
+
+
+def _block_force_slicing(arrays):
+    arrays, list_ndim, result_ndim, _ = _block_setup(arrays)
+    return _block_slicing(arrays, list_ndim, result_ndim)
 
 
 class TestBlock(object):
-    def test_block_simple_row_wise(self):
+    params = [dict(block=block),
+              dict(block=_block_force_concatenate),
+              dict(block=_block_force_slicing)]
+
+    def test_returns_copy(self, block):
+        a = np.eye(3)
+        b = block(a)
+        b[0, 0] = 2
+        assert b[0, 0] != a[0, 0]
+
+    def test_block_total_size_estimate(self, block):
+        _, _, _, total_size = _block_setup([1])
+        assert total_size == 1
+
+        _, _, _, total_size = _block_setup([[1]])
+        assert total_size == 1
+
+        _, _, _, total_size = _block_setup([[1, 1]])
+        assert total_size == 2
+
+        _, _, _, total_size = _block_setup([[1], [1]])
+        assert total_size == 2
+
+        _, _, _, total_size = _block_setup([[1, 2], [3, 4]])
+        assert total_size == 4
+
+    def test_block_simple_row_wise(self, block):
         a_2d = np.ones((2, 2))
         b_2d = 2 * a_2d
         desired = np.array([[1, 1, 2, 2],
@@ -375,7 +455,7 @@ class TestBlock(object):
         result = block([a_2d, b_2d])
         assert_equal(desired, result)
 
-    def test_block_simple_column_wise(self):
+    def test_block_simple_column_wise(self, block):
         a_2d = np.ones((2, 2))
         b_2d = 2 * a_2d
         expected = np.array([[1, 1],
@@ -385,7 +465,7 @@ class TestBlock(object):
         result = block([[a_2d], [b_2d]])
         assert_equal(expected, result)
 
-    def test_block_with_1d_arrays_row_wise(self):
+    def test_block_with_1d_arrays_row_wise(self, block):
         # # # 1-D vectors are treated as row arrays
         a = np.array([1, 2, 3])
         b = np.array([2, 3, 4])
@@ -393,7 +473,7 @@ class TestBlock(object):
         result = block([a, b])
         assert_equal(expected, result)
 
-    def test_block_with_1d_arrays_multiple_rows(self):
+    def test_block_with_1d_arrays_multiple_rows(self, block):
         a = np.array([1, 2, 3])
         b = np.array([2, 3, 4])
         expected = np.array([[1, 2, 3, 2, 3, 4],
@@ -401,7 +481,7 @@ class TestBlock(object):
         result = block([[a, b], [a, b]])
         assert_equal(expected, result)
 
-    def test_block_with_1d_arrays_column_wise(self):
+    def test_block_with_1d_arrays_column_wise(self, block):
         # # # 1-D vectors are treated as row arrays
         a_1d = np.array([1, 2, 3])
         b_1d = np.array([2, 3, 4])
@@ -410,7 +490,7 @@ class TestBlock(object):
         result = block([[a_1d], [b_1d]])
         assert_equal(expected, result)
 
-    def test_block_mixed_1d_and_2d(self):
+    def test_block_mixed_1d_and_2d(self, block):
         a_2d = np.ones((2, 2))
         b_1d = np.array([2, 2])
         result = block([[a_2d], [b_1d]])
@@ -419,7 +499,7 @@ class TestBlock(object):
                              [2, 2]])
         assert_equal(expected, result)
 
-    def test_block_complicated(self):
+    def test_block_complicated(self, block):
         # a bit more complicated
         one_2d = np.array([[1, 1, 1]])
         two_2d = np.array([[2, 2, 2]])
@@ -443,7 +523,7 @@ class TestBlock(object):
                         [zero_2d]])
         assert_equal(result, expected)
 
-    def test_nested(self):
+    def test_nested(self, block):
         one = np.array([1, 1, 1])
         two = np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2]])
         three = np.array([3, 3, 3])
@@ -452,9 +532,9 @@ class TestBlock(object):
         six = np.array([6, 6, 6, 6, 6])
         zero = np.zeros((2, 6))
 
-        result = np.block([
+        result = block([
             [
-                np.block([
+                block([
                    [one],
                    [three],
                    [four]
@@ -473,7 +553,7 @@ class TestBlock(object):
 
         assert_equal(result, expected)
 
-    def test_3d(self):
+    def test_3d(self, block):
         a000 = np.ones((2, 2, 2), int) * 1
 
         a100 = np.ones((3, 2, 2), int) * 2
@@ -486,7 +566,7 @@ class TestBlock(object):
 
         a111 = np.ones((3, 3, 3), int) * 8
 
-        result = np.block([
+        result = block([
             [
                 [a000, a001],
                 [a010, a011],
@@ -528,55 +608,102 @@ class TestBlock(object):
 
         assert_array_equal(result, expected)
 
-    def test_block_with_mismatched_shape(self):
+    def test_block_with_mismatched_shape(self, block):
         a = np.array([0, 0])
         b = np.eye(2)
-        assert_raises(ValueError, np.block, [a, b])
-        assert_raises(ValueError, np.block, [b, a])
+        assert_raises(ValueError, block, [a, b])
+        assert_raises(ValueError, block, [b, a])
 
-    def test_no_lists(self):
-        assert_equal(np.block(1),         np.array(1))
-        assert_equal(np.block(np.eye(3)), np.eye(3))
+        to_block = [[np.ones((2,3)), np.ones((2,2))],
+                    [np.ones((2,2)), np.ones((2,2))]]
+        assert_raises(ValueError, block, to_block)
+    def test_no_lists(self, block):
+        assert_equal(block(1),         np.array(1))
+        assert_equal(block(np.eye(3)), np.eye(3))
 
-    def test_invalid_nesting(self):
+    def test_invalid_nesting(self, block):
         msg = 'depths are mismatched'
-        assert_raises_regex(ValueError, msg, np.block, [1, [2]])
-        assert_raises_regex(ValueError, msg, np.block, [1, []])
-        assert_raises_regex(ValueError, msg, np.block, [[1], 2])
-        assert_raises_regex(ValueError, msg, np.block, [[], 2])
-        assert_raises_regex(ValueError, msg, np.block, [
+        assert_raises_regex(ValueError, msg, block, [1, [2]])
+        assert_raises_regex(ValueError, msg, block, [1, []])
+        assert_raises_regex(ValueError, msg, block, [[1], 2])
+        assert_raises_regex(ValueError, msg, block, [[], 2])
+        assert_raises_regex(ValueError, msg, block, [
             [[1], [2]],
             [[3, 4]],
             [5]  # missing brackets
         ])
 
-    def test_empty_lists(self):
-        assert_raises_regex(ValueError, 'empty', np.block, [])
-        assert_raises_regex(ValueError, 'empty', np.block, [[]])
-        assert_raises_regex(ValueError, 'empty', np.block, [[1], []])
+    def test_empty_lists(self, block):
+        assert_raises_regex(ValueError, 'empty', block, [])
+        assert_raises_regex(ValueError, 'empty', block, [[]])
+        assert_raises_regex(ValueError, 'empty', block, [[1], []])
 
-    def test_tuple(self):
-        assert_raises_regex(TypeError, 'tuple', np.block, ([1, 2], [3, 4]))
-        assert_raises_regex(TypeError, 'tuple', np.block, [(1, 2), (3, 4)])
+    def test_tuple(self, block):
+        assert_raises_regex(TypeError, 'tuple', block, ([1, 2], [3, 4]))
+        assert_raises_regex(TypeError, 'tuple', block, [(1, 2), (3, 4)])
 
-    def test_different_ndims(self):
+    def test_different_ndims(self, block):
         a = 1.
         b = 2 * np.ones((1, 2))
         c = 3 * np.ones((1, 1, 3))
 
-        result = np.block([a, b, c])
+        result = block([a, b, c])
         expected = np.array([[[1., 2., 2., 3., 3., 3.]]])
 
         assert_equal(result, expected)
 
-    def test_different_ndims_depths(self):
+    def test_different_ndims_depths(self, block):
         a = 1.
         b = 2 * np.ones((1, 2))
         c = 3 * np.ones((1, 2, 3))
 
-        result = np.block([[a, b], [c]])
+        result = block([[a, b], [c]])
         expected = np.array([[[1., 2., 2.],
                               [3., 3., 3.],
                               [3., 3., 3.]]])
 
         assert_equal(result, expected)
+
+    def test_block_memory_order(self, block):
+        # 3D
+        arr_c = np.zeros((3,)*3, order='C')
+        arr_f = np.zeros((3,)*3, order='F')
+
+        b_c = [[[arr_c, arr_c],
+                [arr_c, arr_c]],
+               [[arr_c, arr_c],
+                [arr_c, arr_c]]]
+
+        b_f = [[[arr_f, arr_f],
+                [arr_f, arr_f]],
+               [[arr_f, arr_f],
+                [arr_f, arr_f]]]
+
+        assert block(b_c).flags['C_CONTIGUOUS']
+        assert block(b_f).flags['F_CONTIGUOUS']
+
+        arr_c = np.zeros((3, 3), order='C')
+        arr_f = np.zeros((3, 3), order='F')
+        # 2D
+        b_c = [[arr_c, arr_c],
+               [arr_c, arr_c]]
+
+        b_f = [[arr_f, arr_f],
+               [arr_f, arr_f]]
+
+        assert block(b_c).flags['C_CONTIGUOUS']
+        assert block(b_f).flags['F_CONTIGUOUS']
+
+
+def test_block_dispatcher():
+    class ArrayLike(object):
+        pass
+    a = ArrayLike()
+    b = ArrayLike()
+    c = ArrayLike()
+    assert_equal(list(_block_dispatcher(a)), [a])
+    assert_equal(list(_block_dispatcher([a])), [a])
+    assert_equal(list(_block_dispatcher([a, b])), [a, b])
+    assert_equal(list(_block_dispatcher([[a], [b, [c]]])), [a, b, c])
+    # don't recurse into non-lists
+    assert_equal(list(_block_dispatcher((a, b))), [(a, b)])
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 85d9f41bd..b83b8ccff 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -13,6 +13,7 @@ from numpy.testing import (
     assert_almost_equal, assert_array_almost_equal, assert_no_warnings,
     assert_allclose,
     )
+from numpy.core.numeric import pickle
 
 
 class TestUfuncKwargs(object):
@@ -43,16 +44,17 @@ class TestUfuncKwargs(object):
 
 class TestUfunc(object):
     def test_pickle(self):
-        import pickle
-        assert_(pickle.loads(pickle.dumps(np.sin)) is np.sin)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            assert_(pickle.loads(pickle.dumps(np.sin,
+                                              protocol=proto)) is np.sin)
 
-        # Check that ufunc not defined in the top level numpy namespace such as
-        # numpy.core._rational_tests.test_add can also be pickled
-        res = pickle.loads(pickle.dumps(_rational_tests.test_add))
-        assert_(res is _rational_tests.test_add)
+            # Check that ufunc not defined in the top level numpy namespace
+            # such as numpy.core._rational_tests.test_add can also be pickled
+            res = pickle.loads(pickle.dumps(_rational_tests.test_add,
+                                            protocol=proto))
+            assert_(res is _rational_tests.test_add)
 
     def test_pickle_withstring(self):
-        import pickle
         astring = (b"cnumpy.core\n_ufunc_reconstruct\np0\n"
                    b"(S'numpy.core.umath'\np1\nS'cos'\np2\ntp3\nRp4\n.")
         assert_(pickle.loads(astring) is np.cos)
@@ -286,27 +288,96 @@ class TestUfunc(object):
         """
         pass
 
+    # from include/numpy/ufuncobject.h
+    size_inferred = 2
+    can_ignore = 4
     def test_signature0(self):
         # the arguments to test_signature are: nin, nout, core_signature
-        # pass
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(i),(i)->()")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(i),(i)->()")
         assert_equal(enabled, 1)
         assert_equal(num_dims, (1,  1,  0))
         assert_equal(ixs, (0, 0))
+        assert_equal(flags, (self.size_inferred,))
+        assert_equal(sizes, (-1,))
 
     def test_signature1(self):
         # empty core signature; treat as plain ufunc (with trivial core)
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(),()->()")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(),()->()")
         assert_equal(enabled, 0)
         assert_equal(num_dims, (0,  0,  0))
         assert_equal(ixs, ())
+        assert_equal(flags, ())
+        assert_equal(sizes, ())
 
     def test_signature2(self):
         # more complicated names for variables
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(i1,i2),(J_1)->(_kAB)")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(i1,i2),(J_1)->(_kAB)")
         assert_equal(enabled, 1)
         assert_equal(num_dims, (2, 1, 1))
         assert_equal(ixs, (0, 1, 2, 3))
+        assert_equal(flags, (self.size_inferred,)*4)
+        assert_equal(sizes, (-1, -1, -1, -1))
+
+    def test_signature3(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, u"(i1, i12),   (J_1)->(i12, i2)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 1, 2))
+        assert_equal(ixs, (0, 1, 2, 1, 3))
+        assert_equal(flags, (self.size_inferred,)*4)
+        assert_equal(sizes, (-1, -1, -1, -1))
+
+    def test_signature4(self):
+        # matrix_multiply signature from _umath_tests
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(n,k),(k,m)->(n,m)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 2, 2))
+        assert_equal(ixs, (0, 1, 1, 2, 0, 2))
+        assert_equal(flags, (self.size_inferred,)*3)
+        assert_equal(sizes, (-1, -1, -1))
+
+    def test_signature5(self):
+        # matmul signature from _umath_tests
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(n?,k),(k,m?)->(n?,m?)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 2, 2))
+        assert_equal(ixs, (0, 1, 1, 2, 0, 2))
+        assert_equal(flags, (self.size_inferred | self.can_ignore,
+                             self.size_inferred,
+                             self.size_inferred | self.can_ignore))
+        assert_equal(sizes, (-1, -1, -1))
+
+    def test_signature6(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            1, 1, "(3)->()")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 0))
+        assert_equal(ixs, (0,))
+        assert_equal(flags, (0,))
+        assert_equal(sizes, (3,))
+
+    def test_signature7(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            3, 1, "(3),(03,3),(n)->(9)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 2, 1, 1))
+        assert_equal(ixs, (0, 0, 0, 1, 2))
+        assert_equal(flags, (0, self.size_inferred, 0))
+        assert_equal(sizes, (3, -1, 9))
+
+    def test_signature8(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            3, 1, "(3?),(3?,3?),(n)->(9)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 2, 1, 1))
+        assert_equal(ixs, (0, 0, 0, 1, 2))
+        assert_equal(flags, (self.can_ignore, self.size_inferred, 0))
+        assert_equal(sizes, (3, -1, 9))
 
     def test_signature_failure0(self):
         # in the following calls, a ValueError should be raised because
@@ -872,6 +943,89 @@ class TestUfunc(object):
         w = np.array([], dtype='f8')
         assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1))
 
+    def test_cross1d(self):
+        """Test with fixed-sized signature."""
+        a = np.eye(3)
+        assert_array_equal(umt.cross1d(a, a), np.zeros((3, 3)))
+        out = np.zeros((3, 3))
+        result = umt.cross1d(a[0], a, out)
+        assert_(result is out)
+        assert_array_equal(result, np.vstack((np.zeros(3), a[2], -a[1])))
+        assert_raises(ValueError, umt.cross1d, np.eye(4), np.eye(4))
+        assert_raises(ValueError, umt.cross1d, a, np.arange(4.))
+        assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros((3, 4)))
+
+    def test_can_ignore_signature(self):
+        # Comparing the effects of ? in signature:
+        # matrix_multiply: (m,n),(n,p)->(m,p)    # all must be there.
+        # matmul:        (m?,n),(n,p?)->(m?,p?)  # allow missing m, p.
+        mat = np.arange(12).reshape((2, 3, 2))
+        single_vec = np.arange(2)
+        col_vec = single_vec[:, np.newaxis]
+        col_vec_array = np.arange(8).reshape((2, 2, 2, 1)) + 1
+        # matrix @ single column vector with proper dimension
+        mm_col_vec = umt.matrix_multiply(mat, col_vec)
+        # matmul does the same thing
+        matmul_col_vec = umt.matmul(mat, col_vec)
+        assert_array_equal(matmul_col_vec, mm_col_vec)
+        # matrix @ vector without dimension making it a column vector.
+        # matrix multiply fails -> missing core dim.
+        assert_raises(ValueError, umt.matrix_multiply, mat, single_vec)
+        # matmul mimicker passes, and returns a vector.
+        matmul_col = umt.matmul(mat, single_vec)
+        assert_array_equal(matmul_col, mm_col_vec.squeeze())
+        # Now with a column array: same as for column vector,
+        # broadcasting sensibly.
+        mm_col_vec = umt.matrix_multiply(mat, col_vec_array)
+        matmul_col_vec = umt.matmul(mat, col_vec_array)
+        assert_array_equal(matmul_col_vec, mm_col_vec)
+        # As above, but for row vector
+        single_vec = np.arange(3)
+        row_vec = single_vec[np.newaxis, :]
+        row_vec_array = np.arange(24).reshape((4, 2, 1, 1, 3)) + 1
+        # row vector @ matrix
+        mm_row_vec = umt.matrix_multiply(row_vec, mat)
+        matmul_row_vec = umt.matmul(row_vec, mat)
+        assert_array_equal(matmul_row_vec, mm_row_vec)
+        # single row vector @ matrix
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, mat)
+        matmul_row = umt.matmul(single_vec, mat)
+        assert_array_equal(matmul_row, mm_row_vec.squeeze())
+        # row vector array @ matrix
+        mm_row_vec = umt.matrix_multiply(row_vec_array, mat)
+        matmul_row_vec = umt.matmul(row_vec_array, mat)
+        assert_array_equal(matmul_row_vec, mm_row_vec)
+        # Now for vector combinations
+        # row vector @ column vector
+        col_vec = row_vec.T
+        col_vec_array = row_vec_array.swapaxes(-2, -1)
+        mm_row_col_vec = umt.matrix_multiply(row_vec, col_vec)
+        matmul_row_col_vec = umt.matmul(row_vec, col_vec)
+        assert_array_equal(matmul_row_col_vec, mm_row_col_vec)
+        # single row vector @ single col vector
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec)
+        matmul_row_col = umt.matmul(single_vec, single_vec)
+        assert_array_equal(matmul_row_col, mm_row_col_vec.squeeze())
+        # row vector array @ matrix
+        mm_row_col_array = umt.matrix_multiply(row_vec_array, col_vec_array)
+        matmul_row_col_array = umt.matmul(row_vec_array, col_vec_array)
+        assert_array_equal(matmul_row_col_array, mm_row_col_array)
+        # Finally, check that things are *not* squeezed if one gives an
+        # output.
+        out = np.zeros_like(mm_row_col_array)
+        out = umt.matrix_multiply(row_vec_array, col_vec_array, out=out)
+        assert_array_equal(out, mm_row_col_array)
+        out[:] = 0
+        out = umt.matmul(row_vec_array, col_vec_array, out=out)
+        assert_array_equal(out, mm_row_col_array)
+        # And check one cannot put missing dimensions back.
+        out = np.zeros_like(mm_row_col_vec)
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec,
+                      out)
+        # But fine for matmul, since it is just a broadcast.
+        out = umt.matmul(single_vec, single_vec, out)
+        assert_array_equal(out, mm_row_col_vec.squeeze())
+
     def test_matrix_multiply(self):
         self.compare_matrix_multiply_results(np.long)
         self.compare_matrix_multiply_results(np.double)
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index c15ce83f6..bd7985dfb 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -1327,21 +1327,18 @@ class TestMinMax(object):
         assert_equal(d.max(), d[0])
         assert_equal(d.min(), d[0])
 
-    def test_reduce_warns(self):
+    def test_reduce_reorder(self):
         # gh 10370, 11029 Some compilers reorder the call to npy_getfloatstatus
         # and put it before the call to an intrisic function that causes
-        # invalid status to be set. Also make sure warnings are emitted
+        # invalid status to be set. Also make sure warnings are not emitted
         for n in (2, 4, 8, 16, 32):
             for dt in (np.float32, np.float16, np.complex64):
-                with suppress_warnings() as sup:
-                    sup.record(RuntimeWarning)
-                    for r in np.diagflat(np.array([np.nan] * n, dtype=dt)):
-                        assert_equal(np.min(r), np.nan)
-                assert_equal(len(sup.log), n)
-
-    def test_minimize_warns(self):
-        # gh 11589
-        assert_warns(RuntimeWarning, np.minimum, np.nan, 1)
+                for r in np.diagflat(np.array([np.nan] * n, dtype=dt)):
+                    assert_equal(np.min(r), np.nan)
+
+    def test_minimize_no_warns(self):
+        a = np.minimum(np.nan, 1)
+        assert_equal(a, np.nan)
 
 
 class TestAbsoluteNegative(object):
diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py
index b30fc27f7..eba0d9ba1 100644
--- a/numpy/distutils/misc_util.py
+++ b/numpy/distutils/misc_util.py
@@ -13,7 +13,6 @@ import multiprocessing
 
 import distutils
 from distutils.errors import DistutilsError
-from distutils.msvccompiler import get_build_architecture
 try:
     from threading import local as tlocal
 except ImportError:
@@ -84,7 +83,9 @@ def get_num_build_jobs():
     Get number of parallel build jobs set by the --parallel command line
     argument of setup.py
     If the command did not receive a setting the environment variable
-    NPY_NUM_BUILD_JOBS checked and if that is unset it returns 1.
+    NPY_NUM_BUILD_JOBS is checked. If that is unset, return the number of
+    processors on the system, with a maximum of 8 (to prevent
+    overloading the system if there a lot of CPUs).
 
     Returns
     -------
@@ -97,6 +98,7 @@ def get_num_build_jobs():
         cpu_count = len(os.sched_getaffinity(0))
     except AttributeError:
         cpu_count = multiprocessing.cpu_count()
+    cpu_count = min(cpu_count, 8)
     envjobs = int(os.environ.get("NPY_NUM_BUILD_JOBS", cpu_count))
     dist = get_distribution()
     # may be None during configuration
@@ -2333,3 +2335,9 @@ def msvc_version(compiler):
         raise ValueError("Compiler instance is not msvc (%s)"\
                          % compiler.compiler_type)
     return compiler._MSVCCompiler__version
+
+def get_build_architecture():
+    # Importing distutils.msvccompiler triggers a warning on non-Windows
+    # systems, so delay the import to here.
+    from distutils.msvccompiler import get_build_architecture
+    return get_build_architecture()
diff --git a/numpy/doc/broadcasting.py b/numpy/doc/broadcasting.py
index 6c3a4bc75..0bdb6ae7d 100644
--- a/numpy/doc/broadcasting.py
+++ b/numpy/doc/broadcasting.py
@@ -3,6 +3,12 @@
 Broadcasting over arrays
 ========================
 
+.. note::
+    See `this article
+    <https://numpy.org/devdocs/user/theory.broadcasting.html>`_
+    for illustrations of broadcasting concepts.
+
+
 The term broadcasting describes how numpy treats arrays with different
 shapes during arithmetic operations. Subject to certain constraints,
 the smaller array is "broadcast" across the larger array so that they
@@ -172,8 +178,5 @@ Here the ``newaxis`` index operator inserts a new axis into ``a``,
 making it a two-dimensional ``4x1`` array.  Combining the ``4x1`` array
 with ``b``, which has shape ``(3,)``, yields a ``4x3`` array.
 
-See `this article <https://scipy.github.io/old-wiki/pages/EricsBroadcastingDoc>`_
-for illustrations of broadcasting concepts.
-
 """
 from __future__ import division, absolute_import, print_function
diff --git a/numpy/f2py/src/test/foomodule.c b/numpy/f2py/src/test/foomodule.c
index d7ecc2519..733fab0be 100644
--- a/numpy/f2py/src/test/foomodule.c
+++ b/numpy/f2py/src/test/foomodule.c
@@ -116,8 +116,6 @@ static PyMethodDef foo_module_methods[] = {
 void initfoo() {
     int i;
     PyObject *m, *d, *s;
-    PyTypeObject *t;
-    PyObject *f;
     import_array();
 
     m = Py_InitModule("foo", foo_module_methods);
diff --git a/numpy/f2py/tests/test_return_real.py b/numpy/f2py/tests/test_return_real.py
index cc3c4d8f2..315cfe49b 100644
--- a/numpy/f2py/tests/test_return_real.py
+++ b/numpy/f2py/tests/test_return_real.py
@@ -1,5 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
+import platform
 import pytest
 
 from numpy import array
@@ -52,6 +53,11 @@ class TestReturnReal(util.F2PyTest):
             pass
 
 
+
+@pytest.mark.skipif(
+    platform.system() == 'Darwin',
+    reason="Prone to error when run with numpy/f2py/tests on mac os, "
+           "but not when run in isolation")
 class TestCReturnReal(TestReturnReal):
     suffix = ".pyf"
     module_name = "c_ext_return_real"
diff --git a/numpy/f2py/tests/test_semicolon_split.py b/numpy/f2py/tests/test_semicolon_split.py
index 5452b5708..bcd18c893 100644
--- a/numpy/f2py/tests/test_semicolon_split.py
+++ b/numpy/f2py/tests/test_semicolon_split.py
@@ -6,6 +6,10 @@ import pytest
 from . import util
 from numpy.testing import assert_equal
 
+@pytest.mark.skipif(
+    platform.system() == 'Darwin',
+    reason="Prone to error when run with numpy/f2py/tests on mac os, "
+           "but not when run in isolation")
 class TestMultiline(util.F2PyTest):
     suffix = ".pyf"
     module_name = "multiline"
@@ -26,13 +30,14 @@ void foo(int* x) {{
 end python module {module}
     """.format(module=module_name)
 
-    @pytest.mark.skipif(platform.system() == 'Darwin',
-                        reason="Prone to error when run with "
-                               "numpy/f2py/tests on mac os, "
-                               "but not when run in isolation")
     def test_multiline(self):
         assert_equal(self.module.foo(), 42)
 
+
+@pytest.mark.skipif(
+    platform.system() == 'Darwin',
+    reason="Prone to error when run with numpy/f2py/tests on mac os, "
+           "but not when run in isolation")
 class TestCallstatement(util.F2PyTest):
     suffix = ".pyf"
     module_name = "callstatement"
diff --git a/numpy/fft/fftpack.py b/numpy/fft/fftpack.py
index e0e96cc79..de675936f 100644
--- a/numpy/fft/fftpack.py
+++ b/numpy/fft/fftpack.py
@@ -35,9 +35,12 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['fft', 'ifft', 'rfft', 'irfft', 'hfft', 'ihfft', 'rfftn',
            'irfftn', 'rfft2', 'irfft2', 'fft2', 'ifft2', 'fftn', 'ifftn']
 
+import functools
+
 from numpy.core import (array, asarray, zeros, swapaxes, shape, conjugate,
                         take, sqrt)
 from numpy.core.multiarray import normalize_axis_index
+from numpy.core import overrides
 from . import fftpack_lite as fftpack
 from .helper import _FFTCache
 
@@ -45,6 +48,10 @@ _fft_cache = _FFTCache(max_size_in_mb=100, max_item_count=32)
 _real_fft_cache = _FFTCache(max_size_in_mb=100, max_item_count=32)
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy.fft')
+
+
 def _raw_fft(a, n=None, axis=-1, init_function=fftpack.cffti,
              work_function=fftpack.cfftf, fft_cache=_fft_cache):
     a = asarray(a)
@@ -101,6 +108,11 @@ def _unitary(norm):
     return norm is not None
 
 
+def _fft_dispatcher(a, n=None, axis=None, norm=None):
+    return (a,)
+
+
+@array_function_dispatch(_fft_dispatcher)
 def fft(a, n=None, axis=-1, norm=None):
     """
     Compute the one-dimensional discrete Fourier Transform.
@@ -197,6 +209,7 @@ def fft(a, n=None, axis=-1, norm=None):
     return output
 
 
+@array_function_dispatch(_fft_dispatcher)
 def ifft(a, n=None, axis=-1, norm=None):
     """
     Compute the one-dimensional inverse discrete Fourier Transform.
@@ -290,6 +303,8 @@ def ifft(a, n=None, axis=-1, norm=None):
     return output * (1 / (sqrt(n) if unitary else n))
 
 
+
+@array_function_dispatch(_fft_dispatcher)
 def rfft(a, n=None, axis=-1, norm=None):
     """
     Compute the one-dimensional discrete Fourier Transform for real input.
@@ -379,6 +394,7 @@ def rfft(a, n=None, axis=-1, norm=None):
     return output
 
 
+@array_function_dispatch(_fft_dispatcher)
 def irfft(a, n=None, axis=-1, norm=None):
     """
     Compute the inverse of the n-point DFT for real input.
@@ -469,6 +485,7 @@ def irfft(a, n=None, axis=-1, norm=None):
     return output * (1 / (sqrt(n) if unitary else n))
 
 
+@array_function_dispatch(_fft_dispatcher)
 def hfft(a, n=None, axis=-1, norm=None):
     """
     Compute the FFT of a signal that has Hermitian symmetry, i.e., a real
@@ -551,6 +568,7 @@ def hfft(a, n=None, axis=-1, norm=None):
     return irfft(conjugate(a), n, axis) * (sqrt(n) if unitary else n)
 
 
+@array_function_dispatch(_fft_dispatcher)
 def ihfft(a, n=None, axis=-1, norm=None):
     """
     Compute the inverse FFT of a signal that has Hermitian symmetry.
@@ -641,6 +659,11 @@ def _raw_fftnd(a, s=None, axes=None, function=fft, norm=None):
     return a
 
 
+def _fftn_dispatcher(a, s=None, axes=None, norm=None):
+    return (a,)
+
+
+@array_function_dispatch(_fftn_dispatcher)
 def fftn(a, s=None, axes=None, norm=None):
     """
     Compute the N-dimensional discrete Fourier Transform.
@@ -738,6 +761,7 @@ def fftn(a, s=None, axes=None, norm=None):
     return _raw_fftnd(a, s, axes, fft, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def ifftn(a, s=None, axes=None, norm=None):
     """
     Compute the N-dimensional inverse discrete Fourier Transform.
@@ -835,6 +859,7 @@ def ifftn(a, s=None, axes=None, norm=None):
     return _raw_fftnd(a, s, axes, ifft, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def fft2(a, s=None, axes=(-2, -1), norm=None):
     """
     Compute the 2-dimensional discrete Fourier Transform
@@ -925,6 +950,7 @@ def fft2(a, s=None, axes=(-2, -1), norm=None):
     return _raw_fftnd(a, s, axes, fft, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def ifft2(a, s=None, axes=(-2, -1), norm=None):
     """
     Compute the 2-dimensional inverse discrete Fourier Transform.
@@ -1012,6 +1038,7 @@ def ifft2(a, s=None, axes=(-2, -1), norm=None):
     return _raw_fftnd(a, s, axes, ifft, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def rfftn(a, s=None, axes=None, norm=None):
     """
     Compute the N-dimensional discrete Fourier Transform for real input.
@@ -1104,6 +1131,7 @@ def rfftn(a, s=None, axes=None, norm=None):
     return a
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def rfft2(a, s=None, axes=(-2, -1), norm=None):
     """
     Compute the 2-dimensional FFT of a real array.
@@ -1141,6 +1169,7 @@ def rfft2(a, s=None, axes=(-2, -1), norm=None):
     return rfftn(a, s, axes, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def irfftn(a, s=None, axes=None, norm=None):
     """
     Compute the inverse of the N-dimensional FFT of real input.
@@ -1235,6 +1264,7 @@ def irfftn(a, s=None, axes=None, norm=None):
     return a
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def irfft2(a, s=None, axes=(-2, -1), norm=None):
     """
     Compute the 2-dimensional inverse FFT of a real array.
diff --git a/numpy/fft/helper.py b/numpy/fft/helper.py
index 729121f31..e65883651 100644
--- a/numpy/fft/helper.py
+++ b/numpy/fft/helper.py
@@ -11,6 +11,7 @@ except ImportError:
     import dummy_threading as threading
 from numpy.compat import integer_types
 from numpy.core import integer, empty, arange, asarray, roll
+from numpy.core.overrides import array_function_dispatch
 
 # Created by Pearu Peterson, September 2002
 
@@ -19,6 +20,11 @@ __all__ = ['fftshift', 'ifftshift', 'fftfreq', 'rfftfreq']
 integer_types = integer_types + (integer,)
 
 
+def _fftshift_dispatcher(x, axes=None):
+    return (x,)
+
+
+@array_function_dispatch(_fftshift_dispatcher, module='numpy.fft')
 def fftshift(x, axes=None):
     """
     Shift the zero-frequency component to the center of the spectrum.
@@ -75,6 +81,7 @@ def fftshift(x, axes=None):
     return roll(x, shift, axes)
 
 
+@array_function_dispatch(_fftshift_dispatcher, module='numpy.fft')
 def ifftshift(x, axes=None):
     """
     The inverse of `fftshift`. Although identical for even-length `x`, the
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index e9ca9de4d..d27a3918f 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -6,6 +6,7 @@ of an n-dimensional array.
 from __future__ import division, absolute_import, print_function
 
 import numpy as np
+from numpy.core.overrides import array_function_dispatch
 
 
 __all__ = ['pad']
@@ -990,6 +991,11 @@ def _validate_lengths(narray, number_elements):
 # Public functions
 
 
+def _pad_dispatcher(array, pad_width, mode, **kwargs):
+    return (array,)
+
+
+@array_function_dispatch(_pad_dispatcher, module='numpy')
 def pad(array, pad_width, mode, **kwargs):
     """
     Pads an array.
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 62e9b6d50..850e20123 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -27,7 +27,14 @@ To do: Optionally return indices analogously to unique for all functions.
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
+
 import numpy as np
+from numpy.core import overrides
+
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
 
 
 __all__ = [
@@ -36,6 +43,11 @@ __all__ = [
     ]
 
 
+def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
+    return (ary, to_end, to_begin)
+
+
+@array_function_dispatch(_ediff1d_dispatcher)
 def ediff1d(ary, to_end=None, to_begin=None):
     """
     The differences between consecutive elements of an array.
@@ -133,6 +145,12 @@ def _unpack_tuple(x):
         return x
 
 
+def _unique_dispatcher(ar, return_index=None, return_inverse=None,
+                       return_counts=None, axis=None):
+    return (ar,)
+
+
+@array_function_dispatch(_unique_dispatcher)
 def unique(ar, return_index=False, return_inverse=False,
            return_counts=False, axis=None):
     """
@@ -313,6 +331,12 @@ def _unique1d(ar, return_index=False, return_inverse=False,
     return ret
 
 
+def _intersect1d_dispatcher(
+        ar1, ar2, assume_unique=None, return_indices=None):
+    return (ar1, ar2)
+
+
+@array_function_dispatch(_intersect1d_dispatcher)
 def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
     """
     Find the intersection of two arrays.
@@ -408,6 +432,11 @@ def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
         return int1d
 
 
+def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
+    return (ar1, ar2)
+
+
+@array_function_dispatch(_setxor1d_dispatcher)
 def setxor1d(ar1, ar2, assume_unique=False):
     """
     Find the set exclusive-or of two arrays.
@@ -562,6 +591,11 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
         return ret[rev_idx]
 
 
+def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None):
+    return (element, test_elements)
+
+
+@array_function_dispatch(_isin_dispatcher)
 def isin(element, test_elements, assume_unique=False, invert=False):
     """
     Calculates `element in test_elements`, broadcasting over `element` only.
@@ -660,6 +694,11 @@ def isin(element, test_elements, assume_unique=False, invert=False):
                 invert=invert).reshape(element.shape)
 
 
+def _union1d_dispatcher(ar1, ar2):
+    return (ar1, ar2)
+
+
+@array_function_dispatch(_union1d_dispatcher)
 def union1d(ar1, ar2):
     """
     Find the union of two arrays.
@@ -695,11 +734,17 @@ def union1d(ar1, ar2):
     """
     return unique(np.concatenate((ar1, ar2), axis=None))
 
+
+def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
+    return (ar1, ar2)
+
+
+@array_function_dispatch(_setdiff1d_dispatcher)
 def setdiff1d(ar1, ar2, assume_unique=False):
     """
     Find the set difference of two arrays.
 
-    Return the sorted, unique values in `ar1` that are not in `ar2`.
+    Return the unique values in `ar1` that are not in `ar2`.
 
     Parameters
     ----------
@@ -714,7 +759,9 @@ def setdiff1d(ar1, ar2, assume_unique=False):
     Returns
     -------
     setdiff1d : ndarray
-        Sorted 1D array of values in `ar1` that are not in `ar2`.
+        1D array of values in `ar1` that are not in `ar2`. The result
+        is sorted when `assume_unique=False`, but otherwise only sorted
+        if the input is sorted.
 
     See Also
     --------
diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py
index 06fa1bd92..e1e297492 100644
--- a/numpy/lib/financial.py
+++ b/numpy/lib/financial.py
@@ -13,8 +13,15 @@ otherwise stated.
 from __future__ import division, absolute_import, print_function
 
 from decimal import Decimal
+import functools
 
 import numpy as np
+from numpy.core import overrides
+
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
 
 __all__ = ['fv', 'pmt', 'nper', 'ipmt', 'ppmt', 'pv', 'rate',
            'irr', 'npv', 'mirr']
@@ -36,6 +43,12 @@ def _convert_when(when):
     except (KeyError, TypeError):
         return [_when_to_num[x] for x in when]
 
+
+def _fv_dispatcher(rate, nper, pmt, pv, when=None):
+    return (rate, nper, pmt, pv)
+
+
+@array_function_dispatch(_fv_dispatcher)
 def fv(rate, nper, pmt, pv, when='end'):
     """
     Compute the future value.
@@ -124,6 +137,12 @@ def fv(rate, nper, pmt, pv, when='end'):
                     (1 + rate*when)*(temp - 1)/rate)
     return -(pv*temp + pmt*fact)
 
+
+def _pmt_dispatcher(rate, nper, pv, fv=None, when=None):
+    return (rate, nper, pv, fv)
+
+
+@array_function_dispatch(_pmt_dispatcher)
 def pmt(rate, nper, pv, fv=0, when='end'):
     """
     Compute the payment against loan principal plus interest.
@@ -216,6 +235,12 @@ def pmt(rate, nper, pv, fv=0, when='end'):
                     (1 + masked_rate*when)*(temp - 1)/masked_rate)
     return -(fv + pv*temp) / fact
 
+
+def _nper_dispatcher(rate, pmt, pv, fv=None, when=None):
+    return (rate, pmt, pv, fv)
+
+
+@array_function_dispatch(_nper_dispatcher)
 def nper(rate, pmt, pv, fv=0, when='end'):
     """
     Compute the number of periodic payments.
@@ -284,6 +309,12 @@ def nper(rate, pmt, pv, fv=0, when='end'):
         B = np.log((-fv+z) / (pv+z))/np.log(1+rate)
         return np.where(rate == 0, A, B)
 
+
+def _ipmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+    return (rate, per, nper, pv, fv)
+
+
+@array_function_dispatch(_ipmt_dispatcher)
 def ipmt(rate, per, nper, pv, fv=0, when='end'):
     """
     Compute the interest portion of a payment.
@@ -379,6 +410,7 @@ def ipmt(rate, per, nper, pv, fv=0, when='end'):
         pass
     return ipmt
 
+
 def _rbl(rate, per, pmt, pv, when):
     """
     This function is here to simply have a different name for the 'fv'
@@ -388,6 +420,12 @@ def _rbl(rate, per, pmt, pv, when):
     """
     return fv(rate, (per - 1), pmt, pv, when)
 
+
+def _ppmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+    return (rate, per, nper, pv, fv)
+
+
+@array_function_dispatch(_ppmt_dispatcher)
 def ppmt(rate, per, nper, pv, fv=0, when='end'):
     """
     Compute the payment against loan principal.
@@ -416,6 +454,12 @@ def ppmt(rate, per, nper, pv, fv=0, when='end'):
     total = pmt(rate, nper, pv, fv, when)
     return total - ipmt(rate, per, nper, pv, fv, when)
 
+
+def _pv_dispatcher(rate, nper, pmt, fv=None, when=None):
+    return (rate, nper, nper, pv, fv)
+
+
+@array_function_dispatch(_pv_dispatcher)
 def pv(rate, nper, pmt, fv=0, when='end'):
     """
     Compute the present value.
@@ -520,6 +564,12 @@ def _g_div_gp(r, n, p, x, y, w):
                 (n*t2*x - p*(t1 - 1)*(r*w + 1)/(r**2) + n*p*t2*(r*w + 1)/r +
                  p*(t1 - 1)*w/r))
 
+
+def _rate_dispatcher(nper, pmt, pv, fv, when=None, guess=None, tol=None,
+                     maxiter=None):
+    return (nper, pmt, pv, fv)
+
+
 # Use Newton's iteration until the change is less than 1e-6
 #  for all values or a maximum of 100 iterations is reached.
 #  Newton's rule is
@@ -527,6 +577,7 @@ def _g_div_gp(r, n, p, x, y, w):
 #     where
 #  g(r) is the formula
 #  g'(r) is the derivative with respect to r.
+@array_function_dispatch(_rate_dispatcher)
 def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
     """
     Compute the rate of interest per period.
@@ -598,6 +649,12 @@ def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
     else:
         return rn
 
+
+def _irr_dispatcher(values):
+    return (values,)
+
+
+@array_function_dispatch(_irr_dispatcher)
 def irr(values):
     """
     Return the Internal Rate of Return (IRR).
@@ -677,6 +734,12 @@ def irr(values):
     rate = rate.item(np.argmin(np.abs(rate)))
     return rate
 
+
+def _npv_dispatcher(rate, values):
+    return (values,)
+
+
+@array_function_dispatch(_npv_dispatcher)
 def npv(rate, values):
     """
     Returns the NPV (Net Present Value) of a cash flow series.
@@ -722,6 +785,12 @@ def npv(rate, values):
     values = np.asarray(values)
     return (values / (1+rate)**np.arange(0, len(values))).sum(axis=0)
 
+
+def _mirr_dispatcher(values, finance_rate, reinvest_rate):
+    return (values,)
+
+
+@array_function_dispatch(_mirr_dispatcher)
 def mirr(values, finance_rate, reinvest_rate):
     """
     Modified internal rate of return.
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index ef5ec57e3..1ef3dca47 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -161,12 +161,11 @@ import sys
 import io
 import warnings
 from numpy.lib.utils import safe_eval
-from numpy.compat import asbytes, asstr, isfileobj, long, basestring
+from numpy.compat import (
+    asbytes, asstr, isfileobj, long, os_fspath
+    )
+from numpy.core.numeric import pickle
 
-if sys.version_info[0] >= 3:
-    import pickle
-else:
-    import cPickle as pickle
 
 MAGIC_PREFIX = b'\x93NUMPY'
 MAGIC_LEN = len(MAGIC_PREFIX) + 2
@@ -709,7 +708,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
 
     Parameters
     ----------
-    filename : str
+    filename : str or path-like
         The name of the file on disk.  This may *not* be a file-like
         object.
     mode : str, optional
@@ -750,9 +749,9 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
     memmap
 
     """
-    if not isinstance(filename, basestring):
-        raise ValueError("Filename must be a string.  Memmap cannot use"
-                         " existing file handles.")
+    if isfileobj(filename):
+        raise ValueError("Filename must be a string or a path-like object."
+                         "  Memmap cannot use existing file handles.")
 
     if 'w' in mode:
         # We are creating the file, not reading it.
@@ -770,7 +769,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
             shape=shape,
         )
         # If we got here, then it should be safe to create the file.
-        fp = open(filename, mode+'b')
+        fp = open(os_fspath(filename), mode+'b')
         try:
             used_ver = _write_array_header(fp, d, version)
             # this warning can be removed when 1.9 has aged enough
@@ -782,7 +781,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
             fp.close()
     else:
         # Read the header of the file first.
-        fp = open(filename, 'rb')
+        fp = open(os_fspath(filename), 'rb')
         try:
             version = read_magic(fp)
             _check_version(version)
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index e2a8f4bc2..fae6541bc 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -6,6 +6,7 @@ try:
     import collections.abc as collections_abc
 except ImportError:
     import collections as collections_abc
+import functools
 import re
 import sys
 import warnings
@@ -26,6 +27,7 @@ from numpy.core.fromnumeric import (
     ravel, nonzero, partition, mean, any, sum
     )
 from numpy.core.numerictypes import typecodes
+from numpy.core import overrides
 from numpy.core.function_base import add_newdoc
 from numpy.lib.twodim_base import diag
 from .utils import deprecate
@@ -43,6 +45,11 @@ if sys.version_info[0] < 3:
 else:
     import builtins
 
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 # needed in this module for compatibility
 from numpy.lib.histograms import histogram, histogramdd
 
@@ -58,6 +65,11 @@ __all__ = [
     ]
 
 
+def _rot90_dispatcher(m, k=None, axes=None):
+    return (m,)
+
+
+@array_function_dispatch(_rot90_dispatcher)
 def rot90(m, k=1, axes=(0,1)):
     """
     Rotate an array by 90 degrees in the plane specified by axes.
@@ -144,6 +156,11 @@ def rot90(m, k=1, axes=(0,1)):
         return flip(transpose(m, axes_list), axes[1])
 
 
+def _flip_dispatcher(m, axis=None):
+    return (m,)
+
+
+@array_function_dispatch(_flip_dispatcher)
 def flip(m, axis=None):
     """
     Reverse the order of elements in an array along the given axis.
@@ -268,6 +285,11 @@ def iterable(y):
     return True
 
 
+def _average_dispatcher(a, axis=None, weights=None, returned=None):
+    return (a, weights)
+
+
+@array_function_dispatch(_average_dispatcher)
 def average(a, axis=None, weights=None, returned=False):
     """
     Compute the weighted average along the specified axis.
@@ -474,6 +496,15 @@ def asarray_chkfinite(a, dtype=None, order=None):
     return a
 
 
+def _piecewise_dispatcher(x, condlist, funclist, *args, **kw):
+    yield x
+    # support the undocumented behavior of allowing scalars
+    if np.iterable(condlist):
+        for c in condlist:
+            yield c
+
+
+@array_function_dispatch(_piecewise_dispatcher)
 def piecewise(x, condlist, funclist, *args, **kw):
     """
     Evaluate a piecewise-defined function.
@@ -595,6 +626,14 @@ def piecewise(x, condlist, funclist, *args, **kw):
     return y
 
 
+def _select_dispatcher(condlist, choicelist, default=None):
+    for c in condlist:
+        yield c
+    for c in choicelist:
+        yield c
+
+
+@array_function_dispatch(_select_dispatcher)
 def select(condlist, choicelist, default=0):
     """
     Return an array drawn from elements in choicelist, depending on conditions.
@@ -698,6 +737,11 @@ def select(condlist, choicelist, default=0):
     return result
 
 
+def _copy_dispatcher(a, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_copy_dispatcher)
 def copy(a, order='K'):
     """
     Return an array copy of the given object.
@@ -747,6 +791,13 @@ def copy(a, order='K'):
 # Basic operations
 
 
+def _gradient_dispatcher(f, *varargs, **kwargs):
+    yield f
+    for v in varargs:
+        yield v
+
+
+@array_function_dispatch(_gradient_dispatcher)
 def gradient(f, *varargs, **kwargs):
     """
     Return the gradient of an N-dimensional array.
@@ -1088,6 +1139,11 @@ def gradient(f, *varargs, **kwargs):
         return outvals
 
 
+def _diff_dispatcher(a, n=None, axis=None, prepend=None, append=None):
+    return (a, prepend, append)
+
+
+@array_function_dispatch(_diff_dispatcher)
 def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue):
     """
     Calculate the n-th discrete difference along the given axis.
@@ -1216,6 +1272,11 @@ def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue):
     return a
 
 
+def _interp_dispatcher(x, xp, fp, left=None, right=None, period=None):
+    return (x, xp, fp)
+
+
+@array_function_dispatch(_interp_dispatcher)
 def interp(x, xp, fp, left=None, right=None, period=None):
     """
     One-dimensional linear interpolation.
@@ -1348,6 +1409,11 @@ def interp(x, xp, fp, left=None, right=None, period=None):
     return interp_func(x, xp, fp, left, right)
 
 
+def _angle_dispatcher(z, deg=None):
+    return (z,)
+
+
+@array_function_dispatch(_angle_dispatcher)
 def angle(z, deg=False):
     """
     Return the angle of the complex argument.
@@ -1395,6 +1461,11 @@ def angle(z, deg=False):
     return a
 
 
+def _unwrap_dispatcher(p, discont=None, axis=None):
+    return (p,)
+
+
+@array_function_dispatch(_unwrap_dispatcher)
 def unwrap(p, discont=pi, axis=-1):
     """
     Unwrap by changing deltas between values to 2*pi complement.
@@ -1451,6 +1522,11 @@ def unwrap(p, discont=pi, axis=-1):
     return up
 
 
+def _sort_complex(a):
+    return (a,)
+
+
+@array_function_dispatch(_sort_complex)
 def sort_complex(a):
     """
     Sort a complex array using the real part first, then the imaginary part.
@@ -1487,6 +1563,11 @@ def sort_complex(a):
         return b
 
 
+def _trim_zeros(filt, trim=None):
+    return (filt,)
+
+
+@array_function_dispatch(_trim_zeros)
 def trim_zeros(filt, trim='fb'):
     """
     Trim the leading and/or trailing zeros from a 1-D array or sequence.
@@ -1556,6 +1637,11 @@ def unique(x):
         return asarray(items)
 
 
+def _extract_dispatcher(condition, arr):
+    return (condition, arr)
+
+
+@array_function_dispatch(_extract_dispatcher)
 def extract(condition, arr):
     """
     Return the elements of an array that satisfy some condition.
@@ -1607,6 +1693,11 @@ def extract(condition, arr):
     return _nx.take(ravel(arr), nonzero(ravel(condition))[0])
 
 
+def _place_dispatcher(arr, mask, vals):
+    return (arr, mask, vals)
+
+
+@array_function_dispatch(_place_dispatcher)
 def place(arr, mask, vals):
     """
     Change elements of an array based on conditional and input values.
@@ -2161,6 +2252,12 @@ class vectorize(object):
         return outputs[0] if nout == 1 else outputs
 
 
+def _cov_dispatcher(m, y=None, rowvar=None, bias=None, ddof=None,
+                    fweights=None, aweights=None):
+    return (m, y, fweights, aweights)
+
+
+@array_function_dispatch(_cov_dispatcher)
 def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
         aweights=None):
     """
@@ -2370,6 +2467,11 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
     return c.squeeze()
 
 
+def _corrcoef_dispatcher(x, y=None, rowvar=None, bias=None, ddof=None):
+    return (x, y)
+
+
+@array_function_dispatch(_corrcoef_dispatcher)
 def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue):
     """
     Return Pearson product-moment correlation coefficients.
@@ -2938,6 +3040,11 @@ def _i0_2(x):
     return exp(x) * _chbevl(32.0/x - 2.0, _i0B) / sqrt(x)
 
 
+def _i0_dispatcher(x):
+    return (x,)
+
+
+@array_function_dispatch(_i0_dispatcher)
 def i0(x):
     """
     Modified Bessel function of the first kind, order 0.
@@ -3132,6 +3239,11 @@ def kaiser(M, beta):
     return i0(beta * sqrt(1-((n-alpha)/alpha)**2.0))/i0(float(beta))
 
 
+def _sinc_dispatcher(x):
+    return (x,)
+
+
+@array_function_dispatch(_sinc_dispatcher)
 def sinc(x):
     """
     Return the sinc function.
@@ -3211,6 +3323,11 @@ def sinc(x):
     return sin(y)/y
 
 
+def _msort_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_msort_dispatcher)
 def msort(a):
     """
     Return a copy of an array sorted along the first axis.
@@ -3294,6 +3411,12 @@ def _ureduce(a, func, **kwargs):
     return r, keepdim
 
 
+def _median_dispatcher(
+        a, axis=None, out=None, overwrite_input=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_median_dispatcher)
 def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
     """
     Compute the median along the specified axis.
@@ -3438,6 +3561,12 @@ def _median(a, axis=None, out=None, overwrite_input=False):
         return mean(part[indexer], axis=axis, out=out)
 
 
+def _percentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+                           interpolation=None, keepdims=None):
+    return (a, q, out)
+
+
+@array_function_dispatch(_percentile_dispatcher)
 def percentile(a, q, axis=None, out=None,
                overwrite_input=False, interpolation='linear', keepdims=False):
     """
@@ -3583,6 +3712,12 @@ def percentile(a, q, axis=None, out=None,
         a, q, axis, out, overwrite_input, interpolation, keepdims)
 
 
+def _quantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+                         interpolation=None, keepdims=None):
+    return (a, q, out)
+
+
+@array_function_dispatch(_quantile_dispatcher)
 def quantile(a, q, axis=None, out=None,
              overwrite_input=False, interpolation='linear', keepdims=False):
     """
@@ -3845,6 +3980,11 @@ def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
     return r
 
 
+def _trapz_dispatcher(y, x=None, dx=None, axis=None):
+    return (y, x)
+
+
+@array_function_dispatch(_trapz_dispatcher)
 def trapz(y, x=None, dx=1.0, axis=-1):
     """
     Integrate along the given axis using the composite trapezoidal rule.
@@ -3935,7 +4075,12 @@ def trapz(y, x=None, dx=1.0, axis=-1):
     return ret
 
 
+def _meshgrid_dispatcher(*xi, **kwargs):
+    return xi
+
+
 # Based on scitools meshgrid
+@array_function_dispatch(_meshgrid_dispatcher)
 def meshgrid(*xi, **kwargs):
     """
     Return coordinate matrices from coordinate vectors.
@@ -4073,6 +4218,11 @@ def meshgrid(*xi, **kwargs):
     return output
 
 
+def _delete_dispatcher(arr, obj, axis=None):
+    return (arr, obj)
+
+
+@array_function_dispatch(_delete_dispatcher)
 def delete(arr, obj, axis=None):
     """
     Return a new array with sub-arrays along an axis deleted. For a one
@@ -4278,6 +4428,11 @@ def delete(arr, obj, axis=None):
         return new
 
 
+def _insert_dispatcher(arr, obj, values, axis=None):
+    return (arr, obj, values)
+
+
+@array_function_dispatch(_insert_dispatcher)
 def insert(arr, obj, values, axis=None):
     """
     Insert values along the given axis before the given indices.
@@ -4484,6 +4639,11 @@ def insert(arr, obj, values, axis=None):
     return new
 
 
+def _append_dispatcher(arr, values, axis=None):
+    return (arr, values)
+
+
+@array_function_dispatch(_append_dispatcher)
 def append(arr, values, axis=None):
     """
     Append values to the end of an array.
@@ -4539,6 +4699,11 @@ def append(arr, values, axis=None):
     return concatenate((arr, values), axis=axis)
 
 
+def _digitize_dispatcher(x, bins, right=None):
+    return (x, bins)
+
+
+@array_function_dispatch(_digitize_dispatcher)
 def digitize(x, bins, right=False):
     """
     Return the indices of the bins to which each value in input array belongs.
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index f03f30fb0..1ff25b81f 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -8,6 +8,7 @@ import warnings
 
 import numpy as np
 from numpy.compat.py3k import basestring
+from numpy.core.overrides import array_function_dispatch
 
 __all__ = ['histogram', 'histogramdd', 'histogram_bin_edges']
 
@@ -220,6 +221,14 @@ _hist_bin_selectors = {'auto': _hist_bin_auto,
 def _ravel_and_check_weights(a, weights):
     """ Check a and weights have matching shapes, and ravel both """
     a = np.asarray(a)
+
+    # Ensure that the array is a "subtractable" dtype
+    if a.dtype == np.bool_:
+        warnings.warn("Converting input from {} to {} for compatibility."
+                      .format(a.dtype, np.uint8),
+                      RuntimeWarning, stacklevel=2)
+        a = a.astype(np.uint8)
+
     if weights is not None:
         weights = np.asarray(weights)
         if weights.shape != a.shape:
@@ -392,6 +401,11 @@ def _search_sorted_inclusive(a, v):
     ))
 
 
+def _histogram_bin_edges_dispatcher(a, bins=None, range=None, weights=None):
+    return (a, bins, weights)
+
+
+@array_function_dispatch(_histogram_bin_edges_dispatcher)
 def histogram_bin_edges(a, bins=10, range=None, weights=None):
     r"""
     Function to calculate only the edges of the bins used by the `histogram` function.
@@ -586,6 +600,12 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
     return bin_edges
 
 
+def _histogram_dispatcher(
+        a, bins=None, range=None, normed=None, weights=None, density=None):
+    return (a, bins, weights)
+
+
+@array_function_dispatch(_histogram_dispatcher)
 def histogram(a, bins=10, range=None, normed=None, weights=None,
               density=None):
     r"""
@@ -838,6 +858,12 @@ def histogram(a, bins=10, range=None, normed=None, weights=None,
         return n, bin_edges
 
 
+def _histogramdd_dispatcher(sample, bins=None, range=None, normed=None,
+                            weights=None, density=None):
+    return (sample, bins, weights)
+
+
+@array_function_dispatch(_histogramdd_dispatcher)
 def histogramdd(sample, bins=10, range=None, normed=None, weights=None,
                 density=None):
     """
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index 009e6d229..ff2e00d3e 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -1,5 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
+import functools
 import sys
 import math
 
@@ -9,13 +10,17 @@ from numpy.core.numeric import (
     )
 from numpy.core.numerictypes import find_common_type, issubdtype
 
-from . import function_base
 import numpy.matrixlib as matrixlib
 from .function_base import diff
 from numpy.core.multiarray import ravel_multi_index, unravel_index
+from numpy.core import overrides, linspace
 from numpy.lib.stride_tricks import as_strided
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 __all__ = [
     'ravel_multi_index', 'unravel_index', 'mgrid', 'ogrid', 'r_', 'c_',
     's_', 'index_exp', 'ix_', 'ndenumerate', 'ndindex', 'fill_diagonal',
@@ -23,6 +28,11 @@ __all__ = [
     ]
 
 
+def _ix__dispatcher(*args):
+    return args
+
+
+@array_function_dispatch(_ix__dispatcher)
 def ix_(*args):
     """
     Construct an open mesh from multiple sequences.
@@ -194,9 +204,6 @@ class nd_grid(object):
             else:
                 return _nx.arange(start, stop, step)
 
-    def __len__(self):
-        return 0
-
 
 class MGridClass(nd_grid):
     """
@@ -338,7 +345,7 @@ class AxisConcatenator(object):
                     step = 1
                 if isinstance(step, complex):
                     size = int(abs(step))
-                    newobj = function_base.linspace(start, stop, num=size)
+                    newobj = linspace(start, stop, num=size)
                 else:
                     newobj = _nx.arange(start, stop, step)
                 if ndmin > 1:
@@ -729,6 +736,12 @@ s_ = IndexExpression(maketuple=False)
 # The following functions complement those in twodim_base, but are
 # applicable to N-dimensions.
 
+
+def _fill_diagonal_dispatcher(a, val, wrap=None):
+    return (a,)
+
+
+@array_function_dispatch(_fill_diagonal_dispatcher)
 def fill_diagonal(a, val, wrap=False):
     """Fill the main diagonal of the given array of any dimensionality.
 
@@ -911,6 +924,11 @@ def diag_indices(n, ndim=2):
     return (idx,) * ndim
 
 
+def _diag_indices_from(arr):
+    return (arr,)
+
+
+@array_function_dispatch(_diag_indices_from)
 def diag_indices_from(arr):
     """
     Return the indices to access the main diagonal of an n-dimensional array.
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 8d6b0f139..d73d84467 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -22,9 +22,15 @@ Functions
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
 import warnings
 import numpy as np
 from numpy.lib import function_base
+from numpy.core import overrides
+
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
 
 
 __all__ = [
@@ -188,6 +194,11 @@ def _divide_by_count(a, b, out=None):
                 return np.divide(a, b, out=out, casting='unsafe')
 
 
+def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanmin_dispatcher)
 def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Return minimum of an array or minimum along an axis, ignoring any NaNs.
@@ -296,6 +307,11 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
     return res
 
 
+def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanmax_dispatcher)
 def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Return the maximum of an array or maximum along an axis, ignoring any
@@ -404,6 +420,11 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
     return res
 
 
+def _nanargmin_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_nanargmin_dispatcher)
 def nanargmin(a, axis=None):
     """
     Return the indices of the minimum values in the specified axis ignoring
@@ -448,6 +469,11 @@ def nanargmin(a, axis=None):
     return res
 
 
+def _nanargmax_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_nanargmax_dispatcher)
 def nanargmax(a, axis=None):
     """
     Return the indices of the maximum values in the specified axis ignoring
@@ -493,6 +519,11 @@ def nanargmax(a, axis=None):
     return res
 
 
+def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nansum_dispatcher)
 def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Return the sum of array elements over a given axis treating Not a
@@ -583,6 +614,11 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
 
 
+def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanprod_dispatcher)
 def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Return the product of array elements over a given axis treating Not a
@@ -648,6 +684,11 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
 
 
+def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nancumsum_dispatcher)
 def nancumsum(a, axis=None, dtype=None, out=None):
     """
     Return the cumulative sum of array elements over a given axis treating Not a
@@ -713,6 +754,11 @@ def nancumsum(a, axis=None, dtype=None, out=None):
     return np.cumsum(a, axis=axis, dtype=dtype, out=out)
 
 
+def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nancumprod_dispatcher)
 def nancumprod(a, axis=None, dtype=None, out=None):
     """
     Return the cumulative product of array elements over a given axis treating Not a
@@ -775,6 +821,11 @@ def nancumprod(a, axis=None, dtype=None, out=None):
     return np.cumprod(a, axis=axis, dtype=dtype, out=out)
 
 
+def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanmean_dispatcher)
 def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Compute the arithmetic mean along the specified axis, ignoring NaNs.
@@ -928,6 +979,12 @@ def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
     return m.filled(np.nan)
 
 
+def _nanmedian_dispatcher(
+        a, axis=None, out=None, overwrite_input=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanmedian_dispatcher)
 def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue):
     """
     Compute the median along the specified axis, while ignoring NaNs.
@@ -1026,6 +1083,12 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu
         return r
 
 
+def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+                              interpolation=None, keepdims=None):
+    return (a, q, out)
+
+
+@array_function_dispatch(_nanpercentile_dispatcher)
 def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
                   interpolation='linear', keepdims=np._NoValue):
     """
@@ -1146,6 +1209,12 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
         a, q, axis, out, overwrite_input, interpolation, keepdims)
 
 
+def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+                            interpolation=None, keepdims=None):
+    return (a, q, out)
+
+
+@array_function_dispatch(_nanquantile_dispatcher)
 def nanquantile(a, q, axis=None, out=None, overwrite_input=False,
                 interpolation='linear', keepdims=np._NoValue):
     """
@@ -1308,6 +1377,12 @@ def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'):
         arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation)
 
 
+def _nanvar_dispatcher(
+        a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanvar_dispatcher)
 def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     """
     Compute the variance along the specified axis, while ignoring NaNs.
@@ -1449,6 +1524,12 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     return var
 
 
+def _nanstd_dispatcher(
+        a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanstd_dispatcher)
 def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     """
     Compute the standard deviation along the specified axis, while
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 7eb203868..6fbb7e805 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -12,6 +12,7 @@ import numpy as np
 from . import format
 from ._datasource import DataSource
 from numpy.core.multiarray import packbits, unpackbits
+from numpy.core.overrides import array_function_dispatch
 from numpy.core._internal import recursive
 from ._iotools import (
     LineSplitter, NameValidator, StringConverter, ConverterError,
@@ -21,14 +22,13 @@ from ._iotools import (
 
 from numpy.compat import (
     asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode,
-    is_pathlib_path
+    os_fspath, os_PathLike
     )
+from numpy.core.numeric import pickle
 
 if sys.version_info[0] >= 3:
-    import pickle
     from collections.abc import Mapping
 else:
-    import cPickle as pickle
     from future_builtins import map
     from collections import Mapping
 
@@ -105,8 +105,8 @@ def zipfile_factory(file, *args, **kwargs):
     pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile
     constructor.
     """
-    if is_pathlib_path(file):
-        file = str(file)
+    if not hasattr(file, 'read'):
+        file = os_fspath(file)
     import zipfile
     kwargs['allowZip64'] = True
     return zipfile.ZipFile(file, *args, **kwargs)
@@ -400,15 +400,12 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
         pickle_kwargs = {}
 
     # TODO: Use contextlib.ExitStack once we drop Python 2
-    if isinstance(file, basestring):
-        fid = open(file, "rb")
-        own_fid = True
-    elif is_pathlib_path(file):
-        fid = file.open("rb")
-        own_fid = True
-    else:
+    if hasattr(file, 'read'):
         fid = file
         own_fid = False
+    else:
+        fid = open(os_fspath(file), "rb")
+        own_fid = True
 
     try:
         # Code to distinguish from NumPy binary files and pickles.
@@ -448,6 +445,11 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
             fid.close()
 
 
+def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
+    return (arr,)
+
+
+@array_function_dispatch(_save_dispatcher)
 def save(file, arr, allow_pickle=True, fix_imports=True):
     """
     Save an array to a binary file in NumPy ``.npy`` format.
@@ -498,18 +500,14 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
 
     """
     own_fid = False
-    if isinstance(file, basestring):
+    if hasattr(file, 'read'):
+        fid = file
+    else:
+        file = os_fspath(file)
         if not file.endswith('.npy'):
             file = file + '.npy'
         fid = open(file, "wb")
         own_fid = True
-    elif is_pathlib_path(file):
-        if not file.name.endswith('.npy'):
-            file = file.parent / (file.name + '.npy')
-        fid = file.open("wb")
-        own_fid = True
-    else:
-        fid = file
 
     if sys.version_info[0] >= 3:
         pickle_kwargs = dict(fix_imports=fix_imports)
@@ -526,6 +524,14 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
             fid.close()
 
 
+def _savez_dispatcher(file, *args, **kwds):
+    for a in args:
+        yield a
+    for v in kwds.values():
+        yield v
+
+
+@array_function_dispatch(_savez_dispatcher)
 def savez(file, *args, **kwds):
     """
     Save several arrays into a single file in uncompressed ``.npz`` format.
@@ -605,6 +611,14 @@ def savez(file, *args, **kwds):
     _savez(file, args, kwds, False)
 
 
+def _savez_compressed_dispatcher(file, *args, **kwds):
+    for a in args:
+        yield a
+    for v in kwds.values():
+        yield v
+
+
+@array_function_dispatch(_savez_compressed_dispatcher)
 def savez_compressed(file, *args, **kwds):
     """
     Save several arrays into a single file in compressed ``.npz`` format.
@@ -674,12 +688,10 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
     # component of the so-called standard library.
     import zipfile
 
-    if isinstance(file, basestring):
+    if not hasattr(file, 'read'):
+        file = os_fspath(file)
         if not file.endswith('.npz'):
             file = file + '.npz'
-    elif is_pathlib_path(file):
-        if not file.name.endswith('.npz'):
-            file = file.parent / (file.name + '.npz')
 
     namedict = kwds
     for i, val in enumerate(args):
@@ -927,8 +939,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
 
     fown = False
     try:
-        if is_pathlib_path(fname):
-            fname = str(fname)
+        if isinstance(fname, os_PathLike):
+            fname = os_fspath(fname)
         if _is_string_like(fname):
             fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
             fencoding = getattr(fh, 'encoding', 'latin1')
@@ -1155,6 +1167,13 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         return X
 
 
+def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None,
+                        header=None, footer=None, comments=None,
+                        encoding=None):
+    return (X,)
+
+
+@array_function_dispatch(_savetxt_dispatcher)
 def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
             footer='', comments='# ', encoding=None):
     """
@@ -1316,8 +1335,8 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
                 self.write = self.write_bytes
 
     own_fh = False
-    if is_pathlib_path(fname):
-        fname = str(fname)
+    if isinstance(fname, os_PathLike):
+        fname = os_fspath(fname)
     if _is_string_like(fname):
         # datasource doesn't support creating a new file ...
         open(fname, 'wt').close()
@@ -1700,8 +1719,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
     # Initialize the filehandle, the LineSplitter and the NameValidator
     own_fhd = False
     try:
-        if is_pathlib_path(fname):
-            fname = str(fname)
+        if isinstance(fname, os_PathLike):
+            fname = os_fspath(fname)
         if isinstance(fname, basestring):
             fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding))
             own_fhd = True
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 9f3b84732..c2702f0a7 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -8,17 +8,24 @@ __all__ = ['poly', 'roots', 'polyint', 'polyder', 'polyadd',
            'polysub', 'polymul', 'polydiv', 'polyval', 'poly1d',
            'polyfit', 'RankWarning']
 
+import functools
 import re
 import warnings
 import numpy.core.numeric as NX
 
 from numpy.core import (isscalar, abs, finfo, atleast_1d, hstack, dot, array,
                         ones)
+from numpy.core import overrides
 from numpy.lib.twodim_base import diag, vander
 from numpy.lib.function_base import trim_zeros
 from numpy.lib.type_check import iscomplex, real, imag, mintypecode
 from numpy.linalg import eigvals, lstsq, inv
 
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 class RankWarning(UserWarning):
     """
     Issued by `polyfit` when the Vandermonde matrix is rank deficient.
@@ -29,6 +36,12 @@ class RankWarning(UserWarning):
     """
     pass
 
+
+def _poly_dispatcher(seq_of_zeros):
+    return seq_of_zeros
+
+
+@array_function_dispatch(_poly_dispatcher)
 def poly(seq_of_zeros):
     """
     Find the coefficients of a polynomial with the given sequence of roots.
@@ -145,6 +158,12 @@ def poly(seq_of_zeros):
 
     return a
 
+
+def _roots_dispatcher(p):
+    return p
+
+
+@array_function_dispatch(_roots_dispatcher)
 def roots(p):
     """
     Return the roots of a polynomial with coefficients given in p.
@@ -229,6 +248,12 @@ def roots(p):
     roots = hstack((roots, NX.zeros(trailing_zeros, roots.dtype)))
     return roots
 
+
+def _polyint_dispatcher(p, m=None, k=None):
+    return (p,)
+
+
+@array_function_dispatch(_polyint_dispatcher)
 def polyint(p, m=1, k=None):
     """
     Return an antiderivative (indefinite integral) of a polynomial.
@@ -322,6 +347,12 @@ def polyint(p, m=1, k=None):
             return poly1d(val)
         return val
 
+
+def _polyder_dispatcher(p, m=None):
+    return (p,)
+
+
+@array_function_dispatch(_polyder_dispatcher)
 def polyder(p, m=1):
     """
     Return the derivative of the specified order of a polynomial.
@@ -390,6 +421,12 @@ def polyder(p, m=1):
         val = poly1d(val)
     return val
 
+
+def _polyfit_dispatcher(x, y, deg, rcond=None, full=None, w=None, cov=None):
+    return (x, y, w)
+
+
+@array_function_dispatch(_polyfit_dispatcher)
 def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
     """
     Least squares polynomial fit.
@@ -610,6 +647,11 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
         return c
 
 
+def _polyval_dispatcher(p, x):
+    return (p, x)
+
+
+@array_function_dispatch(_polyval_dispatcher)
 def polyval(p, x):
     """
     Evaluate a polynomial at specific values.
@@ -679,6 +721,12 @@ def polyval(p, x):
         y = y * x + p[i]
     return y
 
+
+def _binary_op_dispatcher(a1, a2):
+    return (a1, a2)
+
+
+@array_function_dispatch(_binary_op_dispatcher)
 def polyadd(a1, a2):
     """
     Find the sum of two polynomials.
@@ -739,6 +787,8 @@ def polyadd(a1, a2):
         val = poly1d(val)
     return val
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def polysub(a1, a2):
     """
     Difference (subtraction) of two polynomials.
@@ -786,6 +836,7 @@ def polysub(a1, a2):
     return val
 
 
+@array_function_dispatch(_binary_op_dispatcher)
 def polymul(a1, a2):
     """
     Find the product of two polynomials.
@@ -842,6 +893,12 @@ def polymul(a1, a2):
         val = poly1d(val)
     return val
 
+
+def _polydiv_dispatcher(u, v):
+    return (u, v)
+
+
+@array_function_dispatch(_polydiv_dispatcher)
 def polydiv(u, v):
     """
     Returns the quotient and remainder of polynomial division.
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index b6453d5a2..53a586f56 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -14,6 +14,7 @@ import numpy.ma as ma
 from numpy import ndarray, recarray
 from numpy.ma import MaskedArray
 from numpy.ma.mrecords import MaskedRecords
+from numpy.core.overrides import array_function_dispatch
 from numpy.lib._iotools import _is_string_like
 from numpy.compat import basestring
 
@@ -31,6 +32,11 @@ __all__ = [
     ]
 
 
+def _recursive_fill_fields_dispatcher(input, output):
+    return (input, output)
+
+
+@array_function_dispatch(_recursive_fill_fields_dispatcher)
 def recursive_fill_fields(input, output):
     """
     Fills fields from output with fields from input,
@@ -189,6 +195,11 @@ def flatten_descr(ndtype):
         return tuple(descr)
 
 
+def _zip_dtype_dispatcher(seqarrays, flatten=None):
+    return seqarrays
+
+
+@array_function_dispatch(_zip_dtype_dispatcher)
 def zip_dtype(seqarrays, flatten=False):
     newdtype = []
     if flatten:
@@ -205,6 +216,7 @@ def zip_dtype(seqarrays, flatten=False):
     return np.dtype(newdtype)
 
 
+@array_function_dispatch(_zip_dtype_dispatcher)
 def zip_descr(seqarrays, flatten=False):
     """
     Combine the dtype description of a series of arrays.
@@ -297,6 +309,11 @@ def _izip_fields(iterable):
             yield element
 
 
+def _izip_records_dispatcher(seqarrays, fill_value=None, flatten=None):
+    return seqarrays
+
+
+@array_function_dispatch(_izip_records_dispatcher)
 def izip_records(seqarrays, fill_value=None, flatten=True):
     """
     Returns an iterator of concatenated items from a sequence of arrays.
@@ -357,6 +374,12 @@ def _fix_defaults(output, defaults=None):
     return output
 
 
+def _merge_arrays_dispatcher(seqarrays, fill_value=None, flatten=None,
+                             usemask=None, asrecarray=None):
+    return seqarrays
+
+
+@array_function_dispatch(_merge_arrays_dispatcher)
 def merge_arrays(seqarrays, fill_value=-1, flatten=False,
                  usemask=False, asrecarray=False):
     """
@@ -494,6 +517,11 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     return output
 
 
+def _drop_fields_dispatcher(base, drop_names, usemask=None, asrecarray=None):
+    return (base,)
+
+
+@array_function_dispatch(_drop_fields_dispatcher)
 def drop_fields(base, drop_names, usemask=True, asrecarray=False):
     """
     Return a new array with fields in `drop_names` dropped.
@@ -583,6 +611,11 @@ def _keep_fields(base, keep_names, usemask=True, asrecarray=False):
     return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
 
 
+def _rec_drop_fields_dispatcher(base, drop_names):
+    return (base,)
+
+
+@array_function_dispatch(_rec_drop_fields_dispatcher)
 def rec_drop_fields(base, drop_names):
     """
     Returns a new numpy.recarray with fields in `drop_names` dropped.
@@ -590,6 +623,11 @@ def rec_drop_fields(base, drop_names):
     return drop_fields(base, drop_names, usemask=False, asrecarray=True)
 
 
+def _rename_fields_dispatcher(base, namemapper):
+    return (base,)
+
+
+@array_function_dispatch(_rename_fields_dispatcher)
 def rename_fields(base, namemapper):
     """
     Rename the fields from a flexible-datatype ndarray or recarray.
@@ -629,6 +667,14 @@ def rename_fields(base, namemapper):
     return base.view(newdtype)
 
 
+def _append_fields_dispatcher(base, names, data, dtypes=None,
+                              fill_value=None, usemask=None, asrecarray=None):
+    yield base
+    for d in data:
+        yield d
+
+
+@array_function_dispatch(_append_fields_dispatcher)
 def append_fields(base, names, data, dtypes=None,
                   fill_value=-1, usemask=True, asrecarray=False):
     """
@@ -699,6 +745,13 @@ def append_fields(base, names, data, dtypes=None,
     return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
 
 
+def _rec_append_fields_dispatcher(base, names, data, dtypes=None):
+    yield base
+    for d in data:
+        yield d
+
+
+@array_function_dispatch(_rec_append_fields_dispatcher)
 def rec_append_fields(base, names, data, dtypes=None):
     """
     Add new fields to an existing array.
@@ -732,6 +785,12 @@ def rec_append_fields(base, names, data, dtypes=None):
     return append_fields(base, names, data=data, dtypes=dtypes,
                          asrecarray=True, usemask=False)
 
+
+def _repack_fields_dispatcher(a, align=None, recurse=None):
+    return (a,)
+
+
+@array_function_dispatch(_repack_fields_dispatcher)
 def repack_fields(a, align=False, recurse=False):
     """
     Re-pack the fields of a structured array or dtype in memory.
@@ -811,6 +870,13 @@ def repack_fields(a, align=False, recurse=False):
     dt = np.dtype(fieldinfo, align=align)
     return np.dtype((a.type, dt))
 
+
+def _stack_arrays_dispatcher(arrays, defaults=None, usemask=None,
+                             asrecarray=None, autoconvert=None):
+    return arrays
+
+
+@array_function_dispatch(_stack_arrays_dispatcher)
 def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                  autoconvert=False):
     """
@@ -897,6 +963,12 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                        usemask=usemask, asrecarray=asrecarray)
 
 
+def _find_duplicates_dispatcher(
+        a, key=None, ignoremask=None, return_index=None):
+    return (a,)
+
+
+@array_function_dispatch(_find_duplicates_dispatcher)
 def find_duplicates(a, key=None, ignoremask=True, return_index=False):
     """
     Find the duplicates in a structured array along a given key
@@ -951,8 +1023,15 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False):
         return duplicates
 
 
+def _join_by_dispatcher(
+        key, r1, r2, jointype=None, r1postfix=None, r2postfix=None,
+        defaults=None, usemask=None, asrecarray=None):
+    return (r1, r2)
+
+
+@array_function_dispatch(_join_by_dispatcher)
 def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
-                defaults=None, usemask=True, asrecarray=False):
+            defaults=None, usemask=True, asrecarray=False):
     """
     Join arrays `r1` and `r2` on key `key`.
 
@@ -1130,6 +1209,13 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
     return _fix_output(_fix_defaults(output, defaults), **kwargs)
 
 
+def _rec_join_dispatcher(
+        key, r1, r2, jointype=None, r1postfix=None, r2postfix=None,
+        defaults=None):
+    return (r1, r2)
+
+
+@array_function_dispatch(_rec_join_dispatcher)
 def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
              defaults=None):
     """
diff --git a/numpy/lib/scimath.py b/numpy/lib/scimath.py
index f1838fee6..9ca006841 100644
--- a/numpy/lib/scimath.py
+++ b/numpy/lib/scimath.py
@@ -20,6 +20,7 @@ from __future__ import division, absolute_import, print_function
 import numpy.core.numeric as nx
 import numpy.core.numerictypes as nt
 from numpy.core.numeric import asarray, any
+from numpy.core.overrides import array_function_dispatch
 from numpy.lib.type_check import isreal
 
 
@@ -94,6 +95,7 @@ def _tocomplex(arr):
     else:
         return arr.astype(nt.cdouble)
 
+
 def _fix_real_lt_zero(x):
     """Convert `x` to complex if it has real, negative components.
 
@@ -121,6 +123,7 @@ def _fix_real_lt_zero(x):
         x = _tocomplex(x)
     return x
 
+
 def _fix_int_lt_zero(x):
     """Convert `x` to double if it has real, negative components.
 
@@ -147,6 +150,7 @@ def _fix_int_lt_zero(x):
         x = x * 1.0
     return x
 
+
 def _fix_real_abs_gt_1(x):
     """Convert `x` to complex if it has real components x_i with abs(x_i)>1.
 
@@ -173,6 +177,12 @@ def _fix_real_abs_gt_1(x):
         x = _tocomplex(x)
     return x
 
+
+def _unary_dispatcher(x):
+    return (x,)
+
+
+@array_function_dispatch(_unary_dispatcher)
 def sqrt(x):
     """
     Compute the square root of x.
@@ -215,6 +225,8 @@ def sqrt(x):
     x = _fix_real_lt_zero(x)
     return nx.sqrt(x)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def log(x):
     """
     Compute the natural logarithm of `x`.
@@ -261,6 +273,8 @@ def log(x):
     x = _fix_real_lt_zero(x)
     return nx.log(x)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def log10(x):
     """
     Compute the logarithm base 10 of `x`.
@@ -309,6 +323,12 @@ def log10(x):
     x = _fix_real_lt_zero(x)
     return nx.log10(x)
 
+
+def _logn_dispatcher(n, x):
+    return (n, x,)
+
+
+@array_function_dispatch(_logn_dispatcher)
 def logn(n, x):
     """
     Take log base n of x.
@@ -318,8 +338,8 @@ def logn(n, x):
 
     Parameters
     ----------
-    n : int
-       The base in which the log is taken.
+    n : array_like
+       The integer base(s) in which the log is taken.
     x : array_like
        The value(s) whose log base `n` is (are) required.
 
@@ -343,6 +363,8 @@ def logn(n, x):
     n = _fix_real_lt_zero(n)
     return nx.log(x)/nx.log(n)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def log2(x):
     """
     Compute the logarithm base 2 of `x`.
@@ -389,6 +411,12 @@ def log2(x):
     x = _fix_real_lt_zero(x)
     return nx.log2(x)
 
+
+def _power_dispatcher(x, p):
+    return (x, p)
+
+
+@array_function_dispatch(_power_dispatcher)
 def power(x, p):
     """
     Return x to the power p, (x**p).
@@ -432,6 +460,8 @@ def power(x, p):
     p = _fix_int_lt_zero(p)
     return nx.power(x, p)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def arccos(x):
     """
     Compute the inverse cosine of x.
@@ -475,6 +505,8 @@ def arccos(x):
     x = _fix_real_abs_gt_1(x)
     return nx.arccos(x)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def arcsin(x):
     """
     Compute the inverse sine of x.
@@ -519,6 +551,8 @@ def arcsin(x):
     x = _fix_real_abs_gt_1(x)
     return nx.arcsin(x)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def arctanh(x):
     """
     Compute the inverse hyperbolic tangent of `x`.
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index 66f534734..6e7cab3fa 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -1,5 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
+import functools
 import warnings
 
 import numpy.core.numeric as _nx
@@ -8,7 +9,9 @@ from numpy.core.numeric import (
     )
 from numpy.core.fromnumeric import product, reshape, transpose
 from numpy.core.multiarray import normalize_axis_index
+from numpy.core import overrides
 from numpy.core import vstack, atleast_3d
+from numpy.core.shape_base import _arrays_for_stack_dispatcher
 from numpy.lib.index_tricks import ndindex
 from numpy.matrixlib.defmatrix import matrix  # this raises all the right alarm bells
 
@@ -21,6 +24,10 @@ __all__ = [
     ]
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 def _make_along_axis_idx(arr_shape, indices, axis):
 	# compute dimensions to iterate over
     if not _nx.issubdtype(indices.dtype, _nx.integer):
@@ -44,6 +51,11 @@ def _make_along_axis_idx(arr_shape, indices, axis):
     return tuple(fancy_index)
 
 
+def _take_along_axis_dispatcher(arr, indices, axis):
+    return (arr, indices)
+
+
+@array_function_dispatch(_take_along_axis_dispatcher)
 def take_along_axis(arr, indices, axis):
     """
     Take values from the input array by matching 1d index and data slices.
@@ -160,6 +172,11 @@ def take_along_axis(arr, indices, axis):
     return arr[_make_along_axis_idx(arr_shape, indices, axis)]
 
 
+def _put_along_axis_dispatcher(arr, indices, values, axis):
+    return (arr, indices, values)
+
+
+@array_function_dispatch(_put_along_axis_dispatcher)
 def put_along_axis(arr, indices, values, axis):
     """
     Put values into the destination array by matching 1d index and data slices.
@@ -245,6 +262,11 @@ def put_along_axis(arr, indices, values, axis):
     arr[_make_along_axis_idx(arr_shape, indices, axis)] = values
 
 
+def _apply_along_axis_dispatcher(func1d, axis, arr, *args, **kwargs):
+    return (arr,)
+
+
+@array_function_dispatch(_apply_along_axis_dispatcher)
 def apply_along_axis(func1d, axis, arr, *args, **kwargs):
     """
     Apply a function to 1-D slices along the given axis.
@@ -392,6 +414,11 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs):
         return res.__array_wrap__(out_arr)
 
 
+def _apply_over_axes_dispatcher(func, a, axes):
+    return (a,)
+
+
+@array_function_dispatch(_apply_over_axes_dispatcher)
 def apply_over_axes(func, a, axes):
     """
     Apply a function repeatedly over multiple axes.
@@ -474,9 +501,15 @@ def apply_over_axes(func, a, axes):
                 val = res
             else:
                 raise ValueError("function is not returning "
-                        "an array of the correct shape")
+                                 "an array of the correct shape")
     return val
 
+
+def _expand_dims_dispatcher(a, axis):
+    return (a,)
+
+
+@array_function_dispatch(_expand_dims_dispatcher)
 def expand_dims(a, axis):
     """
     Expand the shape of an array.
@@ -554,8 +587,15 @@ def expand_dims(a, axis):
     # axis = normalize_axis_index(axis, a.ndim + 1)
     return a.reshape(shape[:axis] + (1,) + shape[axis:])
 
+
 row_stack = vstack
 
+
+def _column_stack_dispatcher(tup):
+    return _arrays_for_stack_dispatcher(tup)
+
+
+@array_function_dispatch(_column_stack_dispatcher)
 def column_stack(tup):
     """
     Stack 1-D arrays as columns into a 2-D array.
@@ -597,6 +637,12 @@ def column_stack(tup):
         arrays.append(arr)
     return _nx.concatenate(arrays, 1)
 
+
+def _dstack_dispatcher(tup):
+    return _arrays_for_stack_dispatcher(tup)
+
+
+@array_function_dispatch(_dstack_dispatcher)
 def dstack(tup):
     """
     Stack arrays in sequence depth wise (along third axis).
@@ -649,6 +695,7 @@ def dstack(tup):
     """
     return _nx.concatenate([atleast_3d(_m) for _m in tup], 2)
 
+
 def _replace_zero_by_x_arrays(sub_arys):
     for i in range(len(sub_arys)):
         if _nx.ndim(sub_arys[i]) == 0:
@@ -657,6 +704,12 @@ def _replace_zero_by_x_arrays(sub_arys):
             sub_arys[i] = _nx.empty(0, dtype=sub_arys[i].dtype)
     return sub_arys
 
+
+def _array_split_dispatcher(ary, indices_or_sections, axis=None):
+    return (ary, indices_or_sections)
+
+
+@array_function_dispatch(_array_split_dispatcher)
 def array_split(ary, indices_or_sections, axis=0):
     """
     Split an array into multiple sub-arrays.
@@ -712,7 +765,12 @@ def array_split(ary, indices_or_sections, axis=0):
     return sub_arys
 
 
-def split(ary,indices_or_sections,axis=0):
+def _split_dispatcher(ary, indices_or_sections, axis=None):
+    return (ary, indices_or_sections)
+
+
+@array_function_dispatch(_split_dispatcher)
+def split(ary, indices_or_sections, axis=0):
     """
     Split an array into multiple sub-arrays.
 
@@ -789,6 +847,12 @@ def split(ary,indices_or_sections,axis=0):
     res = array_split(ary, indices_or_sections, axis)
     return res
 
+
+def _hvdsplit_dispatcher(ary, indices_or_sections):
+    return (ary, indices_or_sections)
+
+
+@array_function_dispatch(_hvdsplit_dispatcher)
 def hsplit(ary, indices_or_sections):
     """
     Split an array into multiple sub-arrays horizontally (column-wise).
@@ -851,6 +915,8 @@ def hsplit(ary, indices_or_sections):
     else:
         return split(ary, indices_or_sections, 0)
 
+
+@array_function_dispatch(_hvdsplit_dispatcher)
 def vsplit(ary, indices_or_sections):
     """
     Split an array into multiple sub-arrays vertically (row-wise).
@@ -902,6 +968,8 @@ def vsplit(ary, indices_or_sections):
         raise ValueError('vsplit only works on arrays of 2 or more dimensions')
     return split(ary, indices_or_sections, 0)
 
+
+@array_function_dispatch(_hvdsplit_dispatcher)
 def dsplit(ary, indices_or_sections):
     """
     Split array into multiple sub-arrays along the 3rd axis (depth).
@@ -971,6 +1039,12 @@ def get_array_wrap(*args):
         return wrappers[-1][-1]
     return None
 
+
+def _kron_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_kron_dispatcher)
 def kron(a, b):
     """
     Kronecker product of two arrays.
@@ -1070,6 +1144,11 @@ def kron(a, b):
     return result
 
 
+def _tile_dispatcher(A, reps):
+    return (A, reps)
+
+
+@array_function_dispatch(_tile_dispatcher)
 def tile(A, reps):
     """
     Construct an array by repeating A the number of times given by reps.
diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py
index ca13738c1..0dc36e41c 100644
--- a/numpy/lib/stride_tricks.py
+++ b/numpy/lib/stride_tricks.py
@@ -8,6 +8,7 @@ NumPy reference guide.
 from __future__ import division, absolute_import, print_function
 
 import numpy as np
+from numpy.core.overrides import array_function_dispatch
 
 __all__ = ['broadcast_to', 'broadcast_arrays']
 
@@ -135,6 +136,11 @@ def _broadcast_to(array, shape, subok, readonly):
     return result
 
 
+def _broadcast_to_dispatcher(array, shape, subok=None):
+    return (array,)
+
+
+@array_function_dispatch(_broadcast_to_dispatcher, module='numpy')
 def broadcast_to(array, shape, subok=False):
     """Broadcast an array to a new shape.
 
@@ -195,6 +201,11 @@ def _broadcast_shape(*args):
     return b.shape
 
 
+def _broadcast_arrays_dispatcher(*args, **kwargs):
+    return args
+
+
+@array_function_dispatch(_broadcast_arrays_dispatcher, module='numpy')
 def broadcast_arrays(*args, **kwargs):
     """
     Broadcast any number of arrays against each other.
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 4b61726d2..fef06ba53 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -388,6 +388,13 @@ class TestSetOps(object):
         a = np.array((), np.uint32)
         assert_equal(setdiff1d(a, []).dtype, np.uint32)
 
+    def test_setdiff1d_unique(self):
+        a = np.array([3, 2, 1])
+        b = np.array([7, 5, 2])
+        expected = np.array([3, 1])
+        actual = setdiff1d(a, b, assume_unique=True)
+        assert_equal(actual, expected)
+
     def test_setdiff1d_char_array(self):
         a = np.array(['a', 'b', 'c'])
         b = np.array(['a', 'b', 's'])
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 40cca1dbb..0c789e012 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -3114,3 +3114,29 @@ class TestAdd_newdoc(object):
         assert_equal(np.core.flatiter.index.__doc__[:len(tgt)], tgt)
         assert_(len(np.core.ufunc.identity.__doc__) > 300)
         assert_(len(np.lib.index_tricks.mgrid.__doc__) > 300)
+
+class TestSortComplex(object):
+
+    @pytest.mark.parametrize("type_in, type_out", [
+        ('l', 'D'),
+        ('h', 'F'),
+        ('H', 'F'),
+        ('b', 'F'),
+        ('B', 'F'),
+        ('g', 'G'),
+        ])
+    def test_sort_real(self, type_in, type_out):
+        # sort_complex() type casting for real input types
+        a = np.array([5, 3, 6, 2, 1], dtype=type_in)
+        actual = np.sort_complex(a)
+        expected = np.sort(a).astype(type_out)
+        assert_equal(actual, expected)
+        assert_equal(actual.dtype, expected.dtype)
+
+    def test_sort_complex(self):
+        # sort_complex() handling of complex input
+        a = np.array([2 + 3j, 1 - 2j, 1 - 3j, 2 + 1j], dtype='D')
+        expected = np.array([1 - 3j, 1 - 2j, 2 + 1j, 2 + 3j], dtype='D')
+        actual = np.sort_complex(a)
+        assert_equal(actual, expected)
+        assert_equal(actual.dtype, expected.dtype)
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index 561f5f938..1b5a71d0e 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -119,6 +119,13 @@ class TestHistogram(object):
         h, b = histogram(a, bins=8, range=[1, 9], weights=w)
         assert_equal(h, w[1:-1])
 
+    def test_arr_weights_mismatch(self):
+        a = np.arange(10) + .5
+        w = np.arange(11) + .5
+        with assert_raises_regex(ValueError, "same shape as"):
+            h, b = histogram(a, range=[1, 9], weights=w, density=True)
+
+
     def test_type(self):
         # Check the type of the returned histogram
         a = np.arange(10) + .5
@@ -141,6 +148,23 @@ class TestHistogram(object):
         counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100)
         assert_equal(counts_hist.sum(), 3.)
 
+    def test_bool_conversion(self):
+        # gh-12107
+        # Reference integer histogram
+        a = np.array([1, 1, 0], dtype=np.uint8)
+        int_hist, int_edges = np.histogram(a)
+
+        # Should raise an warning on booleans
+        # Ensure that the histograms are equivalent, need to suppress
+        # the warnings to get the actual outputs
+        with suppress_warnings() as sup:
+            rec = sup.record(RuntimeWarning, 'Converting input from .*')
+            hist, edges = np.histogram([True, True, False])
+            # A warning should be issued
+            assert_equal(len(rec), 1)
+            assert_array_equal(hist, int_hist)
+            assert_array_equal(edges, int_edges)
+
     def test_weights(self):
         v = np.random.rand(100)
         w = np.ones(100) * 5
@@ -225,6 +249,12 @@ class TestHistogram(object):
         assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
         assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])
 
+    def test_invalid_range(self):
+        # start of range must be < end of range
+        vals = np.linspace(0.0, 1.0, num=100)
+        with assert_raises_regex(ValueError, "max must be larger than"):
+            np.histogram(vals, range=[0.1, 0.01])
+
     def test_bin_edge_cases(self):
         # Ensure that floating-point computations correctly place edge cases.
         arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
@@ -241,6 +271,13 @@ class TestHistogram(object):
         hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
         assert_equal(hist[-1], 1)
 
+    def test_bin_array_dims(self):
+        # gracefully handle bins object > 1 dimension
+        vals = np.linspace(0.0, 1.0, num=100)
+        bins = np.array([[0, 0.5], [0.6, 1.0]])
+        with assert_raises_regex(ValueError, "must be 1d"):
+            np.histogram(vals, bins=bins)
+
     def test_unsigned_monotonicity_check(self):
         # Ensures ValueError is raised if bins not increasing monotonically
         # when bins contain unsigned values (see #9222)
diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py
index 7e9c026e4..3246f68ff 100644
--- a/numpy/lib/tests/test_index_tricks.py
+++ b/numpy/lib/tests/test_index_tricks.py
@@ -1,9 +1,12 @@
 from __future__ import division, absolute_import, print_function
 
+import pytest
+
 import numpy as np
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_almost_equal,
-    assert_array_almost_equal, assert_raises, assert_raises_regex
+    assert_array_almost_equal, assert_raises, assert_raises_regex,
+    assert_warns
     )
 from numpy.lib.index_tricks import (
     mgrid, ogrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from,
@@ -14,6 +17,33 @@ from numpy.lib.index_tricks import (
 class TestRavelUnravelIndex(object):
     def test_basic(self):
         assert_equal(np.unravel_index(2, (2, 2)), (1, 0))
+
+        # test backwards compatibility with older dims
+        # keyword argument; see Issue #10586
+        with assert_warns(DeprecationWarning):
+            # we should achieve the correct result
+            # AND raise the appropriate warning
+            # when using older "dims" kw argument
+            assert_equal(np.unravel_index(indices=2,
+                                          dims=(2, 2)),
+                                          (1, 0))
+
+        # test that new shape argument works properly
+        assert_equal(np.unravel_index(indices=2,
+                                      shape=(2, 2)),
+                                      (1, 0))
+
+        # test that an invalid second keyword argument
+        # is properly handled
+        with assert_raises(TypeError):
+            np.unravel_index(indices=2, hape=(2, 2))
+
+        with assert_raises(TypeError):
+            np.unravel_index(2, hape=(2, 2))
+
+        with assert_raises(TypeError):
+            np.unravel_index(254, ims=(17, 94))
+
         assert_equal(np.ravel_multi_index((1, 0), (2, 2)), 2)
         assert_equal(np.unravel_index(254, (17, 94)), (2, 66))
         assert_equal(np.ravel_multi_index((2, 66), (17, 94)), 254)
@@ -164,6 +194,22 @@ class TestGrid(object):
         for f, b in zip(grid_full, grid_broadcast):
             assert_equal(f, b)
 
+    @pytest.mark.parametrize("start, stop, step, expected", [
+        (None, 10, 10j, (200, 10)),
+        (-10, 20, None, (1800, 30)),
+        ])
+    def test_mgrid_size_none_handling(self, start, stop, step, expected):
+        # regression test None value handling for
+        # start and step values used by mgrid;
+        # internally, this aims to cover previously
+        # unexplored code paths in nd_grid()
+        grid = mgrid[start:stop:step, start:stop:step]
+        # need a smaller grid to explore one of the
+        # untested code paths
+        grid_small = mgrid[start:stop:step]
+        assert_equal(grid.size, expected[0])
+        assert_equal(grid_small.size, expected[1])
+
 
 class TestConcatenator(object):
     def test_1d(self):
@@ -180,6 +226,11 @@ class TestConcatenator(object):
         g = r_[-10.1, np.array([1]), np.array([2, 3, 4]), 10.0]
         assert_(g.dtype == 'f8')
 
+    def test_complex_step(self):
+        # Regression test for #12262
+        g = r_[0:36:100j]
+        assert_(g.shape == (100,))
+
     def test_2d(self):
         b = np.random.rand(5, 5)
         c = np.random.rand(5, 5)
@@ -318,6 +369,19 @@ class TestFillDiagonal(object):
         i = np.array([0, 1, 2])
         assert_equal(np.where(a != 0), (i, i, i, i))
 
+    def test_low_dim_handling(self):
+        # raise error with low dimensionality
+        a = np.zeros(3, int)
+        with assert_raises_regex(ValueError, "at least 2-d"):
+            fill_diagonal(a, 5)
+
+    def test_hetero_shape_handling(self):
+        # raise error with high dimensionality and
+        # shape mismatch
+        a = np.zeros((3,3,7,3), int)
+        with assert_raises_regex(ValueError, "equal length"):
+            fill_diagonal(a, 2)
+
 
 def test_diag_indices():
     di = diag_indices(4)
@@ -347,11 +411,23 @@ def test_diag_indices():
         )
 
 
-def test_diag_indices_from():
-    x = np.random.random((4, 4))
-    r, c = diag_indices_from(x)
-    assert_array_equal(r, np.arange(4))
-    assert_array_equal(c, np.arange(4))
+class TestDiagIndicesFrom(object):
+
+    def test_diag_indices_from(self):
+        x = np.random.random((4, 4))
+        r, c = diag_indices_from(x)
+        assert_array_equal(r, np.arange(4))
+        assert_array_equal(c, np.arange(4))
+
+    def test_error_small_input(self):
+        x = np.ones(7)
+        with assert_raises_regex(ValueError, "at least 2-d"):
+            diag_indices_from(x)
+
+    def test_error_shape_mismatch(self):
+        x = np.zeros((3, 3, 2, 3), int)
+        with assert_raises_regex(ValueError, "equal length"):
+            diag_indices_from(x)
 
 
 def test_ndindex():
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 08800ff97..b746937b9 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -2295,11 +2295,35 @@ class TestPathUsage(object):
             assert_array_equal(x, a)
 
     def test_save_load(self):
-        # Test that pathlib.Path instances can be used with savez.
+        # Test that pathlib.Path instances can be used with save.
+        with temppath(suffix='.npy') as path:
+            path = Path(path)
+            a = np.array([[1, 2], [3, 4]], int)
+            np.save(path, a)
+            data = np.load(path)
+            assert_array_equal(data, a)
+
+    def test_save_load_memmap(self):
+        # Test that pathlib.Path instances can be loaded mem-mapped.
+        with temppath(suffix='.npy') as path:
+            path = Path(path)
+            a = np.array([[1, 2], [3, 4]], int)
+            np.save(path, a)
+            data = np.load(path, mmap_mode='r')
+            assert_array_equal(data, a)
+            # close the mem-mapped file
+            del data
+
+    def test_save_load_memmap_readwrite(self):
+        # Test that pathlib.Path instances can be written mem-mapped.
         with temppath(suffix='.npy') as path:
             path = Path(path)
             a = np.array([[1, 2], [3, 4]], int)
             np.save(path, a)
+            b = np.load(path, mmap_mode='r+')
+            a[0][0] = 5
+            b[0][0] = 5
+            del b  # closes the file
             data = np.load(path)
             assert_array_equal(data, a)
 
diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py
index 6e4cd225d..e338467f9 100644
--- a/numpy/lib/tests/test_shape_base.py
+++ b/numpy/lib/tests/test_shape_base.py
@@ -457,10 +457,35 @@ class TestSplit(object):
         a = np.arange(10)
         assert_raises(ValueError, split, a, 3)
 
+
 class TestColumnStack(object):
     def test_non_iterable(self):
         assert_raises(TypeError, column_stack, 1)
 
+    def test_1D_arrays(self):
+        # example from docstring
+        a = np.array((1, 2, 3))
+        b = np.array((2, 3, 4))
+        expected = np.array([[1, 2],
+                             [2, 3],
+                             [3, 4]])
+        actual = np.column_stack((a, b))
+        assert_equal(actual, expected)
+
+    def test_2D_arrays(self):
+        # same as hstack 2D docstring example
+        a = np.array([[1], [2], [3]])
+        b = np.array([[2], [3], [4]])
+        expected = np.array([[1, 2],
+                             [2, 3],
+                             [3, 4]])
+        actual = np.column_stack((a, b))
+        assert_equal(actual, expected)
+
+    def test_generator(self):
+        with assert_warns(FutureWarning):
+            column_stack((np.arange(3) for _ in range(2)))
+
 
 class TestDstack(object):
     def test_non_iterable(self):
@@ -494,6 +519,10 @@ class TestDstack(object):
         desired = np.array([[[1, 1], [2, 2]]])
         assert_array_equal(res, desired)
 
+    def test_generator(self):
+        with assert_warns(FutureWarning):
+            dstack((np.arange(3) for _ in range(2)))
+
 
 # array_split has more comprehensive test of splitting.
 # only do simple test on hsplit, vsplit, and dsplit
diff --git a/numpy/lib/tests/test_utils.py b/numpy/lib/tests/test_utils.py
index c27c3cbf5..2723f3440 100644
--- a/numpy/lib/tests/test_utils.py
+++ b/numpy/lib/tests/test_utils.py
@@ -56,10 +56,34 @@ def test_safe_eval_nameconstant():
     utils.safe_eval('None')
 
 
-def test_byte_bounds():
-    a = arange(12).reshape(3, 4)
-    low, high = utils.byte_bounds(a)
-    assert_equal(high - low, a.size * a.itemsize)
+class TestByteBounds(object):
+
+    def test_byte_bounds(self):
+        # pointer difference matches size * itemsize
+        # due to contiguity
+        a = arange(12).reshape(3, 4)
+        low, high = utils.byte_bounds(a)
+        assert_equal(high - low, a.size * a.itemsize)
+
+    def test_unusual_order_positive_stride(self):
+        a = arange(12).reshape(3, 4)
+        b = a.T
+        low, high = utils.byte_bounds(b)
+        assert_equal(high - low, b.size * b.itemsize)
+
+    def test_unusual_order_negative_stride(self):
+        a = arange(12).reshape(3, 4)
+        b = a.T[::-1]
+        low, high = utils.byte_bounds(b)
+        assert_equal(high - low, b.size * b.itemsize)
+
+    def test_strided(self):
+        a = arange(12)
+        b = a[::2]
+        low, high = utils.byte_bounds(b)
+        # the largest pointer address is lost (even numbers only in the
+        # stride), and compensate addresses for striding by 2
+        assert_equal(high - low, b.size * 2 * b.itemsize - b.itemsize)
 
 
 def test_assert_raises_regex_context_manager():
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py
index 98efba191..a05e68375 100644
--- a/numpy/lib/twodim_base.py
+++ b/numpy/lib/twodim_base.py
@@ -3,11 +3,14 @@
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
+
 from numpy.core.numeric import (
     absolute, asanyarray, arange, zeros, greater_equal, multiply, ones,
     asarray, where, int8, int16, int32, int64, empty, promote_types, diagonal,
     nonzero
     )
+from numpy.core import overrides
 from numpy.core import iinfo, transpose
 
 
@@ -17,6 +20,10 @@ __all__ = [
     'tril_indices_from', 'triu_indices', 'triu_indices_from', ]
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 i1 = iinfo(int8)
 i2 = iinfo(int16)
 i4 = iinfo(int32)
@@ -33,6 +40,11 @@ def _min_int(low, high):
     return int64
 
 
+def _flip_dispatcher(m):
+    return (m,)
+
+
+@array_function_dispatch(_flip_dispatcher)
 def fliplr(m):
     """
     Flip array in the left/right direction.
@@ -83,6 +95,7 @@ def fliplr(m):
     return m[:, ::-1]
 
 
+@array_function_dispatch(_flip_dispatcher)
 def flipud(m):
     """
     Flip array in the up/down direction.
@@ -194,6 +207,11 @@ def eye(N, M=None, k=0, dtype=float, order='C'):
     return m
 
 
+def _diag_dispatcher(v, k=None):
+    return (v,)
+
+
+@array_function_dispatch(_diag_dispatcher)
 def diag(v, k=0):
     """
     Extract a diagonal or construct a diagonal array.
@@ -265,6 +283,7 @@ def diag(v, k=0):
         raise ValueError("Input must be 1- or 2-d.")
 
 
+@array_function_dispatch(_diag_dispatcher)
 def diagflat(v, k=0):
     """
     Create a two-dimensional array with the flattened input as a diagonal.
@@ -373,6 +392,11 @@ def tri(N, M=None, k=0, dtype=float):
     return m
 
 
+def _trilu_dispatcher(m, k=None):
+    return (m,)
+
+
+@array_function_dispatch(_trilu_dispatcher)
 def tril(m, k=0):
     """
     Lower triangle of an array.
@@ -411,6 +435,7 @@ def tril(m, k=0):
     return where(mask, m, zeros(1, m.dtype))
 
 
+@array_function_dispatch(_trilu_dispatcher)
 def triu(m, k=0):
     """
     Upper triangle of an array.
@@ -439,7 +464,12 @@ def triu(m, k=0):
     return where(mask, zeros(1, m.dtype), m)
 
 
+def _vander_dispatcher(x, N=None, increasing=None):
+    return (x,)
+
+
 # Originally borrowed from John Hunter and matplotlib
+@array_function_dispatch(_vander_dispatcher)
 def vander(x, N=None, increasing=False):
     """
     Generate a Vandermonde matrix.
@@ -530,6 +560,12 @@ def vander(x, N=None, increasing=False):
     return v
 
 
+def _histogram2d_dispatcher(x, y, bins=None, range=None, normed=None,
+                            weights=None, density=None):
+    return (x, y, bins, weights)
+
+
+@array_function_dispatch(_histogram2d_dispatcher)
 def histogram2d(x, y, bins=10, range=None, normed=None, weights=None,
                 density=None):
     """
@@ -812,6 +848,11 @@ def tril_indices(n, k=0, m=None):
     return nonzero(tri(n, m, k=k, dtype=bool))
 
 
+def _trilu_indices_form_dispatcher(arr, k=None):
+    return (arr,)
+
+
+@array_function_dispatch(_trilu_indices_form_dispatcher)
 def tril_indices_from(arr, k=0):
     """
     Return the indices for the lower-triangle of arr.
@@ -922,6 +963,7 @@ def triu_indices(n, k=0, m=None):
     return nonzero(~tri(n, m, k=k-1, dtype=bool))
 
 
+@array_function_dispatch(_trilu_indices_form_dispatcher)
 def triu_indices_from(arr, k=0):
     """
     Return the indices for the upper-triangle of arr.
diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py
index 3f7aa32fa..9153e1692 100644
--- a/numpy/lib/type_check.py
+++ b/numpy/lib/type_check.py
@@ -2,6 +2,8 @@
 
 """
 from __future__ import division, absolute_import, print_function
+import functools
+import warnings
 
 __all__ = ['iscomplexobj', 'isrealobj', 'imag', 'iscomplex',
            'isreal', 'nan_to_num', 'real', 'real_if_close',
@@ -10,10 +12,17 @@ __all__ = ['iscomplexobj', 'isrealobj', 'imag', 'iscomplex',
 
 import numpy.core.numeric as _nx
 from numpy.core.numeric import asarray, asanyarray, array, isnan, zeros
+from numpy.core import overrides
 from .ufunclike import isneginf, isposinf
 
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 _typecodes_by_elsize = 'GDFgdfQqLlIiHhBb?'
 
+
 def mintypecode(typechars,typeset='GDFgdf',default='d'):
     """
     Return the character for the minimum-size type to which given types can
@@ -103,6 +112,11 @@ def asfarray(a, dtype=_nx.float_):
     return asarray(a, dtype=dtype)
 
 
+def _real_dispatcher(val):
+    return (val,)
+
+
+@array_function_dispatch(_real_dispatcher)
 def real(val):
     """
     Return the real part of the complex argument.
@@ -144,6 +158,11 @@ def real(val):
         return asanyarray(val).real
 
 
+def _imag_dispatcher(val):
+    return (val,)
+
+
+@array_function_dispatch(_imag_dispatcher)
 def imag(val):
     """
     Return the imaginary part of the complex argument.
@@ -182,6 +201,11 @@ def imag(val):
         return asanyarray(val).imag
 
 
+def _is_type_dispatcher(x):
+    return (x,)
+
+
+@array_function_dispatch(_is_type_dispatcher)
 def iscomplex(x):
     """
     Returns a bool array, where True if input element is complex.
@@ -217,6 +241,8 @@ def iscomplex(x):
     res = zeros(ax.shape, bool)
     return res[()]   # convert to scalar if needed
 
+
+@array_function_dispatch(_is_type_dispatcher)
 def isreal(x):
     """
     Returns a bool array, where True if input element is real.
@@ -247,6 +273,8 @@ def isreal(x):
     """
     return imag(x) == 0
 
+
+@array_function_dispatch(_is_type_dispatcher)
 def iscomplexobj(x):
     """
     Check for a complex type or an array of complex numbers.
@@ -287,6 +315,7 @@ def iscomplexobj(x):
     return issubclass(type_, _nx.complexfloating)
 
 
+@array_function_dispatch(_is_type_dispatcher)
 def isrealobj(x):
     """
     Return True if x is a not complex type or an array of complex numbers.
@@ -328,6 +357,12 @@ def _getmaxmin(t):
     f = getlimits.finfo(t)
     return f.max, f.min
 
+
+def _nan_to_num_dispatcher(x, copy=None):
+    return (x,)
+
+
+@array_function_dispatch(_nan_to_num_dispatcher)
 def nan_to_num(x, copy=True):
     """
     Replace NaN with zero and infinity with large finite numbers.
@@ -410,7 +445,12 @@ def nan_to_num(x, copy=True):
 
 #-----------------------------------------------------------------------------
 
-def real_if_close(a,tol=100):
+def _real_if_close_dispatcher(a, tol=None):
+    return (a,)
+
+
+@array_function_dispatch(_real_if_close_dispatcher)
+def real_if_close(a, tol=100):
     """
     If complex input returns a real array if complex parts are close to zero.
 
@@ -465,10 +505,19 @@ def real_if_close(a,tol=100):
     return a
 
 
+def _asscalar_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_asscalar_dispatcher)
 def asscalar(a):
     """
     Convert an array of size 1 to its scalar equivalent.
 
+    .. deprecated:: 1.16
+
+        Deprecated, use `numpy.ndarray.item()` instead.
+
     Parameters
     ----------
     a : ndarray
@@ -486,6 +535,10 @@ def asscalar(a):
     24
 
     """
+
+    # 2018-10-10, 1.16
+    warnings.warn('np.asscalar(a) is deprecated since NumPy v1.16, use '
+                  'a.item() instead', DeprecationWarning, stacklevel=1)
     return a.item()
 
 #-----------------------------------------------------------------------------
@@ -577,6 +630,13 @@ array_precision = {_nx.half: 0,
                    _nx.csingle: 1,
                    _nx.cdouble: 2,
                    _nx.clongdouble: 3}
+
+
+def _common_type_dispatcher(*arrays):
+    return arrays
+
+
+@array_function_dispatch(_common_type_dispatcher)
 def common_type(*arrays):
     """
     Return a scalar type which is common to the input arrays.
diff --git a/numpy/lib/ufunclike.py b/numpy/lib/ufunclike.py
index 6259c5445..ac0af0b37 100644
--- a/numpy/lib/ufunclike.py
+++ b/numpy/lib/ufunclike.py
@@ -8,6 +8,7 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['fix', 'isneginf', 'isposinf']
 
 import numpy.core.numeric as nx
+from numpy.core.overrides import array_function_dispatch
 import warnings
 import functools
 
@@ -37,7 +38,30 @@ def _deprecate_out_named_y(f):
     return func
 
 
+def _fix_out_named_y(f):
+    """
+    Allow the out argument to be passed as the name `y` (deprecated)
+
+    This decorator should only be used if _deprecate_out_named_y is used on
+    a corresponding dispatcher fucntion.
+    """
+    @functools.wraps(f)
+    def func(x, out=None, **kwargs):
+        if 'y' in kwargs:
+            # we already did error checking in _deprecate_out_named_y
+            out = kwargs.pop('y')
+        return f(x, out=out, **kwargs)
+
+    return func
+
+
 @_deprecate_out_named_y
+def _dispatcher(x, out=None):
+    return (x, out)
+
+
+@array_function_dispatch(_dispatcher, verify=False, module='numpy')
+@_fix_out_named_y
 def fix(x, out=None):
     """
     Round to nearest integer towards zero.
@@ -83,7 +107,8 @@ def fix(x, out=None):
     return res
 
 
-@_deprecate_out_named_y
+@array_function_dispatch(_dispatcher, verify=False, module='numpy')
+@_fix_out_named_y
 def isposinf(x, out=None):
     """
     Test element-wise for positive infinity, return result as bool array.
@@ -151,7 +176,8 @@ def isposinf(x, out=None):
         return nx.logical_and(is_inf, signbit, out)
 
 
-@_deprecate_out_named_y
+@array_function_dispatch(_dispatcher, verify=False, module='numpy')
+@_fix_out_named_y
 def isneginf(x, out=None):
     """
     Test element-wise for negative infinity, return result as bool array.
diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
index ccc437663..771481e8e 100644
--- a/numpy/linalg/linalg.py
+++ b/numpy/linalg/linalg.py
@@ -16,6 +16,7 @@ __all__ = ['matrix_power', 'solve', 'tensorsolve', 'tensorinv', 'inv',
            'svd', 'eig', 'eigh', 'lstsq', 'norm', 'qr', 'cond', 'matrix_rank',
            'LinAlgError', 'multi_dot']
 
+import functools
 import operator
 import warnings
 
@@ -28,9 +29,15 @@ from numpy.core import (
     swapaxes, divide, count_nonzero, isnan
 )
 from numpy.core.multiarray import normalize_axis_index
+from numpy.core import overrides
 from numpy.lib.twodim_base import triu, eye
 from numpy.linalg import lapack_lite, _umath_linalg
 
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy.linalg')
+
+
 # For Python2/3 compatibility
 _N = b'N'
 _V = b'V'
@@ -198,11 +205,6 @@ def _assertRankAtLeast2(*arrays):
             raise LinAlgError('%d-dimensional array given. Array must be '
                     'at least two-dimensional' % a.ndim)
 
-def _assertSquareness(*arrays):
-    for a in arrays:
-        if max(a.shape) != min(a.shape):
-            raise LinAlgError('Array must be square')
-
 def _assertNdSquareness(*arrays):
     for a in arrays:
         m, n = a.shape[-2:]
@@ -242,6 +244,11 @@ def transpose(a):
 
 # Linear equations
 
+def _tensorsolve_dispatcher(a, b, axes=None):
+    return (a, b)
+
+
+@array_function_dispatch(_tensorsolve_dispatcher)
 def tensorsolve(a, b, axes=None):
     """
     Solve the tensor equation ``a x = b`` for x.
@@ -311,6 +318,12 @@ def tensorsolve(a, b, axes=None):
     res.shape = oldshape
     return res
 
+
+def _solve_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_solve_dispatcher)
 def solve(a, b):
     """
     Solve a linear matrix equation, or system of linear scalar equations.
@@ -391,6 +404,11 @@ def solve(a, b):
     return wrap(r.astype(result_t, copy=False))
 
 
+def _tensorinv_dispatcher(a, ind=None):
+    return (a,)
+
+
+@array_function_dispatch(_tensorinv_dispatcher)
 def tensorinv(a, ind=2):
     """
     Compute the 'inverse' of an N-dimensional array.
@@ -460,6 +478,11 @@ def tensorinv(a, ind=2):
 
 # Matrix inversion
 
+def _unary_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_unary_dispatcher)
 def inv(a):
     """
     Compute the (multiplicative) inverse of a matrix.
@@ -528,6 +551,11 @@ def inv(a):
     return wrap(ainv.astype(result_t, copy=False))
 
 
+def _matrix_power_dispatcher(a, n):
+    return (a,)
+
+
+@array_function_dispatch(_matrix_power_dispatcher)
 def matrix_power(a, n):
     """
     Raise a square matrix to the (integer) power `n`.
@@ -645,6 +673,8 @@ def matrix_power(a, n):
 
 # Cholesky decomposition
 
+
+@array_function_dispatch(_unary_dispatcher)
 def cholesky(a):
     """
     Cholesky decomposition.
@@ -728,8 +758,14 @@ def cholesky(a):
     r = gufunc(a, signature=signature, extobj=extobj)
     return wrap(r.astype(result_t, copy=False))
 
+
 # QR decompostion
 
+def _qr_dispatcher(a, mode=None):
+    return (a,)
+
+
+@array_function_dispatch(_qr_dispatcher)
 def qr(a, mode='reduced'):
     """
     Compute the qr factorization of a matrix.
@@ -945,6 +981,7 @@ def qr(a, mode='reduced'):
 # Eigenvalues
 
 
+@array_function_dispatch(_unary_dispatcher)
 def eigvals(a):
     """
     Compute the eigenvalues of a general matrix.
@@ -1034,6 +1071,12 @@ def eigvals(a):
 
     return w.astype(result_t, copy=False)
 
+
+def _eigvalsh_dispatcher(a, UPLO=None):
+    return (a,)
+
+
+@array_function_dispatch(_eigvalsh_dispatcher)
 def eigvalsh(a, UPLO='L'):
     """
     Compute the eigenvalues of a complex Hermitian or real symmetric matrix.
@@ -1135,6 +1178,7 @@ def _convertarray(a):
 # Eigenvectors
 
 
+@array_function_dispatch(_unary_dispatcher)
 def eig(a):
     """
     Compute the eigenvalues and right eigenvectors of a square array.
@@ -1276,6 +1320,7 @@ def eig(a):
     return w.astype(result_t, copy=False), wrap(vt)
 
 
+@array_function_dispatch(_eigvalsh_dispatcher)
 def eigh(a, UPLO='L'):
     """
     Return the eigenvalues and eigenvectors of a complex Hermitian
@@ -1415,6 +1460,11 @@ def eigh(a, UPLO='L'):
 
 # Singular value decomposition
 
+def _svd_dispatcher(a, full_matrices=None, compute_uv=None):
+    return (a,)
+
+
+@array_function_dispatch(_svd_dispatcher)
 def svd(a, full_matrices=True, compute_uv=True):
     """
     Singular Value Decomposition.
@@ -1575,6 +1625,11 @@ def svd(a, full_matrices=True, compute_uv=True):
         return s
 
 
+def _cond_dispatcher(x, p=None):
+    return (x,)
+
+
+@array_function_dispatch(_cond_dispatcher)
 def cond(x, p=None):
     """
     Compute the condition number of a matrix.
@@ -1692,6 +1747,11 @@ def cond(x, p=None):
     return r
 
 
+def _matrix_rank_dispatcher(M, tol=None, hermitian=None):
+    return (M,)
+
+
+@array_function_dispatch(_matrix_rank_dispatcher)
 def matrix_rank(M, tol=None, hermitian=False):
     """
     Return matrix rank of array using SVD method
@@ -1796,7 +1856,12 @@ def matrix_rank(M, tol=None, hermitian=False):
 
 # Generalized inverse
 
-def pinv(a, rcond=1e-15 ):
+def _pinv_dispatcher(a, rcond=None):
+    return (a,)
+
+
+@array_function_dispatch(_pinv_dispatcher)
+def pinv(a, rcond=1e-15):
     """
     Compute the (Moore-Penrose) pseudo-inverse of a matrix.
 
@@ -1880,8 +1945,11 @@ def pinv(a, rcond=1e-15 ):
     res = matmul(transpose(vt), multiply(s[..., newaxis], transpose(u)))
     return wrap(res)
 
+
 # Determinant
 
+
+@array_function_dispatch(_unary_dispatcher)
 def slogdet(a):
     """
     Compute the sign and (natural) logarithm of the determinant of an array.
@@ -1967,6 +2035,8 @@ def slogdet(a):
     logdet = logdet.astype(real_t, copy=False)
     return sign, logdet
 
+
+@array_function_dispatch(_unary_dispatcher)
 def det(a):
     """
     Compute the determinant of an array.
@@ -2023,8 +2093,14 @@ def det(a):
     r = r.astype(result_t, copy=False)
     return r
 
+
 # Linear Least Squares
 
+def _lstsq_dispatcher(a, b, rcond=None):
+    return (a, b)
+
+
+@array_function_dispatch(_lstsq_dispatcher)
 def lstsq(a, b, rcond="warn"):
     """
     Return the least-squares solution to a linear matrix equation.
@@ -2208,6 +2284,11 @@ def _multi_svd_norm(x, row_axis, col_axis, op):
     return result
 
 
+def _norm_dispatcher(x, ord=None, axis=None, keepdims=None):
+    return (x,)
+
+
+@array_function_dispatch(_norm_dispatcher)
 def norm(x, ord=None, axis=None, keepdims=False):
     """
     Matrix or vector norm.
@@ -2450,6 +2531,11 @@ def norm(x, ord=None, axis=None, keepdims=False):
 
 # multi_dot
 
+def _multidot_dispatcher(arrays):
+    return arrays
+
+
+@array_function_dispatch(_multidot_dispatcher)
 def multi_dot(arrays):
     """
     Compute the dot product of two or more arrays in a single function call,
diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py
index 320d123e7..905f3a549 100644
--- a/numpy/linalg/tests/test_linalg.py
+++ b/numpy/linalg/tests/test_linalg.py
@@ -19,7 +19,8 @@ from numpy.linalg import matrix_power, norm, matrix_rank, multi_dot, LinAlgError
 from numpy.linalg.linalg import _multi_dot_matrix_chain_order
 from numpy.testing import (
     assert_, assert_equal, assert_raises, assert_array_equal,
-    assert_almost_equal, assert_allclose, suppress_warnings
+    assert_almost_equal, assert_allclose, suppress_warnings,
+    assert_raises_regex,
     )
 
 
@@ -931,6 +932,14 @@ class TestLstsq(LstsqCases):
         assert_equal(rank, min(m, n))
         assert_equal(s.shape, (min(m, n),))
 
+    def test_incompatible_dims(self):
+        # use modified version of docstring example
+        x = np.array([0, 1, 2, 3])
+        y = np.array([-1, 0.2, 0.9, 2.1, 3.3])
+        A = np.vstack([x, np.ones(len(x))]).T
+        with assert_raises_regex(LinAlgError, "Incompatible dimensions"):
+            linalg.lstsq(A, y, rcond=None)
+
 
 @pytest.mark.parametrize('dt', [np.dtype(c) for c in '?bBhHiIqQefdgFDGO']) 
 class TestMatrixPower(object):
@@ -1835,6 +1844,14 @@ class TestMultiDot(object):
         assert_almost_equal(multi_dot([A, B, C]), A.dot(B).dot(C))
         assert_almost_equal(multi_dot([A, B, C]), np.dot(A, np.dot(B, C)))
 
+    def test_basic_function_with_two_arguments(self):
+        # separate code path with two arguments
+        A = np.random.random((6, 2))
+        B = np.random.random((2, 6))
+
+        assert_almost_equal(multi_dot([A, B]), A.dot(B))
+        assert_almost_equal(multi_dot([A, B]), np.dot(A, B))
+
     def test_basic_function_with_dynamic_programing_optimization(self):
         # multi_dot with four or more arguments uses the dynamic programing
         # optimization and therefore deserve a separate
@@ -1907,3 +1924,44 @@ class TestMultiDot(object):
     def test_too_few_input_arrays(self):
         assert_raises(ValueError, multi_dot, [])
         assert_raises(ValueError, multi_dot, [np.random.random((3, 3))])
+
+
+class TestTensorinv(object):
+
+    @pytest.mark.parametrize("arr, ind", [
+        (np.ones((4, 6, 8, 2)), 2),
+        (np.ones((3, 3, 2)), 1),
+        ])
+    def test_non_square_handling(self, arr, ind):
+        with assert_raises(LinAlgError):
+            linalg.tensorinv(arr, ind=ind)
+
+    @pytest.mark.parametrize("shape, ind", [
+        # examples from docstring
+        ((4, 6, 8, 3), 2),
+        ((24, 8, 3), 1),
+        ])
+    def test_tensorinv_shape(self, shape, ind):
+        a = np.eye(24)
+        a.shape = shape
+        ainv = linalg.tensorinv(a=a, ind=ind)
+        expected = a.shape[ind:] + a.shape[:ind]
+        actual = ainv.shape
+        assert_equal(actual, expected)
+
+    @pytest.mark.parametrize("ind", [
+        0, -2,
+        ])
+    def test_tensorinv_ind_limit(self, ind):
+        a = np.eye(24)
+        a.shape = (4, 6, 8, 3)
+        with assert_raises(ValueError):
+            linalg.tensorinv(a=a, ind=ind)
+
+    def test_tensorinv_result(self):
+        # mimic a docstring example
+        a = np.eye(24)
+        a.shape = (24, 8, 3)
+        ainv = linalg.tensorinv(a, ind=1)
+        b = np.ones(24)
+        assert_allclose(np.tensordot(ainv, b, 1), np.linalg.tensorsolve(a, b))
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index a6c3e64d6..9ee44e9ff 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -47,13 +47,9 @@ from numpy import expand_dims
 from numpy.core.multiarray import normalize_axis_index
 from numpy.core.numeric import normalize_axis_tuple
 from numpy.core._internal import recursive
+from numpy.core.numeric import pickle
 
 
-if sys.version_info[0] >= 3:
-    import pickle
-else:
-    import cPickle as pickle
-
 __all__ = [
     'MAError', 'MaskError', 'MaskType', 'MaskedArray', 'abs', 'absolute',
     'add', 'all', 'allclose', 'allequal', 'alltrue', 'amax', 'amin',
@@ -450,6 +446,7 @@ def _check_fill_value(fill_value, ndtype):
     If fill_value is not None, its value is forced to the given dtype.
 
     The result is always a 0d array.
+
     """
     ndtype = np.dtype(ndtype)
     fields = ndtype.fields
@@ -469,17 +466,19 @@ def _check_fill_value(fill_value, ndtype):
                                   dtype=ndtype)
     else:
         if isinstance(fill_value, basestring) and (ndtype.char not in 'OSVU'):
+            # Note this check doesn't work if fill_value is not a scalar
             err_msg = "Cannot set fill value of string with array of dtype %s"
             raise TypeError(err_msg % ndtype)
         else:
             # In case we want to convert 1e20 to int.
+            # Also in case of converting string arrays.
             try:
                 fill_value = np.array(fill_value, copy=False, dtype=ndtype)
-            except OverflowError:
-                # Raise TypeError instead of OverflowError. OverflowError
-                # is seldom used, and the real problem here is that the
-                # passed fill_value is not compatible with the ndtype.
-                err_msg = "Fill value %s overflows dtype %s"
+            except (OverflowError, ValueError):
+                # Raise TypeError instead of OverflowError or ValueError.
+                # OverflowError is seldom used, and the real problem here is
+                # that the passed fill_value is not compatible with the ndtype.
+                err_msg = "Cannot convert fill_value %s to dtype %s"
                 raise TypeError(err_msg % (fill_value, ndtype))
     return np.array(fill_value)
 
@@ -3012,11 +3011,13 @@ class MaskedArray(ndarray):
             except (TypeError, AttributeError):
                 # When _mask.shape is not writable (because it's a void)
                 pass
-        # Finalize the fill_value for structured arrays
-        if self.dtype.names is not None:
-            if self._fill_value is None:
-                self._fill_value = _check_fill_value(None, self.dtype)
-        return
+
+        # Finalize the fill_value
+        if self._fill_value is not None:
+            self._fill_value = _check_fill_value(self._fill_value, self.dtype)
+        elif self.dtype.names is not None:
+            # Finalize the default fill_value for structured arrays
+            self._fill_value = _check_fill_value(None, self.dtype)
 
     def __array_wrap__(self, obj, context=None):
         """
@@ -4016,6 +4017,16 @@ class MaskedArray(ndarray):
         check = check.view(type(self))
         check._update_from(self)
         check._mask = mask
+
+        # Cast fill value to bool_ if needed. If it cannot be cast, the
+        # default boolean fill value is used.
+        if check._fill_value is not None:
+            try:
+                fill = _check_fill_value(check._fill_value, np.bool_)
+            except (TypeError, ValueError):
+                fill = _check_fill_value(None, np.bool_)
+            check._fill_value = fill
+
         return check
 
     def __eq__(self, other):
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 21e0fc41a..8a015e609 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -10,7 +10,6 @@ __author__ = "Pierre GF Gerard-Marchant"
 
 import sys
 import warnings
-import pickle
 import operator
 import itertools
 import textwrap
@@ -50,6 +49,7 @@ from numpy.ma.core import (
     ravel, repeat, reshape, resize, shape, sin, sinh, sometrue, sort, sqrt,
     subtract, sum, take, tan, tanh, transpose, where, zeros,
     )
+from numpy.core.numeric import pickle
 
 pi = np.pi
 
@@ -60,6 +60,11 @@ suppress_copy_mask_on_assignment.filter(
     "setting an item on a masked array which has a shared mask will not copy")
 
 
+# For parametrized numeric testing
+num_dts = [np.dtype(dt_) for dt_ in '?bhilqBHILQefdgFD']
+num_ids = [dt_.char for dt_ in num_dts]
+
+
 class TestMaskedArray(object):
     # Base test class for MaskedArrays.
 
@@ -555,50 +560,55 @@ class TestMaskedArray(object):
                      True,                            # Fully masked
                      False)                           # Fully unmasked
 
-            for mask in masks:
-                a.mask = mask
-                a_pickled = pickle.loads(a.dumps())
-                assert_equal(a_pickled._mask, a._mask)
-                assert_equal(a_pickled._data, a._data)
-                if dtype in (object, int):
-                    assert_equal(a_pickled.fill_value, 999)
-                else:
-                    assert_equal(a_pickled.fill_value, dtype(999))
-                assert_array_equal(a_pickled.mask, mask)
+            for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+                for mask in masks:
+                    a.mask = mask
+                    a_pickled = pickle.loads(pickle.dumps(a, protocol=proto))
+                    assert_equal(a_pickled._mask, a._mask)
+                    assert_equal(a_pickled._data, a._data)
+                    if dtype in (object, int):
+                        assert_equal(a_pickled.fill_value, 999)
+                    else:
+                        assert_equal(a_pickled.fill_value, dtype(999))
+                    assert_array_equal(a_pickled.mask, mask)
 
     def test_pickling_subbaseclass(self):
         # Test pickling w/ a subclass of ndarray
         x = np.array([(1.0, 2), (3.0, 4)],
                      dtype=[('x', float), ('y', int)]).view(np.recarray)
         a = masked_array(x, mask=[(True, False), (False, True)])
-        a_pickled = pickle.loads(a.dumps())
-        assert_equal(a_pickled._mask, a._mask)
-        assert_equal(a_pickled, a)
-        assert_(isinstance(a_pickled._data, np.recarray))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            a_pickled = pickle.loads(pickle.dumps(a, protocol=proto))
+            assert_equal(a_pickled._mask, a._mask)
+            assert_equal(a_pickled, a)
+            assert_(isinstance(a_pickled._data, np.recarray))
 
     def test_pickling_maskedconstant(self):
         # Test pickling MaskedConstant
         mc = np.ma.masked
-        mc_pickled = pickle.loads(mc.dumps())
-        assert_equal(mc_pickled._baseclass, mc._baseclass)
-        assert_equal(mc_pickled._mask, mc._mask)
-        assert_equal(mc_pickled._data, mc._data)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            mc_pickled = pickle.loads(pickle.dumps(mc, protocol=proto))
+            assert_equal(mc_pickled._baseclass, mc._baseclass)
+            assert_equal(mc_pickled._mask, mc._mask)
+            assert_equal(mc_pickled._data, mc._data)
 
     def test_pickling_wstructured(self):
         # Tests pickling w/ structured array
         a = array([(1, 1.), (2, 2.)], mask=[(0, 0), (0, 1)],
                   dtype=[('a', int), ('b', float)])
-        a_pickled = pickle.loads(a.dumps())
-        assert_equal(a_pickled._mask, a._mask)
-        assert_equal(a_pickled, a)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            a_pickled = pickle.loads(pickle.dumps(a, protocol=proto))
+            assert_equal(a_pickled._mask, a._mask)
+            assert_equal(a_pickled, a)
 
     def test_pickling_keepalignment(self):
         # Tests pickling w/ F_CONTIGUOUS arrays
         a = arange(10)
         a.shape = (-1, 2)
         b = a.T
-        test = pickle.loads(pickle.dumps(b))
-        assert_equal(test, b)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            test = pickle.loads(pickle.dumps(b, protocol=proto))
+            assert_equal(test, b)
 
     def test_single_element_subscript(self):
         # Tests single element subscripts of Maskedarrays.
@@ -1410,23 +1420,34 @@ class TestMaskedArrayArithmetic(object):
         # Test the equality of structured arrays
         ndtype = [('A', int), ('B', int)]
         a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype)
+
         test = (a == a)
         assert_equal(test.data, [True, True])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         test = (a == a[0])
         assert_equal(test.data, [True, False])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype)
         test = (a == b)
         assert_equal(test.data, [False, True])
         assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
         test = (a[0] == b)
         assert_equal(test.data, [False, False])
         assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
         b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype)
         test = (a == b)
         assert_equal(test.data, [True, True])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         # complicated dtype, 2-dimensional array.
         ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])]
         a = array([[(1, (1, 1)), (2, (2, 2))],
@@ -1436,28 +1457,40 @@ class TestMaskedArrayArithmetic(object):
         test = (a[0, 0] == a)
         assert_equal(test.data, [[True, False], [False, False]])
         assert_equal(test.mask, [[False, False], [False, True]])
+        assert_(test.fill_value == True)
 
     def test_ne_on_structured(self):
         # Test the equality of structured arrays
         ndtype = [('A', int), ('B', int)]
         a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype)
+
         test = (a != a)
         assert_equal(test.data, [False, False])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         test = (a != a[0])
         assert_equal(test.data, [False, True])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype)
         test = (a != b)
         assert_equal(test.data, [True, False])
         assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
         test = (a[0] != b)
         assert_equal(test.data, [True, True])
         assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
         b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype)
         test = (a != b)
         assert_equal(test.data, [False, False])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         # complicated dtype, 2-dimensional array.
         ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])]
         a = array([[(1, (1, 1)), (2, (2, 2))],
@@ -1467,6 +1500,7 @@ class TestMaskedArrayArithmetic(object):
         test = (a[0, 0] != a)
         assert_equal(test.data, [[False, True], [True, True]])
         assert_equal(test.mask, [[False, False], [False, True]])
+        assert_(test.fill_value == True)
 
     def test_eq_ne_structured_extra(self):
         # ensure simple examples are symmetric and make sense.
@@ -1502,6 +1536,120 @@ class TestMaskedArrayArithmetic(object):
                 el_by_el = [m1[name] != m2[name] for name in dt.names]
                 assert_equal(array(el_by_el, dtype=bool).any(), ne_expected)
 
+    @pytest.mark.parametrize('dt', ['S', 'U'])
+    @pytest.mark.parametrize('fill', [None, 'A'])
+    def test_eq_for_strings(self, dt, fill):
+        # Test the equality of structured arrays
+        a = array(['a', 'b'], dtype=dt, mask=[0, 1], fill_value=fill)
+
+        test = (a == a)
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        test = (a == a[0])
+        assert_equal(test.data, [True, False])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        b = array(['a', 'b'], dtype=dt, mask=[1, 0], fill_value=fill)
+        test = (a == b)
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [True, True])
+        assert_(test.fill_value == True)
+
+        # test = (a[0] == b)  # doesn't work in Python2
+        test = (b == a[0])
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
+    @pytest.mark.parametrize('dt', ['S', 'U'])
+    @pytest.mark.parametrize('fill', [None, 'A'])
+    def test_ne_for_strings(self, dt, fill):
+        # Test the equality of structured arrays
+        a = array(['a', 'b'], dtype=dt, mask=[0, 1], fill_value=fill)
+
+        test = (a != a)
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        test = (a != a[0])
+        assert_equal(test.data, [False, True])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        b = array(['a', 'b'], dtype=dt, mask=[1, 0], fill_value=fill)
+        test = (a != b)
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [True, True])
+        assert_(test.fill_value == True)
+
+        # test = (a[0] != b)  # doesn't work in Python2
+        test = (b != a[0])
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
+    @pytest.mark.parametrize('dt1', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('dt2', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('fill', [None, 1])
+    def test_eq_for_numeric(self, dt1, dt2, fill):
+        # Test the equality of structured arrays
+        a = array([0, 1], dtype=dt1, mask=[0, 1], fill_value=fill)
+
+        test = (a == a)
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        test = (a == a[0])
+        assert_equal(test.data, [True, False])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        b = array([0, 1], dtype=dt2, mask=[1, 0], fill_value=fill)
+        test = (a == b)
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [True, True])
+        assert_(test.fill_value == True)
+
+        # test = (a[0] == b)  # doesn't work in Python2
+        test = (b == a[0])
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
+    @pytest.mark.parametrize('dt1', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('dt2', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('fill', [None, 1])
+    def test_ne_for_numeric(self, dt1, dt2, fill):
+        # Test the equality of structured arrays
+        a = array([0, 1], dtype=dt1, mask=[0, 1], fill_value=fill)
+
+        test = (a != a)
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        test = (a != a[0])
+        assert_equal(test.data, [False, True])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        b = array([0, 1], dtype=dt2, mask=[1, 0], fill_value=fill)
+        test = (a != b)
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [True, True])
+        assert_(test.fill_value == True)
+
+        # test = (a[0] != b)  # doesn't work in Python2
+        test = (b != a[0])
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
     def test_eq_with_None(self):
         # Really, comparisons with None should not be done, but check them
         # anyway. Note that pep8 will flag these tests.
@@ -4801,13 +4949,13 @@ class TestMaskedConstant(object):
 
     def test_pickle(self):
         from io import BytesIO
-        import pickle
 
-        with BytesIO() as f:
-            pickle.dump(np.ma.masked, f)
-            f.seek(0)
-            res = pickle.load(f)
-        assert_(res is np.ma.masked)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            with BytesIO() as f:
+                pickle.dump(np.ma.masked, f, protocol=proto)
+                f.seek(0)
+                res = pickle.load(f)
+            assert_(res is np.ma.masked)
 
     def test_copy(self):
         # gh-9328
@@ -4980,7 +5128,7 @@ def test_ufunc_with_out_varied():
     assert_equal(res_pos.data, expected.data)
 
 
-def test_astype():
+def test_astype_mask_ordering():
     descr = [('v', int, 3), ('x', [('y', float)])]
     x = array([
         [([1, 2, 3], (1.0,)),  ([1, 2, 3], (2.0,))],
@@ -5012,6 +5160,25 @@ def test_astype():
     assert_(x_f2.mask.flags.f_contiguous)
 
 
+@pytest.mark.parametrize('dt1', num_dts, ids=num_ids)
+@pytest.mark.parametrize('dt2', num_dts, ids=num_ids)
+@pytest.mark.filterwarnings('ignore::numpy.ComplexWarning')
+def test_astype_basic(dt1, dt2):
+    # See gh-12070
+    src = np.ma.array(ones(3, dt1), fill_value=1)
+    dst = src.astype(dt2)
+
+    assert_(src.fill_value == 1)
+    assert_(src.dtype == dt1)
+    assert_(src.fill_value.dtype == dt1)
+
+    assert_(dst.fill_value == 1)
+    assert_(dst.dtype == dt2)
+    assert_(dst.fill_value.dtype == dt2)
+
+    assert_equal(src, dst)
+
+
 def test_fieldless_void():
     dt = np.dtype([])  # a void dtype with no fields
     x = np.empty(4, dt)
diff --git a/numpy/ma/tests/test_mrecords.py b/numpy/ma/tests/test_mrecords.py
index e08dc1326..8b9e3fbc9 100644
--- a/numpy/ma/tests/test_mrecords.py
+++ b/numpy/ma/tests/test_mrecords.py
@@ -8,7 +8,6 @@
 from __future__ import division, absolute_import, print_function
 
 import warnings
-import pickle
 
 import numpy as np
 import numpy.ma as ma
@@ -26,6 +25,7 @@ from numpy.ma.testutils import (
     assert_, assert_equal,
     assert_equal_records,
     )
+from numpy.core.numeric import pickle
 
 
 class TestMRecords(object):
@@ -288,12 +288,13 @@ class TestMRecords(object):
         # Test pickling
         base = self.base.copy()
         mrec = base.view(mrecarray)
-        _ = pickle.dumps(mrec)
-        mrec_ = pickle.loads(_)
-        assert_equal(mrec_.dtype, mrec.dtype)
-        assert_equal_records(mrec_._data, mrec._data)
-        assert_equal(mrec_._mask, mrec._mask)
-        assert_equal_records(mrec_._mask, mrec._mask)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            _ = pickle.dumps(mrec, protocol=proto)
+            mrec_ = pickle.loads(_)
+            assert_equal(mrec_.dtype, mrec.dtype)
+            assert_equal_records(mrec_._data, mrec._data)
+            assert_equal(mrec_._mask, mrec._mask)
+            assert_equal_records(mrec_._mask, mrec._mask)
 
     def test_filled(self):
         # Test filling the array
diff --git a/numpy/ma/tests/test_old_ma.py b/numpy/ma/tests/test_old_ma.py
index d7b1e3c18..807121184 100644
--- a/numpy/ma/tests/test_old_ma.py
+++ b/numpy/ma/tests/test_old_ma.py
@@ -22,6 +22,7 @@ from numpy.ma import (
     repeat, resize, shape, sin, sinh, sometrue, sort, sqrt, subtract, sum,
     take, tan, tanh, transpose, where, zeros,
     )
+from numpy.core.numeric import pickle
 
 pi = np.pi
 
@@ -549,13 +550,13 @@ class TestMa(object):
 
     def test_testPickle(self):
         # Test of pickling
-        import pickle
         x = arange(12)
         x[4:10:2] = masked
         x = x.reshape(4, 3)
-        s = pickle.dumps(x)
-        y = pickle.loads(s)
-        assert_(eq(x, y))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            s = pickle.dumps(x, protocol=proto)
+            y = pickle.loads(s)
+            assert_(eq(x, y))
 
     def test_testMasked(self):
         # Test of masked element
diff --git a/numpy/matrixlib/tests/test_masked_matrix.py b/numpy/matrixlib/tests/test_masked_matrix.py
index 5ed8044aa..1751020db 100644
--- a/numpy/matrixlib/tests/test_masked_matrix.py
+++ b/numpy/matrixlib/tests/test_masked_matrix.py
@@ -1,6 +1,5 @@
 from __future__ import division, absolute_import, print_function
 
-import pickle
 import pytest
 
 import numpy as np
@@ -10,6 +9,7 @@ from numpy.ma.core import (masked_array, masked_values, masked, allequal,
                            MaskType, getmask, MaskedArray, nomask,
                            log, add, hypot, divide)
 from numpy.ma.extras import mr_
+from numpy.core.numeric import pickle
 
 
 class MMatrix(MaskedArray, np.matrix,):
@@ -79,10 +79,11 @@ class TestMaskedMatrix(object):
     def test_pickling_subbaseclass(self):
         # Test pickling w/ a subclass of ndarray
         a = masked_array(np.matrix(list(range(10))), mask=[1, 0, 1, 0, 0] * 2)
-        a_pickled = pickle.loads(a.dumps())
-        assert_equal(a_pickled._mask, a._mask)
-        assert_equal(a_pickled, a)
-        assert_(isinstance(a_pickled._data, np.matrix))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            a_pickled = pickle.loads(pickle.dumps(a, protocol=proto))
+            assert_equal(a_pickled._mask, a._mask)
+            assert_equal(a_pickled, a)
+            assert_(isinstance(a_pickled._data, np.matrix))
 
     def test_count_mean_with_matrix(self):
         m = masked_array(np.matrix([[1, 2], [3, 4]]), mask=np.zeros((2, 2)))
diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx
index 5097ad88f..6b054a20f 100644
--- a/numpy/random/mtrand/mtrand.pyx
+++ b/numpy/random/mtrand/mtrand.pyx
@@ -4143,15 +4143,15 @@ cdef class RandomState:
         if op.shape == ():
             fp = PyFloat_AsDouble(p)
 
-            if fp < 0.0:
-                raise ValueError("p < 0.0")
+            if fp <= 0.0:
+                raise ValueError("p <= 0.0")
             if fp > 1.0:
                 raise ValueError("p > 1.0")
             return discd_array_sc(self.internal_state, rk_geometric, size, fp,
                                   self.lock)
 
-        if np.any(np.less(op, 0.0)):
-            raise ValueError("p < 0.0")
+        if np.any(np.less_equal(op, 0.0)):
+            raise ValueError("p <= 0.0")
         if np.any(np.greater(op, 1.0)):
             raise ValueError("p > 1.0")
         return discd_array(self.internal_state, rk_geometric, size, op,
@@ -4836,9 +4836,8 @@ cdef class RandomState:
                     self._shuffle_raw(n, sizeof(npy_intp), stride, x_ptr, buf_ptr)
                 else:
                     self._shuffle_raw(n, itemsize, stride, x_ptr, buf_ptr)
-        elif isinstance(x, np.ndarray) and x.ndim > 1 and x.size:
-            # Multidimensional ndarrays require a bounce buffer.
-            buf = np.empty_like(x[0])
+        elif isinstance(x, np.ndarray) and x.ndim and x.size:
+            buf = np.empty_like(x[0,...])
             with self.lock:
                 for i in reversed(range(1, n)):
                     j = rk_interval(i, self.internal_state)
@@ -4907,8 +4906,8 @@ cdef class RandomState:
     
         # shuffle has fast-path for 1-d
         if arr.ndim == 1:
-            # must return a copy
-            if arr is x:
+            # Return a copy if same memory
+            if np.may_share_memory(arr, x):
                 arr = np.array(arr)
             self.shuffle(arr)
             return arr
diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py
index 42816a943..276517363 100644
--- a/numpy/random/tests/test_random.py
+++ b/numpy/random/tests/test_random.py
@@ -466,6 +466,10 @@ class TestRandomDist(object):
                      lambda x: [(i, i) for i in x],
                      lambda x: np.asarray([[i, i] for i in x]),
                      lambda x: np.vstack([x, x]).T,
+                     # gh-11442
+                     lambda x: (np.asarray([(i, i) for i in x],
+                                           [("a", int), ("b", int)])
+                                .view(np.recarray)),
                      # gh-4270
                      lambda x: np.asarray([(i, i) for i in x],
                                           [("a", object, 1),
diff --git a/numpy/random/tests/test_regression.py b/numpy/random/tests/test_regression.py
index 3b4b4ed40..ca9bbbc71 100644
--- a/numpy/random/tests/test_regression.py
+++ b/numpy/random/tests/test_regression.py
@@ -133,3 +133,25 @@ class TestRegression(object):
         # Force Garbage Collection - should not segfault.
         import gc
         gc.collect()
+
+    def test_permutation_subclass(self):
+        class N(np.ndarray):
+            pass
+
+        np.random.seed(1)
+        orig = np.arange(3).view(N)
+        perm = np.random.permutation(orig)
+        assert_array_equal(perm, np.array([0, 2, 1]))
+        assert_array_equal(orig, np.arange(3).view(N))
+
+        class M(object):
+            a = np.arange(5)
+
+            def __array__(self):
+                return self.a
+
+        np.random.seed(1)
+        m = M()
+        perm = np.random.permutation(m)
+        assert_array_equal(perm, np.array([2, 1, 4, 0, 3]))
+        assert_array_equal(m.__array__(), np.arange(5))
diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py
index a3832fcde..20a7dfd0b 100644
--- a/numpy/testing/_private/utils.py
+++ b/numpy/testing/_private/utils.py
@@ -19,7 +19,7 @@ from warnings import WarningMessage
 import pprint
 
 from numpy.core import(
-     float32, empty, arange, array_repr, ndarray, isnat, array)
+     bool_, float32, empty, arange, array_repr, ndarray, isnat, array)
 from numpy.lib.utils import deprecate
 
 if sys.version_info[0] >= 3:
@@ -352,7 +352,7 @@ def assert_equal(actual, desired, err_msg='', verbose=True):
     # XXX: catch ValueError for subclasses of ndarray where iscomplex fail
     try:
         usecomplex = iscomplexobj(actual) or iscomplexobj(desired)
-    except ValueError:
+    except (ValueError, TypeError):
         usecomplex = False
 
     if usecomplex:
@@ -692,6 +692,9 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
     x = array(x, copy=False, subok=True)
     y = array(y, copy=False, subok=True)
 
+    # original array for output formating
+    ox, oy = x, y
+
     def isnumber(x):
         return x.dtype.char in '?bhilqpBHILQPefdgFDG'
 
@@ -705,15 +708,20 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
         at the same locations.
 
         """
-        # Both the != True comparison here and the cast to bool_ at the end are
-        # done to deal with `masked`, which cannot be compared usefully, and
-        # for which np.all yields masked.  The use of the function np.all is
-        # for back compatibility with ndarray subclasses that changed the
-        # return values of the all method.  We are not committed to supporting
-        # such subclasses, but some used to work.
         x_id = func(x)
         y_id = func(y)
-        if npall(x_id == y_id) != True:
+        # We include work-arounds here to handle three types of slightly
+        # pathological ndarray subclasses:
+        # (1) all() on `masked` array scalars can return masked arrays, so we
+        #     use != True
+        # (2) __eq__ on some ndarray subclasses returns Python booleans
+        #     instead of element-wise comparisons, so we cast to bool_() and
+        #     use isinstance(..., bool) checks
+        # (3) subclasses with bare-bones __array_function__ implemenations may
+        #     not implement np.all(), so favor using the .all() method
+        # We are not committed to supporting such subclasses, but it's nice to
+        # support them if possible.
+        if bool_(x_id == y_id).all() != True:
             msg = build_err_msg([x, y],
                                 err_msg + '\nx and y %s location mismatch:'
                                 % (hasval), verbose=verbose, header=header,
@@ -721,9 +729,9 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
             raise AssertionError(msg)
         # If there is a scalar, then here we know the array has the same
         # flag as it everywhere, so we should return the scalar flag.
-        if x_id.ndim == 0:
+        if isinstance(x_id, bool) or x_id.ndim == 0:
             return bool_(x_id)
-        elif y_id.ndim == 0:
+        elif isinstance(x_id, bool) or y_id.ndim == 0:
             return bool_(y_id)
         else:
             return y_id
@@ -780,10 +788,10 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
         # do not trigger a failure (np.ma.masked != True evaluates as
         # np.ma.masked, which is falsy).
         if cond != True:
-            match = 100-100.0*reduced.count(1)/len(reduced)
-            msg = build_err_msg([x, y],
+            mismatch = 100.0 * reduced.count(0) / ox.size
+            msg = build_err_msg([ox, oy],
                                 err_msg
-                                + '\n(mismatch %s%%)' % (match,),
+                                + '\n(mismatch %s%%)' % (mismatch,),
                                 verbose=verbose, header=header,
                                 names=('x', 'y'), precision=precision)
             raise AssertionError(msg)
diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py
index e0d3414f7..e54fbc390 100644
--- a/numpy/testing/tests/test_utils.py
+++ b/numpy/testing/tests/test_utils.py
@@ -158,6 +158,44 @@ class TestArrayEqual(_GenericTest):
         self._test_equal(a, b)
         self._test_equal(b, a)
 
+    def test_subclass_that_overrides_eq(self):
+        # While we cannot guarantee testing functions will always work for
+        # subclasses, the tests should ideally rely only on subclasses having
+        # comparison operators, not on them being able to store booleans
+        # (which, e.g., astropy Quantity cannot usefully do). See gh-8452.
+        class MyArray(np.ndarray):
+            def __eq__(self, other):
+                return bool(np.equal(self, other).all())
+
+            def __ne__(self, other):
+                return not self == other
+
+        a = np.array([1., 2.]).view(MyArray)
+        b = np.array([2., 3.]).view(MyArray)
+        assert_(type(a == a), bool)
+        assert_(a == a)
+        assert_(a != b)
+        self._test_equal(a, a)
+        self._test_not_equal(a, b)
+        self._test_not_equal(b, a)
+
+    def test_subclass_that_does_not_implement_npall(self):
+        # While we cannot guarantee testing functions will always work for
+        # subclasses, the tests should ideally rely only on subclasses having
+        # comparison operators, not on them being able to store booleans
+        # (which, e.g., astropy Quantity cannot usefully do). See gh-8452.
+        class MyArray(np.ndarray):
+            def __array_function__(self, *args, **kwargs):
+                return NotImplemented
+
+        a = np.array([1., 2.]).view(MyArray)
+        b = np.array([2., 3.]).view(MyArray)
+        with assert_raises(TypeError):
+            np.all(a)
+        self._test_equal(a, a)
+        self._test_not_equal(a, b)
+        self._test_not_equal(b, a)
+
 
 class TestBuildErrorMessage(object):
 
@@ -469,7 +507,8 @@ class TestAlmostEqual(_GenericTest):
         self._test_not_equal(x, z)
 
     def test_error_message(self):
-        """Check the message is formatted correctly for the decimal value"""
+        """Check the message is formatted correctly for the decimal value.
+           Also check the message when input includes inf or nan (gh12200)"""
         x = np.array([1.00000000001, 2.00000000002, 3.00003])
         y = np.array([1.00000000002, 2.00000000003, 3.00004])
 
@@ -493,6 +532,19 @@ class TestAlmostEqual(_GenericTest):
             # remove anything that's not the array string
             assert_equal(str(e).split('%)\n ')[1], b)
 
+        # Check the error message when input includes inf or nan
+        x = np.array([np.inf, 0])
+        y = np.array([np.inf, 1])
+        try:
+            self._assert_func(x, y)
+        except AssertionError as e:
+            msgs = str(e).split('\n')
+            # assert error percentage is 50%
+            assert_equal(msgs[3], '(mismatch 50.0%)')
+            # assert output array contains inf
+            assert_equal(msgs[4], ' x: array([inf,  0.])')
+            assert_equal(msgs[5], ' y: array([inf,  1.])')
+
     def test_subclass_that_cannot_be_bool(self):
         # While we cannot guarantee testing functions will always work for
         # subclasses, the tests should ideally rely only on subclasses having
@@ -1077,7 +1129,7 @@ class TestStringEqual(object):
 
         assert_raises(AssertionError,
                       lambda: assert_string_equal("foo", "hello"))
-        
+
     def test_regex(self):
         assert_string_equal("a+*b", "a+*b")
 
diff --git a/numpy/tests/test_reloading.py b/numpy/tests/test_reloading.py
index cd42252e3..a073d691f 100644
--- a/numpy/tests/test_reloading.py
+++ b/numpy/tests/test_reloading.py
@@ -1,9 +1,9 @@
 from __future__ import division, absolute_import, print_function
 
 import sys
-import pickle
 
 from numpy.testing import assert_raises, assert_, assert_equal
+from numpy.core.numeric import pickle
 
 if sys.version_info[:2] >= (3, 4):
     from importlib import reload
@@ -32,5 +32,7 @@ def test_numpy_reloading():
 
 def test_novalue():
     import numpy as np
-    assert_equal(repr(np._NoValue), '<no value>')
-    assert_(pickle.loads(pickle.dumps(np._NoValue)) is np._NoValue)
+    for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+        assert_equal(repr(np._NoValue), '<no value>')
+        assert_(pickle.loads(pickle.dumps(np._NoValue,
+                                          protocol=proto)) is np._NoValue)