168 files changed, 7144 insertions, 3790 deletions
diff --git a/numpy/_pytesttester.py b/numpy/_pytesttester.py
index 6a1b3274e..30ecc69c7 100644
--- a/numpy/_pytesttester.py
+++ b/numpy/_pytesttester.py
@@ -160,6 +160,24 @@ class PytestTester(object):
             "-W ignore::UserWarning:cpuinfo",
             ]
 
+        # When testing matrices, ignore their PendingDeprecationWarnings
+        pytest_args += [
+            "-W ignore:the matrix subclass is not",
+            ]
+
+        # Ignore the warnings Python 2.7 emits when run with the -3 flag
+        pytest_args += [
+            r"-W ignore:sys\.exc_clear\(\) not supported in 3\.x:DeprecationWarning",
+            r"-W ignore:in 3\.x, __setslice__:DeprecationWarning",
+            r"-W ignore:in 3\.x, __getslice__:DeprecationWarning",
+            r"-W ignore:buffer\(\) not supported in 3\.x:DeprecationWarning",
+            r"-W ignore:CObject type is not supported in 3\.x:DeprecationWarning",
+            r"-W ignore:comparing unequal types not supported in 3\.x:DeprecationWarning",
+            r"-W ignore:the commands module has been removed in Python 3\.0:DeprecationWarning",
+            r"-W ignore:The 'new' module has been removed in Python 3\.0:DeprecationWarning",
+            ]
+
+
         if doctests:
             raise ValueError("Doctests not supported")
 
diff --git a/numpy/compat/py3k.py b/numpy/compat/py3k.py
index d5bb2e4c7..ce4543bc3 100644
--- a/numpy/compat/py3k.py
+++ b/numpy/compat/py3k.py
@@ -7,7 +7,8 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['bytes', 'asbytes', 'isfileobj', 'getexception', 'strchar',
            'unicode', 'asunicode', 'asbytes_nested', 'asunicode_nested',
            'asstr', 'open_latin1', 'long', 'basestring', 'sixu',
-           'integer_types', 'is_pathlib_path', 'npy_load_module', 'Path']
+           'integer_types', 'is_pathlib_path', 'npy_load_module', 'Path',
+           'contextlib_nullcontext']
 
 import sys
 try:
@@ -97,6 +98,28 @@ def is_pathlib_path(obj):
     """
     return Path is not None and isinstance(obj, Path)
 
+# Backport of contextlib.nullcontext, which was added in Python 3.7
+class contextlib_nullcontext(object):
+    """Context manager that does no additional processing.
+
+    Used as a stand-in for a normal context manager, when a particular
+    block of code is only sometimes used with a normal context manager:
+
+    cm = optional_cm if condition else contextlib_nullcontext()
+    with cm:
+        # Perform operation, using optional_cm if condition is True
+    """
+
+    def __init__(self, enter_result=None):
+        self.enter_result = enter_result
+
+    def __enter__(self):
+        return self.enter_result
+
+    def __exit__(self, *excinfo):
+        pass
+
+
 if sys.version_info[0] >= 3 and sys.version_info[1] >= 4:
     def npy_load_module(name, fn, info=None):
         """
diff --git a/numpy/conftest.py b/numpy/conftest.py
index 7b1771748..4d4d055ec 100644
--- a/numpy/conftest.py
+++ b/numpy/conftest.py
@@ -3,10 +3,8 @@ Pytest configuration and fixtures for the Numpy test suite.
 """
 from __future__ import division, absolute_import, print_function
 
-import warnings
 import pytest
 import numpy
-import importlib
 
 from numpy.core._multiarray_tests import get_fpu_mode
 
diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py
index 9ef30b018..80ce84f00 100644
--- a/numpy/core/__init__.py
+++ b/numpy/core/__init__.py
@@ -15,14 +15,35 @@ for envkey in ['OPENBLAS_MAIN_FREE', 'GOTOBLAS_MAIN_FREE']:
 try:
     from . import multiarray
 except ImportError as exc:
+    import sys
     msg = """
+
+IMPORTANT: PLEASE READ THIS FOR ADVICE ON HOW TO SOLVE THIS ISSUE!
+
 Importing the multiarray numpy extension module failed.  Most
 likely you are trying to import a failed build of numpy.
-If you're working with a numpy git repo, try `git clean -xdf` (removes all
-files not under version control).  Otherwise reinstall numpy.
+Here is how to proceed:
+- If you're working with a numpy git repository, try `git clean -xdf`
+  (removes all files not under version control) and rebuild numpy.
+- If you are simply trying to use the numpy version that you have installed:
+  your installation is broken - please reinstall numpy.
+- If you have already reinstalled and that did not fix the problem, then:
+  1. Check that you are using the Python you expect (you're using %s),
+     and that you have no directories in your PATH or PYTHONPATH that can
+     interfere with the Python and numpy versions you're trying to use.
+  2. If (1) looks fine, you can open a new issue at
+     https://github.com/numpy/numpy/issues.  Please include details on:
+     - how you installed Python
+     - how you installed numpy
+     - your operating system
+     - whether or not you have multiple versions of Python installed
+     - if you built from source, your compiler versions and ideally a build log
+
+     Note: this error has many possible causes, so please don't comment on
+     an existing issue about this - open a new one instead.
 
 Original error was: %s
-""" % (exc,)
+""" % (sys.executable, exc)
     raise ImportError(msg)
 finally:
     for envkey in env_added:
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index 9ebd12cbd..ea472f1b3 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -10,6 +10,8 @@ NOTE: Many of the methods of ndarray have corresponding functions.
 """
 from __future__ import division, absolute_import, print_function
 
+from numpy.core import numerictypes as _numerictypes
+from numpy.core import dtype
 from numpy.core.function_base import add_newdoc
 
 ###############################################################################
@@ -945,66 +947,6 @@ add_newdoc('numpy.core.multiarray', 'empty',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'empty_like',
-    """
-    empty_like(prototype, dtype=None, order='K', subok=True)
-
-    Return a new array with the same shape and type as a given array.
-
-    Parameters
-    ----------
-    prototype : array_like
-        The shape and data-type of `prototype` define these same attributes
-        of the returned array.
-    dtype : data-type, optional
-        Overrides the data type of the result.
-
-        .. versionadded:: 1.6.0
-    order : {'C', 'F', 'A', or 'K'}, optional
-        Overrides the memory layout of the result. 'C' means C-order,
-        'F' means F-order, 'A' means 'F' if ``prototype`` is Fortran
-        contiguous, 'C' otherwise. 'K' means match the layout of ``prototype``
-        as closely as possible.
-
-        .. versionadded:: 1.6.0
-    subok : bool, optional.
-        If True, then the newly created array will use the sub-class
-        type of 'a', otherwise it will be a base-class array. Defaults
-        to True.
-
-    Returns
-    -------
-    out : ndarray
-        Array of uninitialized (arbitrary) data with the same
-        shape and type as `prototype`.
-
-    See Also
-    --------
-    ones_like : Return an array of ones with shape and type of input.
-    zeros_like : Return an array of zeros with shape and type of input.
-    full_like : Return a new array with shape of input filled with value.
-    empty : Return a new uninitialized array.
-
-    Notes
-    -----
-    This function does *not* initialize the returned array; to do that use
-    `zeros_like` or `ones_like` instead.  It may be marginally faster than
-    the functions that do set the array values.
-
-    Examples
-    --------
-    >>> a = ([1,2,3], [4,5,6])                         # a is array-like
-    >>> np.empty_like(a)
-    array([[-1073741821, -1073741821,           3],    #random
-           [          0,           0, -1073741821]])
-    >>> a = np.array([[1., 2., 3.],[4.,5.,6.]])
-    >>> np.empty_like(a)
-    array([[ -2.00000715e+000,   1.48219694e-323,  -2.00000572e+000],#random
-           [  4.38791518e-305,  -2.00000715e+000,   4.17269252e-309]])
-
-    """)
-
-
 add_newdoc('numpy.core.multiarray', 'scalar',
     """
     scalar(dtype, obj)
@@ -1282,163 +1224,6 @@ add_newdoc('numpy.core.multiarray', 'frombuffer',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'concatenate',
-    """
-    concatenate((a1, a2, ...), axis=0, out=None)
-
-    Join a sequence of arrays along an existing axis.
-
-    Parameters
-    ----------
-    a1, a2, ... : sequence of array_like
-        The arrays must have the same shape, except in the dimension
-        corresponding to `axis` (the first, by default).
-    axis : int, optional
-        The axis along which the arrays will be joined.  If axis is None,
-        arrays are flattened before use.  Default is 0.
-    out : ndarray, optional
-        If provided, the destination to place the result. The shape must be
-        correct, matching that of what concatenate would have returned if no
-        out argument were specified.
-
-    Returns
-    -------
-    res : ndarray
-        The concatenated array.
-
-    See Also
-    --------
-    ma.concatenate : Concatenate function that preserves input masks.
-    array_split : Split an array into multiple sub-arrays of equal or
-                  near-equal size.
-    split : Split array into a list of multiple sub-arrays of equal size.
-    hsplit : Split array into multiple sub-arrays horizontally (column wise)
-    vsplit : Split array into multiple sub-arrays vertically (row wise)
-    dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
-    stack : Stack a sequence of arrays along a new axis.
-    hstack : Stack arrays in sequence horizontally (column wise)
-    vstack : Stack arrays in sequence vertically (row wise)
-    dstack : Stack arrays in sequence depth wise (along third dimension)
-    block : Assemble arrays from blocks.
-
-    Notes
-    -----
-    When one or more of the arrays to be concatenated is a MaskedArray,
-    this function will return a MaskedArray object instead of an ndarray,
-    but the input masks are *not* preserved. In cases where a MaskedArray
-    is expected as input, use the ma.concatenate function from the masked
-    array module instead.
-
-    Examples
-    --------
-    >>> a = np.array([[1, 2], [3, 4]])
-    >>> b = np.array([[5, 6]])
-    >>> np.concatenate((a, b), axis=0)
-    array([[1, 2],
-           [3, 4],
-           [5, 6]])
-    >>> np.concatenate((a, b.T), axis=1)
-    array([[1, 2, 5],
-           [3, 4, 6]])
-    >>> np.concatenate((a, b), axis=None)
-    array([1, 2, 3, 4, 5, 6])
-
-    This function will not preserve masking of MaskedArray inputs.
-
-    >>> a = np.ma.arange(3)
-    >>> a[1] = np.ma.masked
-    >>> b = np.arange(2, 5)
-    >>> a
-    masked_array(data=[0, --, 2],
-                 mask=[False,  True, False],
-           fill_value=999999)
-    >>> b
-    array([2, 3, 4])
-    >>> np.concatenate([a, b])
-    masked_array(data=[0, 1, 2, 2, 3, 4],
-                 mask=False,
-           fill_value=999999)
-    >>> np.ma.concatenate([a, b])
-    masked_array(data=[0, --, 2, 2, 3, 4],
-                 mask=[False,  True, False, False, False, False],
-           fill_value=999999)
-
-    """)
-
-add_newdoc('numpy.core', 'inner',
-    """
-    inner(a, b)
-
-    Inner product of two arrays.
-
-    Ordinary inner product of vectors for 1-D arrays (without complex
-    conjugation), in higher dimensions a sum product over the last axes.
-
-    Parameters
-    ----------
-    a, b : array_like
-        If `a` and `b` are nonscalar, their last dimensions must match.
-
-    Returns
-    -------
-    out : ndarray
-        `out.shape = a.shape[:-1] + b.shape[:-1]`
-
-    Raises
-    ------
-    ValueError
-        If the last dimension of `a` and `b` has different size.
-
-    See Also
-    --------
-    tensordot : Sum products over arbitrary axes.
-    dot : Generalised matrix product, using second last dimension of `b`.
-    einsum : Einstein summation convention.
-
-    Notes
-    -----
-    For vectors (1-D arrays) it computes the ordinary inner-product::
-
-        np.inner(a, b) = sum(a[:]*b[:])
-
-    More generally, if `ndim(a) = r > 0` and `ndim(b) = s > 0`::
-
-        np.inner(a, b) = np.tensordot(a, b, axes=(-1,-1))
-
-    or explicitly::
-
-        np.inner(a, b)[i0,...,ir-1,j0,...,js-1]
-             = sum(a[i0,...,ir-1,:]*b[j0,...,js-1,:])
-
-    In addition `a` or `b` may be scalars, in which case::
-
-       np.inner(a,b) = a*b
-
-    Examples
-    --------
-    Ordinary inner product for vectors:
-
-    >>> a = np.array([1,2,3])
-    >>> b = np.array([0,1,0])
-    >>> np.inner(a, b)
-    2
-
-    A multidimensional example:
-
-    >>> a = np.arange(24).reshape((2,3,4))
-    >>> b = np.arange(4)
-    >>> np.inner(a, b)
-    array([[ 14,  38,  62],
-           [ 86, 110, 134]])
-
-    An example where `b` is a scalar:
-
-    >>> np.inner(np.eye(2), 7)
-    array([[ 7.,  0.],
-           [ 0.,  7.]])
-
-    """)
-
 add_newdoc('numpy.core', 'fastCopyAndTranspose',
     """_fastCopyAndTranspose(a)""")
 
@@ -1573,263 +1358,6 @@ add_newdoc('numpy.core.multiarray', 'set_numeric_ops',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'where',
-    """
-    where(condition, [x, y])
-
-    Return elements chosen from `x` or `y` depending on `condition`.
-
-    .. note::
-        When only `condition` is provided, this function is a shorthand for
-        ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
-        preferred, as it behaves correctly for subclasses. The rest of this
-        documentation covers only the case where all three arguments are
-        provided.
-
-    Parameters
-    ----------
-    condition : array_like, bool
-        Where True, yield `x`, otherwise yield `y`.
-    x, y : array_like
-        Values from which to choose. `x`, `y` and `condition` need to be
-        broadcastable to some shape.
-
-    Returns
-    -------
-    out : ndarray
-        An array with elements from `x` where `condition` is True, and elements
-        from `y` elsewhere.
-
-    See Also
-    --------
-    choose
-    nonzero : The function that is called when x and y are omitted
-
-    Notes
-    -----
-    If all the arrays are 1-D, `where` is equivalent to::
-
-        [xv if c else yv
-         for c, xv, yv in zip(condition, x, y)]
-
-    Examples
-    --------
-    >>> a = np.arange(10)
-    >>> a
-    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-    >>> np.where(a < 5, a, 10*a)
-    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
-
-    This can be used on multidimensional arrays too:
-
-    >>> np.where([[True, False], [True, True]],
-    ...          [[1, 2], [3, 4]],
-    ...          [[9, 8], [7, 6]])
-    array([[1, 8],
-           [3, 4]])
-
-    The shapes of x, y, and the condition are broadcast together:
-
-    >>> x, y = np.ogrid[:3, :4]
-    >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
-    array([[10,  0,  0,  0],
-           [10, 11,  1,  1],
-           [10, 11, 12,  2]])
-
-    >>> a = np.array([[0, 1, 2],
-    ...               [0, 2, 4],
-    ...               [0, 3, 6]])
-    >>> np.where(a < 4, a, -1)  # -1 is broadcast
-    array([[ 0,  1,  2],
-           [ 0,  2, -1],
-           [ 0,  3, -1]])
-    """)
-
-
-add_newdoc('numpy.core.multiarray', 'lexsort',
-    """
-    lexsort(keys, axis=-1)
-
-    Perform an indirect stable sort using a sequence of keys.
-
-    Given multiple sorting keys, which can be interpreted as columns in a
-    spreadsheet, lexsort returns an array of integer indices that describes
-    the sort order by multiple columns. The last key in the sequence is used
-    for the primary sort order, the second-to-last key for the secondary sort
-    order, and so on. The keys argument must be a sequence of objects that
-    can be converted to arrays of the same shape. If a 2D array is provided
-    for the keys argument, it's rows are interpreted as the sorting keys and
-    sorting is according to the last row, second last row etc.
-
-    Parameters
-    ----------
-    keys : (k, N) array or tuple containing k (N,)-shaped sequences
-        The `k` different "columns" to be sorted.  The last column (or row if
-        `keys` is a 2D array) is the primary sort key.
-    axis : int, optional
-        Axis to be indirectly sorted.  By default, sort over the last axis.
-
-    Returns
-    -------
-    indices : (N,) ndarray of ints
-        Array of indices that sort the keys along the specified axis.
-
-    See Also
-    --------
-    argsort : Indirect sort.
-    ndarray.sort : In-place sort.
-    sort : Return a sorted copy of an array.
-
-    Examples
-    --------
-    Sort names: first by surname, then by name.
-
-    >>> surnames =    ('Hertz',    'Galilei', 'Hertz')
-    >>> first_names = ('Heinrich', 'Galileo', 'Gustav')
-    >>> ind = np.lexsort((first_names, surnames))
-    >>> ind
-    array([1, 2, 0])
-
-    >>> [surnames[i] + ", " + first_names[i] for i in ind]
-    ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich']
-
-    Sort two columns of numbers:
-
-    >>> a = [1,5,1,4,3,4,4] # First column
-    >>> b = [9,4,0,4,0,2,1] # Second column
-    >>> ind = np.lexsort((b,a)) # Sort by a, then by b
-    >>> print(ind)
-    [2 0 4 6 5 3 1]
-
-    >>> [(a[i],b[i]) for i in ind]
-    [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)]
-
-    Note that sorting is first according to the elements of ``a``.
-    Secondary sorting is according to the elements of ``b``.
-
-    A normal ``argsort`` would have yielded:
-
-    >>> [(a[i],b[i]) for i in np.argsort(a)]
-    [(1, 9), (1, 0), (3, 0), (4, 4), (4, 2), (4, 1), (5, 4)]
-
-    Structured arrays are sorted lexically by ``argsort``:
-
-    >>> x = np.array([(1,9), (5,4), (1,0), (4,4), (3,0), (4,2), (4,1)],
-    ...              dtype=np.dtype([('x', int), ('y', int)]))
-
-    >>> np.argsort(x) # or np.argsort(x, order=('x', 'y'))
-    array([2, 0, 4, 6, 5, 3, 1])
-
-    """)
-
-add_newdoc('numpy.core.multiarray', 'can_cast',
-    """
-    can_cast(from_, to, casting='safe')
-
-    Returns True if cast between data types can occur according to the
-    casting rule.  If from is a scalar or array scalar, also returns
-    True if the scalar value can be cast without overflow or truncation
-    to an integer.
-
-    Parameters
-    ----------
-    from_ : dtype, dtype specifier, scalar, or array
-        Data type, scalar, or array to cast from.
-    to : dtype or dtype specifier
-        Data type to cast to.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
-        Controls what kind of data casting may occur.
-
-          * 'no' means the data types should not be cast at all.
-          * 'equiv' means only byte-order changes are allowed.
-          * 'safe' means only casts which can preserve values are allowed.
-          * 'same_kind' means only safe casts or casts within a kind,
-            like float64 to float32, are allowed.
-          * 'unsafe' means any data conversions may be done.
-
-    Returns
-    -------
-    out : bool
-        True if cast can occur according to the casting rule.
-
-    Notes
-    -----
-    Starting in NumPy 1.9, can_cast function now returns False in 'safe'
-    casting mode for integer/float dtype and string dtype if the string dtype
-    length is not long enough to store the max integer/float value converted
-    to a string. Previously can_cast in 'safe' mode returned True for
-    integer/float dtype and a string dtype of any length.
-
-    See also
-    --------
-    dtype, result_type
-
-    Examples
-    --------
-    Basic examples
-
-    >>> np.can_cast(np.int32, np.int64)
-    True
-    >>> np.can_cast(np.float64, complex)
-    True
-    >>> np.can_cast(complex, float)
-    False
-
-    >>> np.can_cast('i8', 'f8')
-    True
-    >>> np.can_cast('i8', 'f4')
-    False
-    >>> np.can_cast('i4', 'S4')
-    False
-
-    Casting scalars
-
-    >>> np.can_cast(100, 'i1')
-    True
-    >>> np.can_cast(150, 'i1')
-    False
-    >>> np.can_cast(150, 'u1')
-    True
-
-    >>> np.can_cast(3.5e100, np.float32)
-    False
-    >>> np.can_cast(1000.0, np.float32)
-    True
-
-    Array scalar checks the value, array does not
-
-    >>> np.can_cast(np.array(1000.0), np.float32)
-    True
-    >>> np.can_cast(np.array([1000.0]), np.float32)
-    False
-
-    Using the casting rules
-
-    >>> np.can_cast('i8', 'i8', 'no')
-    True
-    >>> np.can_cast('<i8', '>i8', 'no')
-    False
-
-    >>> np.can_cast('<i8', '>i8', 'equiv')
-    True
-    >>> np.can_cast('<i4', '>i8', 'equiv')
-    False
-
-    >>> np.can_cast('<i4', '>i8', 'safe')
-    True
-    >>> np.can_cast('<i8', '>i4', 'safe')
-    False
-
-    >>> np.can_cast('<i8', '>i4', 'same_kind')
-    True
-    >>> np.can_cast('<i8', '>u4', 'same_kind')
-    False
-
-    >>> np.can_cast('<i8', '>u4', 'unsafe')
-    True
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'promote_types',
     """
     promote_types(type1, type2)
@@ -1890,123 +1418,6 @@ add_newdoc('numpy.core.multiarray', 'promote_types',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'min_scalar_type',
-    """
-    min_scalar_type(a)
-
-    For scalar ``a``, returns the data type with the smallest size
-    and smallest scalar kind which can hold its value.  For non-scalar
-    array ``a``, returns the vector's dtype unmodified.
-
-    Floating point values are not demoted to integers,
-    and complex values are not demoted to floats.
-
-    Parameters
-    ----------
-    a : scalar or array_like
-        The value whose minimal data type is to be found.
-
-    Returns
-    -------
-    out : dtype
-        The minimal data type.
-
-    Notes
-    -----
-    .. versionadded:: 1.6.0
-
-    See Also
-    --------
-    result_type, promote_types, dtype, can_cast
-
-    Examples
-    --------
-    >>> np.min_scalar_type(10)
-    dtype('uint8')
-
-    >>> np.min_scalar_type(-260)
-    dtype('int16')
-
-    >>> np.min_scalar_type(3.1)
-    dtype('float16')
-
-    >>> np.min_scalar_type(1e50)
-    dtype('float64')
-
-    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
-    dtype('float64')
-
-    """)
-
-add_newdoc('numpy.core.multiarray', 'result_type',
-    """
-    result_type(*arrays_and_dtypes)
-
-    Returns the type that results from applying the NumPy
-    type promotion rules to the arguments.
-
-    Type promotion in NumPy works similarly to the rules in languages
-    like C++, with some slight differences.  When both scalars and
-    arrays are used, the array's type takes precedence and the actual value
-    of the scalar is taken into account.
-
-    For example, calculating 3*a, where a is an array of 32-bit floats,
-    intuitively should result in a 32-bit float output.  If the 3 is a
-    32-bit integer, the NumPy rules indicate it can't convert losslessly
-    into a 32-bit float, so a 64-bit float should be the result type.
-    By examining the value of the constant, '3', we see that it fits in
-    an 8-bit integer, which can be cast losslessly into the 32-bit float.
-
-    Parameters
-    ----------
-    arrays_and_dtypes : list of arrays and dtypes
-        The operands of some operation whose result type is needed.
-
-    Returns
-    -------
-    out : dtype
-        The result type.
-
-    See also
-    --------
-    dtype, promote_types, min_scalar_type, can_cast
-
-    Notes
-    -----
-    .. versionadded:: 1.6.0
-
-    The specific algorithm used is as follows.
-
-    Categories are determined by first checking which of boolean,
-    integer (int/uint), or floating point (float/complex) the maximum
-    kind of all the arrays and the scalars are.
-
-    If there are only scalars or the maximum category of the scalars
-    is higher than the maximum category of the arrays,
-    the data types are combined with :func:`promote_types`
-    to produce the return value.
-
-    Otherwise, `min_scalar_type` is called on each array, and
-    the resulting data types are all combined with :func:`promote_types`
-    to produce the return value.
-
-    The set of int values is not a subset of the uint values for types
-    with the same number of bits, something not reflected in
-    :func:`min_scalar_type`, but handled as a special case in `result_type`.
-
-    Examples
-    --------
-    >>> np.result_type(3, np.arange(7, dtype='i1'))
-    dtype('int8')
-
-    >>> np.result_type('i4', 'c8')
-    dtype('complex128')
-
-    >>> np.result_type(3.0, -2)
-    dtype('float64')
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'newbuffer',
     """
     newbuffer(size)
@@ -2059,91 +1470,6 @@ add_newdoc('numpy.core.multiarray', 'getbuffer',
 
     """)
 
-add_newdoc('numpy.core', 'dot',
-    """
-    dot(a, b, out=None)
-
-    Dot product of two arrays. Specifically,
-
-    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
-      (without complex conjugation).
-
-    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
-      but using :func:`matmul` or ``a @ b`` is preferred.
-
-    - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
-      and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
-
-    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
-      the last axis of `a` and `b`.
-
-    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
-      sum product over the last axis of `a` and the second-to-last axis of `b`::
-
-        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
-
-    Parameters
-    ----------
-    a : array_like
-        First argument.
-    b : array_like
-        Second argument.
-    out : ndarray, optional
-        Output argument. This must have the exact kind that would be returned
-        if it was not used. In particular, it must have the right type, must be
-        C-contiguous, and its dtype must be the dtype that would be returned
-        for `dot(a,b)`. This is a performance feature. Therefore, if these
-        conditions are not met, an exception is raised, instead of attempting
-        to be flexible.
-
-    Returns
-    -------
-    output : ndarray
-        Returns the dot product of `a` and `b`.  If `a` and `b` are both
-        scalars or both 1-D arrays then a scalar is returned; otherwise
-        an array is returned.
-        If `out` is given, then it is returned.
-
-    Raises
-    ------
-    ValueError
-        If the last dimension of `a` is not the same size as
-        the second-to-last dimension of `b`.
-
-    See Also
-    --------
-    vdot : Complex-conjugating dot product.
-    tensordot : Sum products over arbitrary axes.
-    einsum : Einstein summation convention.
-    matmul : '@' operator as method with out parameter.
-
-    Examples
-    --------
-    >>> np.dot(3, 4)
-    12
-
-    Neither argument is complex-conjugated:
-
-    >>> np.dot([2j, 3j], [2j, 3j])
-    (-13+0j)
-
-    For 2-D arrays it is the matrix product:
-
-    >>> a = [[1, 0], [0, 1]]
-    >>> b = [[4, 1], [2, 2]]
-    >>> np.dot(a, b)
-    array([[4, 1],
-           [2, 2]])
-
-    >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
-    >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
-    >>> np.dot(a, b)[2,3,2,1,2,2]
-    499128
-    >>> sum(a[2,3,2,:] * b[1,2,:,2])
-    499128
-
-    """)
-
 add_newdoc('numpy.core', 'matmul',
     """
     matmul(a, b, out=None)
@@ -2267,61 +1593,6 @@ add_newdoc('numpy.core', 'matmul',
 
     """)
 
-add_newdoc('numpy.core', 'vdot',
-    """
-    vdot(a, b)
-
-    Return the dot product of two vectors.
-
-    The vdot(`a`, `b`) function handles complex numbers differently than
-    dot(`a`, `b`).  If the first argument is complex the complex conjugate
-    of the first argument is used for the calculation of the dot product.
-
-    Note that `vdot` handles multidimensional arrays differently than `dot`:
-    it does *not* perform a matrix product, but flattens input arguments
-    to 1-D vectors first. Consequently, it should only be used for vectors.
-
-    Parameters
-    ----------
-    a : array_like
-        If `a` is complex the complex conjugate is taken before calculation
-        of the dot product.
-    b : array_like
-        Second argument to the dot product.
-
-    Returns
-    -------
-    output : ndarray
-        Dot product of `a` and `b`.  Can be an int, float, or
-        complex depending on the types of `a` and `b`.
-
-    See Also
-    --------
-    dot : Return the dot product without using the complex conjugate of the
-          first argument.
-
-    Examples
-    --------
-    >>> a = np.array([1+2j,3+4j])
-    >>> b = np.array([5+6j,7+8j])
-    >>> np.vdot(a, b)
-    (70-8j)
-    >>> np.vdot(b, a)
-    (70+8j)
-
-    Note that higher-dimensional arrays are flattened!
-
-    >>> a = np.array([[1, 4], [5, 6]])
-    >>> b = np.array([[4, 1], [2, 2]])
-    >>> np.vdot(a, b)
-    30
-    >>> np.vdot(b, a)
-    30
-    >>> 1*4 + 4*1 + 5*2 + 6*2
-    30
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'c_einsum',
     """
     c_einsum(subscripts, *operands, out=None, dtype=None, order='K',
@@ -5408,7 +4679,7 @@ add_newdoc('numpy.core.multiarray', 'ravel_multi_index',
 
 add_newdoc('numpy.core.multiarray', 'unravel_index',
     """
-    unravel_index(indices, dims, order='C')
+    unravel_index(indices, shape, order='C')
 
     Converts a flat index or array of flat indices into a tuple
     of coordinate arrays.
@@ -5417,10 +4688,14 @@ add_newdoc('numpy.core.multiarray', 'unravel_index',
     ----------
     indices : array_like
         An integer array whose elements are indices into the flattened
-        version of an array of dimensions ``dims``. Before version 1.6.0,
+        version of an array of dimensions ``shape``. Before version 1.6.0,
         this function accepted just one index value.
-    dims : tuple of ints
+    shape : tuple of ints
         The shape of the array to use for unraveling ``indices``.
+
+        .. versionchanged:: 1.16.0
+            Renamed from ``dims`` to ``shape``.
+
     order : {'C', 'F'}, optional
         Determines whether the indices should be viewed as indexing in
         row-major (C-style) or column-major (Fortran-style) order.
@@ -6457,6 +5732,7 @@ add_newdoc('numpy.core.multiarray', 'dtype', ('fields',
 
       (dtype, offset[, title])
 
+    Offset is limited to C int, which is signed and usually 32 bits.
     If present, the optional title can be any object (if it is a string
     or unicode then it will also be a key in the fields dictionary,
     otherwise it's meta-data). Notice also that the first two elements
@@ -6788,211 +6064,6 @@ add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('weekmask',
 add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('holidays',
     """A copy of the holiday array indicating additional invalid days."""))
 
-add_newdoc('numpy.core.multiarray', 'is_busday',
-    """
-    is_busday(dates, weekmask='1111100', holidays=None, busdaycal=None, out=None)
-
-    Calculates which of the given dates are valid days, and which are not.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    dates : array_like of datetime64[D]
-        The array of dates to process.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of bool, optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of bool
-        An array with the same shape as ``dates``, containing True for
-        each valid day, and False for each invalid day.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    busday_offset : Applies an offset counted in valid days.
-    busday_count : Counts how many valid days are in a half-open date range.
-
-    Examples
-    --------
-    >>> # The weekdays are Friday, Saturday, and Monday
-    ... np.is_busday(['2011-07-01', '2011-07-02', '2011-07-18'],
-    ...                 holidays=['2011-07-01', '2011-07-04', '2011-07-17'])
-    array([False, False,  True], dtype='bool')
-    """)
-
-add_newdoc('numpy.core.multiarray', 'busday_offset',
-    """
-    busday_offset(dates, offsets, roll='raise', weekmask='1111100', holidays=None, busdaycal=None, out=None)
-
-    First adjusts the date to fall on a valid day according to
-    the ``roll`` rule, then applies offsets to the given dates
-    counted in valid days.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    dates : array_like of datetime64[D]
-        The array of dates to process.
-    offsets : array_like of int
-        The array of offsets, which is broadcast with ``dates``.
-    roll : {'raise', 'nat', 'forward', 'following', 'backward', 'preceding', 'modifiedfollowing', 'modifiedpreceding'}, optional
-        How to treat dates that do not fall on a valid day. The default
-        is 'raise'.
-
-          * 'raise' means to raise an exception for an invalid day.
-          * 'nat' means to return a NaT (not-a-time) for an invalid day.
-          * 'forward' and 'following' mean to take the first valid day
-            later in time.
-          * 'backward' and 'preceding' mean to take the first valid day
-            earlier in time.
-          * 'modifiedfollowing' means to take the first valid day
-            later in time unless it is across a Month boundary, in which
-            case to take the first valid day earlier in time.
-          * 'modifiedpreceding' means to take the first valid day
-            earlier in time unless it is across a Month boundary, in which
-            case to take the first valid day later in time.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of datetime64[D], optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of datetime64[D]
-        An array with a shape from broadcasting ``dates`` and ``offsets``
-        together, containing the dates with offsets applied.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    is_busday : Returns a boolean array indicating valid days.
-    busday_count : Counts how many valid days are in a half-open date range.
-
-    Examples
-    --------
-    >>> # First business day in October 2011 (not accounting for holidays)
-    ... np.busday_offset('2011-10', 0, roll='forward')
-    numpy.datetime64('2011-10-03','D')
-    >>> # Last business day in February 2012 (not accounting for holidays)
-    ... np.busday_offset('2012-03', -1, roll='forward')
-    numpy.datetime64('2012-02-29','D')
-    >>> # Third Wednesday in January 2011
-    ... np.busday_offset('2011-01', 2, roll='forward', weekmask='Wed')
-    numpy.datetime64('2011-01-19','D')
-    >>> # 2012 Mother's Day in Canada and the U.S.
-    ... np.busday_offset('2012-05', 1, roll='forward', weekmask='Sun')
-    numpy.datetime64('2012-05-13','D')
-
-    >>> # First business day on or after a date
-    ... np.busday_offset('2011-03-20', 0, roll='forward')
-    numpy.datetime64('2011-03-21','D')
-    >>> np.busday_offset('2011-03-22', 0, roll='forward')
-    numpy.datetime64('2011-03-22','D')
-    >>> # First business day after a date
-    ... np.busday_offset('2011-03-20', 1, roll='backward')
-    numpy.datetime64('2011-03-21','D')
-    >>> np.busday_offset('2011-03-22', 1, roll='backward')
-    numpy.datetime64('2011-03-23','D')
-    """)
-
-add_newdoc('numpy.core.multiarray', 'busday_count',
-    """
-    busday_count(begindates, enddates, weekmask='1111100', holidays=[], busdaycal=None, out=None)
-
-    Counts the number of valid days between `begindates` and
-    `enddates`, not including the day of `enddates`.
-
-    If ``enddates`` specifies a date value that is earlier than the
-    corresponding ``begindates`` date value, the count will be negative.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    begindates : array_like of datetime64[D]
-        The array of the first dates for counting.
-    enddates : array_like of datetime64[D]
-        The array of the end dates for counting, which are excluded
-        from the count themselves.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of int, optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of int
-        An array with a shape from broadcasting ``begindates`` and ``enddates``
-        together, containing the number of valid days between
-        the begin and end dates.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    is_busday : Returns a boolean array indicating valid days.
-    busday_offset : Applies an offset counted in valid days.
-
-    Examples
-    --------
-    >>> # Number of weekdays in January 2011
-    ... np.busday_count('2011-01', '2011-02')
-    21
-    >>> # Number of weekdays in 2011
-    ...  np.busday_count('2011', '2012')
-    260
-    >>> # Number of Saturdays in 2011
-    ... np.busday_count('2011', '2012', weekmask='Sat')
-    53
-    """)
-
 add_newdoc('numpy.core.multiarray', 'normalize_axis_index',
     """
     normalize_axis_index(axis, ndim, msg_prefix=None)
@@ -7044,67 +6115,6 @@ add_newdoc('numpy.core.multiarray', 'normalize_axis_index',
     AxisError: axes_arg: axis -4 is out of bounds for array of dimension 3
     """)
 
-add_newdoc('numpy.core.multiarray', 'datetime_as_string',
-    """
-    datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind')
-
-    Convert an array of datetimes into an array of strings.
-
-    Parameters
-    ----------
-    arr : array_like of datetime64
-        The array of UTC timestamps to format.
-    unit : str
-        One of None, 'auto', or a :ref:`datetime unit <arrays.dtypes.dateunits>`.
-    timezone : {'naive', 'UTC', 'local'} or tzinfo
-        Timezone information to use when displaying the datetime. If 'UTC', end
-        with a Z to indicate UTC time. If 'local', convert to the local timezone
-        first, and suffix with a +-#### timezone offset. If a tzinfo object,
-        then do as with 'local', but use the specified timezone.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}
-        Casting to allow when changing between datetime units.
-
-    Returns
-    -------
-    str_arr : ndarray
-        An array of strings the same shape as `arr`.
-
-    Examples
-    --------
-    >>> d = np.arange('2002-10-27T04:30', 4*60, 60, dtype='M8[m]')
-    >>> d
-    array(['2002-10-27T04:30', '2002-10-27T05:30', '2002-10-27T06:30',
-           '2002-10-27T07:30'], dtype='datetime64[m]')
-
-    Setting the timezone to UTC shows the same information, but with a Z suffix
-
-    >>> np.datetime_as_string(d, timezone='UTC')
-    array(['2002-10-27T04:30Z', '2002-10-27T05:30Z', '2002-10-27T06:30Z',
-           '2002-10-27T07:30Z'], dtype='<U35')
-
-    Note that we picked datetimes that cross a DST boundary. Passing in a
-    ``pytz`` timezone object will print the appropriate offset
-
-    >>> np.datetime_as_string(d, timezone=pytz.timezone('US/Eastern'))
-    array(['2002-10-27T00:30-0400', '2002-10-27T01:30-0400',
-           '2002-10-27T01:30-0500', '2002-10-27T02:30-0500'], dtype='<U39')
-
-    Passing in a unit will change the precision
-
-    >>> np.datetime_as_string(d, unit='h')
-    array(['2002-10-27T04', '2002-10-27T05', '2002-10-27T06', '2002-10-27T07'],
-          dtype='<U32')
-    >>> np.datetime_as_string(d, unit='s')
-    array(['2002-10-27T04:30:00', '2002-10-27T05:30:00', '2002-10-27T06:30:00',
-           '2002-10-27T07:30:00'], dtype='<U38')
-
-    'casting' can be used to specify whether precision can be changed
-
-    >>> np.datetime_as_string(d, unit='h', casting='safe')
-    TypeError: Cannot create a datetime string as units 'h' from a NumPy
-    datetime with units 'm' according to the rule 'safe'
-    """)
-
 add_newdoc('numpy.core.multiarray', 'datetime_data',
     """
     datetime_data(dtype, /)
@@ -7965,66 +6975,228 @@ add_newdoc('numpy.core.numerictypes', 'generic', ('view',
 
 ##############################################################################
 #
-# Documentation for other scalar classes
+# Documentation for scalar type abstract base classes in type hierarchy
+#
+##############################################################################
+
+
+add_newdoc('numpy.core.numerictypes', 'number',
+    """
+    Abstract base class of all numeric scalar types.
+    
+    """)
+
+add_newdoc('numpy.core.numerictypes', 'integer',
+    """
+    Abstract base class of all integer scalar types.
+    
+    """)
+
+add_newdoc('numpy.core.numerictypes', 'signedinteger',
+    """
+    Abstract base class of all signed integer scalar types.
+    
+    """)
+
+add_newdoc('numpy.core.numerictypes', 'unsignedinteger',
+    """
+    Abstract base class of all unsigned integer scalar types.
+    
+    """)
+
+add_newdoc('numpy.core.numerictypes', 'inexact',
+    """
+    Abstract base class of all numeric scalar types with a (potentially)
+    inexact representation of the values in its range, such as
+    floating-point numbers.
+    
+    """)
+
+add_newdoc('numpy.core.numerictypes', 'floating',
+    """
+    Abstract base class of all floating-point scalar types.
+    
+    """)
+
+add_newdoc('numpy.core.numerictypes', 'complexfloating',
+    """
+    Abstract base class of all complex number scalar types that are made up of
+    floating-point numbers.
+    
+    """)
+
+add_newdoc('numpy.core.numerictypes', 'flexible',
+    """
+    Abstract base class of all scalar types without predefined length.
+    The actual size of these types depends on the specific `np.dtype`
+    instantiation.
+    
+    """)
+
+add_newdoc('numpy.core.numerictypes', 'character',
+    """
+    Abstract base class of all character string scalar types.
+    
+    """)
+
+
+##############################################################################
+#
+# Documentation for concrete scalar classes
 #
 ##############################################################################
 
-add_newdoc('numpy.core.numerictypes', 'bool_',
-    """NumPy's Boolean type.  Character code: ``?``.  Alias: bool8""")
+def numeric_type_aliases(aliases):
+    def type_aliases_gen():
+        for alias, doc in aliases:
+            try:
+                alias_type = getattr(_numerictypes, alias)
+            except AttributeError:
+                # The set of aliases that actually exist varies between platforms
+                pass
+            else:
+                yield (alias_type, alias, doc)
+    return list(type_aliases_gen())
+
+
+possible_aliases = numeric_type_aliases([
+    ('int8', '8-bit signed integer (-128 to 127)'),
+    ('int16', '16-bit signed integer (-32768 to 32767)'),
+    ('int32', '32-bit signed integer (-2147483648 to 2147483647)'),
+    ('int64', '64-bit signed integer (-9223372036854775808 to 9223372036854775807)'),
+    ('intp', 'Signed integer large enough to fit pointer, compatible with C ``intptr_t``'),
+    ('uint8', '8-bit unsigned integer (0 to 255)'),
+    ('uint16', '16-bit unsigned integer (0 to 65535)'),
+    ('uint32', '32-bit unsigned integer (0 to 4294967295)'),
+    ('uint64', '64-bit unsigned integer (0 to 18446744073709551615)'),
+    ('uintp', 'Unsigned integer large enough to fit pointer, compatible with C ``uintptr_t``'),
+    ('float16', '16-bit-precision floating-point number type: sign bit, 5 bits exponent, 10 bits mantissa'),
+    ('float32', '32-bit-precision floating-point number type: sign bit, 8 bits exponent, 23 bits mantissa'),
+    ('float64', '64-bit-precision floating-point number type: sign bit, 11 bits exponent, 52 bits mantissa'),
+    ('float96', '96-bit extended-precision floating-point number type'),
+    ('float128', '128-bit extended-precision floating-point number type'),
+    ('complex64', 'Complex number type composed of 2 32-bit-precision floating-point numbers'),
+    ('complex128', 'Complex number type composed of 2 64-bit-precision floating-point numbers'),
+    ('complex192', 'Complex number type composed of 2 96-bit extended-precision floating-point numbers'),
+    ('complex256', 'Complex number type composed of 2 128-bit extended-precision floating-point numbers'),
+    ])
+
+
+def add_newdoc_for_scalar_type(obj, fixed_aliases, doc):
+    o = getattr(_numerictypes, obj)
+
+    character_code = dtype(o).char
+    canonical_name_doc = "" if obj == o.__name__ else "Canonical name: ``np.{}``.\n    ".format(obj)
+    alias_doc = ''.join("Alias: ``np.{}``.\n    ".format(alias) for alias in fixed_aliases)
+    alias_doc += ''.join("Alias *on this platform*: ``np.{}``: {}.\n    ".format(alias, doc)
+                         for (alias_type, alias, doc) in possible_aliases if alias_type is o)
+
+    docstring = """
+    {doc}
+    Character code: ``'{character_code}'``.
+    {canonical_name_doc}{alias_doc}
+    """.format(doc=doc.strip(), character_code=character_code,
+               canonical_name_doc=canonical_name_doc, alias_doc=alias_doc)
+
+    add_newdoc('numpy.core.numerictypes', obj, docstring)
+
+
+add_newdoc_for_scalar_type('bool_', ['bool8'],
+    """
+    Boolean type (True or False), stored as a byte.
+    """)
 
-add_newdoc('numpy.core.numerictypes', 'complex64',
+add_newdoc_for_scalar_type('byte', [],
     """
-    Complex number type composed of two 32 bit floats. Character code: 'F'.
+    Signed integer type, compatible with C ``char``.
+    """)
 
+add_newdoc_for_scalar_type('short', [],
+    """
+    Signed integer type, compatible with C ``short``.
     """)
 
-add_newdoc('numpy.core.numerictypes', 'complex128',
+add_newdoc_for_scalar_type('intc', [],
     """
-    Complex number type composed of two 64 bit floats. Character code: 'D'.
-    Python complex compatible.
+    Signed integer type, compatible with C ``int``.
+    """)
 
+add_newdoc_for_scalar_type('int_', [],
+    """
+    Signed integer type, compatible with Python `int` and C ``long``.
     """)
 
-add_newdoc('numpy.core.numerictypes', 'complex256',
+add_newdoc_for_scalar_type('longlong', [],
     """
-    Complex number type composed of two 128-bit floats. Character code: 'G'.
+    Signed integer type, compatible with C ``long long``.
+    """)
 
+add_newdoc_for_scalar_type('ubyte', [],
+    """
+    Unsigned integer type, compatible with C ``unsigned char``.
     """)
 
-add_newdoc('numpy.core.numerictypes', 'float32',
+add_newdoc_for_scalar_type('ushort', [],
     """
-    32-bit floating-point number. Character code 'f'. C float compatible.
+    Unsigned integer type, compatible with C ``unsigned short``.
+    """)
 
+add_newdoc_for_scalar_type('uintc', [],
+    """
+    Unsigned integer type, compatible with C ``unsigned int``.
     """)
 
-add_newdoc('numpy.core.numerictypes', 'float64',
+add_newdoc_for_scalar_type('uint', [],
     """
-    64-bit floating-point number. Character code 'd'. Python float compatible.
+    Unsigned integer type, compatible with C ``unsigned long``.
+    """)
 
+add_newdoc_for_scalar_type('ulonglong', [],
+    """
+    Unsigned integer type, compatible with C ``unsigned long long``.
     """)
 
-add_newdoc('numpy.core.numerictypes', 'float96',
+add_newdoc_for_scalar_type('half', [],
     """
+    Half-precision floating-point number type.
     """)
 
-add_newdoc('numpy.core.numerictypes', 'float128',
+add_newdoc_for_scalar_type('single', [],
     """
-    128-bit floating-point number. Character code: 'g'. C long float
-    compatible.
+    Single-precision floating-point number type, compatible with C ``float``.
+    """)
 
+add_newdoc_for_scalar_type('double', ['float_'],
+    """
+    Double-precision floating-point number type, compatible with Python `float`
+    and C ``double``.
     """)
 
-add_newdoc('numpy.core.numerictypes', 'int8',
-    """8-bit integer. Character code ``b``. C char compatible.""")
+add_newdoc_for_scalar_type('longdouble', ['longfloat'],
+    """
+    Extended-precision floating-point number type, compatible with C
+    ``long double`` but not necessarily with IEEE 754 quadruple-precision.
+    """)
 
-add_newdoc('numpy.core.numerictypes', 'int16',
-    """16-bit integer. Character code ``h``. C short compatible.""")
+add_newdoc_for_scalar_type('csingle', ['singlecomplex'],
+    """
+    Complex number type composed of two single-precision floating-point
+    numbers.
+    """)
 
-add_newdoc('numpy.core.numerictypes', 'int32',
-    """32-bit integer. Character code 'i'. C int compatible.""")
+add_newdoc_for_scalar_type('cdouble', ['cfloat', 'complex_'],
+    """
+    Complex number type composed of two double-precision floating-point
+    numbers, compatible with Python `complex`.
+    """)
 
-add_newdoc('numpy.core.numerictypes', 'int64',
-    """64-bit integer. Character code 'l'. Python int compatible.""")
+add_newdoc_for_scalar_type('clongdouble', ['clongfloat', 'longcomplex'],
+    """
+    Complex number type composed of two extended-precision floating-point
+    numbers.
+    """)
 
-add_newdoc('numpy.core.numerictypes', 'object_',
-    """Any Python object.  Character code: 'O'.""")
+add_newdoc_for_scalar_type('object_', [],
+    """
+    Any Python object.
+    """)
diff --git a/numpy/core/_aliased_types.py b/numpy/core/_aliased_types.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/numpy/core/_aliased_types.py
diff --git a/numpy/core/_dtype.py b/numpy/core/_dtype.py
new file mode 100644
index 000000000..d115e0fa6
--- /dev/null
+++ b/numpy/core/_dtype.py
@@ -0,0 +1,339 @@
+"""
+A place for code to be called from the implementation of np.dtype
+
+String handling is much easier to do correctly in python.
+"""
+from __future__ import division, absolute_import, print_function
+
+import sys
+
+import numpy as np
+
+
+_kind_to_stem = {
+    'u': 'uint',
+    'i': 'int',
+    'c': 'complex',
+    'f': 'float',
+    'b': 'bool',
+    'V': 'void',
+    'O': 'object',
+    'M': 'datetime',
+    'm': 'timedelta'
+}
+if sys.version_info[0] >= 3:
+    _kind_to_stem.update({
+        'S': 'bytes',
+        'U': 'str'
+    })
+else:
+    _kind_to_stem.update({
+        'S': 'string',
+        'U': 'unicode'
+    })
+
+
+def _kind_name(dtype):
+    try:
+        return _kind_to_stem[dtype.kind]
+    except KeyError:
+        raise RuntimeError(
+            "internal dtype error, unknown kind {!r}"
+            .format(dtype.kind)
+        )
+
+
+def __str__(dtype):
+    if dtype.fields is not None:
+        return _struct_str(dtype, include_align=True)
+    elif dtype.subdtype:
+        return _subarray_str(dtype)
+    elif issubclass(dtype.type, np.flexible) or not dtype.isnative:
+        return dtype.str
+    else:
+        return dtype.name
+
+
+def __repr__(dtype):
+    arg_str = _construction_repr(dtype, include_align=False)
+    if dtype.isalignedstruct:
+        arg_str = arg_str + ", align=True"
+    return "dtype({})".format(arg_str)
+
+
+def _unpack_field(dtype, offset, title=None):
+    """
+    Helper function to normalize the items in dtype.fields.
+
+    Call as:
+
+    dtype, offset, title = _unpack_field(*dtype.fields[name])
+    """
+    return dtype, offset, title
+
+
+def _isunsized(dtype):
+    # PyDataType_ISUNSIZED
+    return dtype.itemsize == 0
+
+
+def _construction_repr(dtype, include_align=False, short=False):
+    """
+    Creates a string repr of the dtype, excluding the 'dtype()' part
+    surrounding the object. This object may be a string, a list, or
+    a dict depending on the nature of the dtype. This
+    is the object passed as the first parameter to the dtype
+    constructor, and if no additional constructor parameters are
+    given, will reproduce the exact memory layout.
+
+    Parameters
+    ----------
+    short : bool
+        If true, this creates a shorter repr using 'kind' and 'itemsize', instead
+        of the longer type name.
+
+    include_align : bool
+        If true, this includes the 'align=True' parameter
+        inside the struct dtype construction dict when needed. Use this flag
+        if you want a proper repr string without the 'dtype()' part around it.
+
+        If false, this does not preserve the
+        'align=True' parameter or sticky NPY_ALIGNED_STRUCT flag for
+        struct arrays like the regular repr does, because the 'align'
+        flag is not part of the first dtype constructor parameter. This
+        mode is intended for a full 'repr', where the 'align=True' is
+        provided as the second parameter.
+    """
+    if dtype.fields is not None:
+        return _struct_str(dtype, include_align=include_align)
+    elif dtype.subdtype:
+        return _subarray_str(dtype)
+    else:
+        return _scalar_str(dtype, short=short)
+
+
+def _scalar_str(dtype, short):
+    byteorder = _byte_order_str(dtype)
+
+    if dtype.type == np.bool_:
+        if short:
+            return "'?'"
+        else:
+            return "'bool'"
+
+    elif dtype.type == np.object_:
+        # The object reference may be different sizes on different
+        # platforms, so it should never include the itemsize here.
+        return "'O'"
+
+    elif dtype.type == np.string_:
+        if _isunsized(dtype):
+            return "'S'"
+        else:
+            return "'S%d'" % dtype.itemsize
+
+    elif dtype.type == np.unicode_:
+        if _isunsized(dtype):
+            return "'%sU'" % byteorder
+        else:
+            return "'%sU%d'" % (byteorder, dtype.itemsize / 4)
+
+    elif dtype.type == np.void:
+        if _isunsized(dtype):
+            return "'V'"
+        else:
+            return "'V%d'" % dtype.itemsize
+
+    elif dtype.type == np.datetime64:
+        return "'%sM8%s'" % (byteorder, _datetime_metadata_str(dtype))
+
+    elif dtype.type == np.timedelta64:
+        return "'%sm8%s'" % (byteorder, _datetime_metadata_str(dtype))
+
+    elif np.issubdtype(dtype, np.number):
+        # Short repr with endianness, like '<f8'
+        if short or dtype.byteorder not in ('=', '|'):
+            return "'%s%c%d'" % (byteorder, dtype.kind, dtype.itemsize)
+
+        # Longer repr, like 'float64'
+        else:
+            return "'%s%d'" % (_kind_name(dtype), 8*dtype.itemsize)
+
+    elif dtype.isbuiltin == 2:
+        return dtype.type.__name__
+
+    else:
+        raise RuntimeError(
+            "Internal error: NumPy dtype unrecognized type number")
+
+
+def _byte_order_str(dtype):
+    """ Normalize byteorder to '<' or '>' """
+    # hack to obtain the native and swapped byte order characters
+    swapped = np.dtype(int).newbyteorder('s')
+    native = swapped.newbyteorder('s')
+
+    byteorder = dtype.byteorder
+    if byteorder == '=':
+        return native.byteorder
+    if byteorder == 's':
+        # TODO: this path can never be reached
+        return swapped.byteorder
+    elif byteorder == '|':
+        return ''
+    else:
+        return byteorder
+
+
+def _datetime_metadata_str(dtype):
+    # TODO: this duplicates the C append_metastr_to_string
+    unit, count = np.datetime_data(dtype)
+    if unit == 'generic':
+        return ''
+    elif count == 1:
+        return '[{}]'.format(unit)
+    else:
+        return '[{}{}]'.format(count, unit)
+
+
+def _struct_dict_str(dtype, includealignedflag):
+    # unpack the fields dictionary into parallel lists
+    names = dtype.names
+    fld_dtypes = []
+    offsets = []
+    titles = []
+    for name in names:
+        fld_dtype, offset, title = _unpack_field(*dtype.fields[name])
+        fld_dtypes.append(fld_dtype)
+        offsets.append(offset)
+        titles.append(title)
+
+    # Build up a string to make the dictionary
+
+    # First, the names
+    ret = "{'names':["
+    ret += ",".join(repr(name) for name in names)
+
+    # Second, the formats
+    ret += "], 'formats':["
+    ret += ",".join(
+        _construction_repr(fld_dtype, short=True) for fld_dtype in fld_dtypes)
+
+    # Third, the offsets
+    ret += "], 'offsets':["
+    ret += ",".join("%d" % offset for offset in offsets)
+
+    # Fourth, the titles
+    if any(title is not None for title in titles):
+        ret += "], 'titles':["
+        ret += ",".join(repr(title) for title in titles)
+
+    # Fifth, the itemsize
+    ret += "], 'itemsize':%d" % dtype.itemsize
+
+    if (includealignedflag and dtype.isalignedstruct):
+        # Finally, the aligned flag
+        ret += ", 'aligned':True}"
+    else:
+        ret += "}"
+
+    return ret
+
+
+def _is_packed(dtype):
+    """
+    Checks whether the structured data type in 'dtype'
+    has a simple layout, where all the fields are in order,
+    and follow each other with no alignment padding.
+
+    When this returns true, the dtype can be reconstructed
+    from a list of the field names and dtypes with no additional
+    dtype parameters.
+
+    Duplicates the C `is_dtype_struct_simple_unaligned_layout` function.
+    """
+    total_offset = 0
+    for name in dtype.names:
+        fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])
+        if fld_offset != total_offset:
+            return False
+        total_offset += fld_dtype.itemsize
+    if total_offset != dtype.itemsize:
+        return False
+    return True
+
+
+def _struct_list_str(dtype):
+    items = []
+    for name in dtype.names:
+        fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])
+
+        item = "("
+        if title is not None:
+            item += "({!r}, {!r}), ".format(title, name)
+        else:
+            item += "{!r}, ".format(name)
+        # Special case subarray handling here
+        if fld_dtype.subdtype is not None:
+            base, shape = fld_dtype.subdtype
+            item += "{}, {}".format(
+                _construction_repr(base, short=True),
+                shape
+            )
+        else:
+            item += _construction_repr(fld_dtype, short=True)
+
+        item += ")"
+        items.append(item)
+
+    return "[" + ", ".join(items) + "]"
+
+
+def _struct_str(dtype, include_align):
+    # The list str representation can't include the 'align=' flag,
+    # so if it is requested and the struct has the aligned flag set,
+    # we must use the dict str instead.
+    if not (include_align and dtype.isalignedstruct) and _is_packed(dtype):
+        sub = _struct_list_str(dtype)
+
+    else:
+        sub = _struct_dict_str(dtype, include_align)
+
+    # If the data type isn't the default, void, show it
+    if dtype.type != np.void:
+        return "({t.__module__}.{t.__name__}, {f})".format(t=dtype.type, f=sub)
+    else:
+        return sub
+
+
+def _subarray_str(dtype):
+    base, shape = dtype.subdtype
+    return "({}, {})".format(
+        _construction_repr(base, short=True),
+        shape
+    )
+
+
+def _name_get(dtype):
+    # provides dtype.name.__get__
+
+    if dtype.isbuiltin == 2:
+        # user dtypes don't promise to do anything special
+        return dtype.type.__name__
+
+    # Builtin classes are documented as returning a "bit name"
+    name = dtype.type.__name__
+
+    # handle bool_, str_, etc
+    if name[-1] == '_':
+        name = name[:-1]
+
+    # append bit counts to str, unicode, and void
+    if np.issubdtype(dtype, np.flexible) and not _isunsized(dtype):
+        name += "{}".format(dtype.itemsize * 8)
+
+    # append metadata to datetimes
+    elif dtype.type in (np.datetime64, np.timedelta64):
+        name += _datetime_metadata_str(dtype)
+
+    return name
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index 48ede14d0..c4d967dc2 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -1,5 +1,5 @@
 """
-A place for code to be called from core C-code.
+A place for internal code
 
 Some things are more easily handled Python.
 
@@ -9,13 +9,12 @@ from __future__ import division, absolute_import, print_function
 import re
 import sys
 
-from numpy.compat import basestring, unicode
+from numpy.compat import unicode
 from .multiarray import dtype, array, ndarray
 try:
     import ctypes
 except ImportError:
     ctypes = None
-from .numerictypes import object_
 
 if (sys.byteorder == 'little'):
     _nbo = b'<'
@@ -808,3 +807,35 @@ def _is_from_ctypes(obj):
         return 'ctypes' in ctype_base.__module__
     except Exception:
         return False
+
+
+class recursive(object):
+    '''
+    A decorator class for recursive nested functions.
+    Naive recursive nested functions hold a reference to themselves:
+
+    def outer(*args):
+        def stringify_leaky(arg0, *arg1):
+            if len(arg1) > 0:
+                return stringify_leaky(*arg1)  # <- HERE
+            return str(arg0)
+        stringify_leaky(*args)
+
+    This design pattern creates a reference cycle that is difficult for a
+    garbage collector to resolve. The decorator class prevents the
+    cycle by passing the nested function in as an argument `self`:
+
+    def outer(*args):
+        @recursive
+        def stringify(self, arg0, *arg1):
+            if len(arg1) > 0:
+                return self(*arg1)
+            return str(arg0)
+        stringify(*args)
+
+    '''
+    def __init__(self, func):
+        self.func = func
+    def __call__(self, *args, **kwargs):
+        return self.func(self, *args, **kwargs)
+
diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py
index 33f6d01a8..8974f0ce1 100644
--- a/numpy/core/_methods.py
+++ b/numpy/core/_methods.py
@@ -154,3 +154,18 @@ def _ptp(a, axis=None, out=None, keepdims=False):
         umr_minimum(a, axis, None, None, keepdims),
         out
     )
+
+_NDARRAY_ARRAY_FUNCTION = mu.ndarray.__array_function__
+
+def _array_function(self, func, types, args, kwargs):
+    # TODO: rewrite this in C
+    # Cannot handle items that have __array_function__ other than our own.
+    for t in types:
+        if t is not mu.ndarray:
+            method = getattr(t, '__array_function__', _NDARRAY_ARRAY_FUNCTION)
+            if method is not _NDARRAY_ARRAY_FUNCTION:
+                return NotImplemented
+
+    # Arguments contain no overrides, so we can safely call the
+    # overloaded function again.
+    return func(*args, **kwargs)
diff --git a/numpy/core/_string_helpers.py b/numpy/core/_string_helpers.py
new file mode 100644
index 000000000..45e6a739e
--- /dev/null
+++ b/numpy/core/_string_helpers.py
@@ -0,0 +1,100 @@
+"""
+String-handling utilities to avoid locale-dependence.
+
+Used primarily to generate type name aliases.
+"""
+# "import string" is costly to import!
+# Construct the translation tables directly
+#   "A" = chr(65), "a" = chr(97)
+_all_chars = [chr(_m) for _m in range(256)]
+_ascii_upper = _all_chars[65:65+26]
+_ascii_lower = _all_chars[97:97+26]
+LOWER_TABLE = "".join(_all_chars[:65] + _ascii_lower + _all_chars[65+26:])
+UPPER_TABLE = "".join(_all_chars[:97] + _ascii_upper + _all_chars[97+26:])
+
+
+def english_lower(s):
+    """ Apply English case rules to convert ASCII strings to all lower case.
+
+    This is an internal utility function to replace calls to str.lower() such
+    that we can avoid changing behavior with changing locales. In particular,
+    Turkish has distinct dotted and dotless variants of the Latin letter "I" in
+    both lowercase and uppercase. Thus, "I".lower() != "i" in a "tr" locale.
+
+    Parameters
+    ----------
+    s : str
+
+    Returns
+    -------
+    lowered : str
+
+    Examples
+    --------
+    >>> from numpy.core.numerictypes import english_lower
+    >>> english_lower('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_')
+    'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789_'
+    >>> english_lower('')
+    ''
+    """
+    lowered = s.translate(LOWER_TABLE)
+    return lowered
+
+
+def english_upper(s):
+    """ Apply English case rules to convert ASCII strings to all upper case.
+
+    This is an internal utility function to replace calls to str.upper() such
+    that we can avoid changing behavior with changing locales. In particular,
+    Turkish has distinct dotted and dotless variants of the Latin letter "I" in
+    both lowercase and uppercase. Thus, "i".upper() != "I" in a "tr" locale.
+
+    Parameters
+    ----------
+    s : str
+
+    Returns
+    -------
+    uppered : str
+
+    Examples
+    --------
+    >>> from numpy.core.numerictypes import english_upper
+    >>> english_upper('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_')
+    'ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
+    >>> english_upper('')
+    ''
+    """
+    uppered = s.translate(UPPER_TABLE)
+    return uppered
+
+
+def english_capitalize(s):
+    """ Apply English case rules to convert the first character of an ASCII
+    string to upper case.
+
+    This is an internal utility function to replace calls to str.capitalize()
+    such that we can avoid changing behavior with changing locales.
+
+    Parameters
+    ----------
+    s : str
+
+    Returns
+    -------
+    capitalized : str
+
+    Examples
+    --------
+    >>> from numpy.core.numerictypes import english_capitalize
+    >>> english_capitalize('int8')
+    'Int8'
+    >>> english_capitalize('Int8')
+    'Int8'
+    >>> english_capitalize('')
+    ''
+    """
+    if s:
+        return english_upper(s[0]) + s[1:]
+    else:
+        return s
diff --git a/numpy/core/_type_aliases.py b/numpy/core/_type_aliases.py
new file mode 100644
index 000000000..cce6c0425
--- /dev/null
+++ b/numpy/core/_type_aliases.py
@@ -0,0 +1,282 @@
+"""
+Due to compatibility, numpy has a very large number of different naming
+conventions for the scalar types (those subclassing from `numpy.generic`).
+This file produces a convoluted set of dictionaries mapping names to types,
+and sometimes other mappings too.
+
+.. data:: allTypes
+    A dictionary of names to types that will be exposed as attributes through
+    ``np.core.numerictypes.*``
+
+.. data:: sctypeDict
+    Similar to `allTypes`, but maps a broader set of aliases to their types.
+
+.. data:: sctypeNA
+    NumArray-compatible names for the scalar types. Contains not only
+    ``name: type`` mappings, but ``char: name`` mappings too.
+
+    .. deprecated:: 1.16
+
+.. data:: sctypes
+    A dictionary keyed by a "type group" string, providing a list of types
+    under that group.
+
+"""
+import warnings
+import sys
+
+from numpy.compat import unicode
+from numpy._globals import VisibleDeprecationWarning
+from numpy.core._string_helpers import english_lower, english_capitalize
+from numpy.core.multiarray import typeinfo, dtype
+from numpy.core._dtype import _kind_name
+
+
+sctypeDict = {}      # Contains all leaf-node scalar types with aliases
+class TypeNADict(dict):
+    def __getitem__(self, key):
+        # 2018-06-24, 1.16
+        warnings.warn('sctypeNA and typeNA will be removed in v1.18 '
+                      'of numpy', VisibleDeprecationWarning, stacklevel=2)
+        return dict.__getitem__(self, key)
+    def get(self, key, default=None):
+        # 2018-06-24, 1.16
+        warnings.warn('sctypeNA and typeNA will be removed in v1.18 '
+                      'of numpy', VisibleDeprecationWarning, stacklevel=2)
+        return dict.get(self, key, default)
+
+sctypeNA = TypeNADict()  # Contains all leaf-node types -> numarray type equivalences
+allTypes = {}            # Collect the types we will add to the module
+
+
+# separate the actual type info from the abstract base classes
+_abstract_types = {}
+_concrete_typeinfo = {}
+for k, v in typeinfo.items():
+    # make all the keys lowercase too
+    k = english_lower(k)
+    if isinstance(v, type):
+        _abstract_types[k] = v
+    else:
+        _concrete_typeinfo[k] = v
+
+_concrete_types = set(v.type for k, v in _concrete_typeinfo.items())
+
+
+def _bits_of(obj):
+    try:
+        info = next(v for v in _concrete_typeinfo.values() if v.type is obj)
+    except StopIteration:
+        if obj in _abstract_types.values():
+            raise ValueError("Cannot count the bits of an abstract type")
+
+        # some third-party type - make a best-guess
+        return dtype(obj).itemsize * 8
+    else:
+        return info.bits
+
+
+def bitname(obj):
+    """Return the ``(base, bits, char)`` bit-width naming triple for a type."""
+    bits = _bits_of(obj)
+    dt = dtype(obj)
+    char = dt.kind
+    base = _kind_name(dt)
+
+    if base == 'object':
+        bits = 0  # skips the size suffix below for object types
+
+    if bits != 0:
+        char = "%s%d" % (char, bits // 8)  # kind char + size in bytes
+
+    return base, bits, char
+
+
+def _add_types():
+    for name, info in _concrete_typeinfo.items():
+        # define C-name and insert typenum and typechar references also
+        allTypes[name] = info.type
+        sctypeDict[name] = info.type
+        sctypeDict[info.char] = info.type
+        sctypeDict[info.num] = info.type
+
+    for name, cls in _abstract_types.items():
+        allTypes[name] = cls
+_add_types()
+
+# This is the priority order used to assign the bit-sized NPY_INTxx names, which
+# must match the order in npy_common.h in order for NPY_INTxx and np.intxx to be
+# consistent.
+# If two C types have the same size, then the earliest one in this list is used
+# as the sized name.
+_int_ctypes = ['long', 'longlong', 'int', 'short', 'byte']
+_uint_ctypes = list('u' + t for t in _int_ctypes)
+
+def _add_aliases():
+    for name, info in _concrete_typeinfo.items():
+        # these are handled by _add_integer_aliases
+        if name in _int_ctypes or name in _uint_ctypes:
+            continue
+
+        # insert bit-width version for this class (if relevant)
+        base, bit, char = bitname(info.type)
+
+        myname = "%s%d" % (base, bit)
+
+        # ensure that (c)longdouble does not overwrite the aliases assigned to
+        # (c)double
+        if name in ('longdouble', 'clongdouble') and myname in allTypes:
+            continue
+
+        base_capitalize = english_capitalize(base)
+        if base == 'complex':
+            na_name = '%s%d' % (base_capitalize, bit//2)
+        elif base == 'bool':
+            na_name = base_capitalize
+        else:
+            na_name = "%s%d" % (base_capitalize, bit)
+
+        allTypes[myname] = info.type
+
+        # add mapping for both the bit name and the numarray name
+        sctypeDict[myname] = info.type
+        sctypeDict[na_name] = info.type
+
+        # add forward, reverse, and string mapping to numarray
+        sctypeNA[na_name] = info.type
+        sctypeNA[info.type] = na_name
+        sctypeNA[info.char] = na_name
+
+        sctypeDict[char] = info.type
+        sctypeNA[char] = na_name
+_add_aliases()
+
+def _add_integer_aliases():
+    seen_bits = set()
+    for i_ctype, u_ctype in zip(_int_ctypes, _uint_ctypes):
+        i_info = _concrete_typeinfo[i_ctype]
+        u_info = _concrete_typeinfo[u_ctype]
+        bits = i_info.bits  # same for both
+
+        for info, charname, intname, Intname in [
+                (i_info,'i%d' % (bits//8,), 'int%d' % bits, 'Int%d' % bits),
+                (u_info,'u%d' % (bits//8,), 'uint%d' % bits, 'UInt%d' % bits)]:
+            if bits not in seen_bits:
+                # sometimes two different types have the same number of bits
+                # if so, the one iterated over first takes precedence
+                allTypes[intname] = info.type
+                sctypeDict[intname] = info.type
+                sctypeDict[Intname] = info.type
+                sctypeDict[charname] = info.type
+                sctypeNA[Intname] = info.type
+                sctypeNA[charname] = info.type
+            sctypeNA[info.type] = Intname
+            sctypeNA[info.char] = Intname
+
+        seen_bits.add(bits)
+
+_add_integer_aliases()
+
+# We use these later
+void = allTypes['void']
+
+#
+# Rework the Python names (so that float and complex and int are consistent
+#                            with Python usage)
+#
+def _set_up_aliases():
+    type_pairs = [('complex_', 'cdouble'),
+                  ('int0', 'intp'),
+                  ('uint0', 'uintp'),
+                  ('single', 'float'),
+                  ('csingle', 'cfloat'),
+                  ('singlecomplex', 'cfloat'),
+                  ('float_', 'double'),
+                  ('intc', 'int'),
+                  ('uintc', 'uint'),
+                  ('int_', 'long'),
+                  ('uint', 'ulong'),
+                  ('cfloat', 'cdouble'),
+                  ('longfloat', 'longdouble'),
+                  ('clongfloat', 'clongdouble'),
+                  ('longcomplex', 'clongdouble'),
+                  ('bool_', 'bool'),
+                  ('bytes_', 'string'),
+                  ('string_', 'string'),
+                  ('unicode_', 'unicode'),
+                  ('object_', 'object')]
+    if sys.version_info[0] >= 3:
+        type_pairs.extend([('str_', 'unicode')])
+    else:
+        type_pairs.extend([('str_', 'string')])
+    for alias, t in type_pairs:
+        allTypes[alias] = allTypes[t]
+        sctypeDict[alias] = sctypeDict[t]
+    # Remove aliases overriding python types and modules
+    to_remove = ['ulong', 'object', 'int', 'float',
+                 'complex', 'bool', 'string', 'datetime', 'timedelta']
+    if sys.version_info[0] >= 3:
+        to_remove.extend(['bytes', 'str'])
+    else:
+        to_remove.extend(['unicode', 'long'])
+
+    for t in to_remove:
+        try:
+            del allTypes[t]
+            del sctypeDict[t]
+        except KeyError:
+            pass
+_set_up_aliases()
+
+
+sctypes = {'int': [],
+           'uint':[],
+           'float':[],
+           'complex':[],
+           'others':[bool, object, bytes, unicode, void]}
+
+def _add_array_type(typename, bits):
+    try:
+        t = allTypes['%s%d' % (typename, bits)]
+    except KeyError:
+        pass
+    else:
+        sctypes[typename].append(t)
+
+def _set_array_types():
+    """Populate `sctypes` with the sized types registered in `allTypes`."""
+    ibytes = [1, 2, 4, 8, 16, 32, 64]
+    fbytes = [2, 4, 8, 10, 12, 16, 32, 64]
+    for nbytes in ibytes:  # `nbytes`, so the builtin `bytes` is not shadowed
+        bits = 8*nbytes
+        _add_array_type('int', bits)
+        _add_array_type('uint', bits)
+    for nbytes in fbytes:
+        bits = 8*nbytes
+        _add_array_type('float', bits)
+        _add_array_type('complex', 2*bits)
+    _gi = dtype('p')  # if its type is missing, insert it (and 'P') by itemsize
+    _lst = sctypes['int']
+    if _gi.type not in _lst:
+        indx = 0
+        while (indx < len(_lst) and _gi.itemsize >= _lst[indx](0).itemsize):
+            indx += 1
+        _lst.insert(indx, _gi.type)
+        sctypes['uint'].insert(indx, dtype('P').type)
+_set_array_types()
+
+
+# Add additional strings to the sctypeDict
+_toadd = ['int', 'float', 'complex', 'bool', 'object']
+if sys.version_info[0] >= 3:
+    _toadd.extend(['str', 'bytes', ('a', 'bytes_')])
+else:
+    _toadd.extend(['string', ('str', 'string_'), 'unicode', ('a', 'string_')])
+
+for name in _toadd:
+    if isinstance(name, tuple):
+        sctypeDict[name[0]] = allTypes[name[1]]
+    else:
+        sctypeDict[name] = allTypes['%s_' % name]
+
+del _toadd, name
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index a4b5aecc3..1b9fbbfa9 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -42,12 +42,13 @@ from . import numerictypes as _nt
 from .umath import absolute, not_equal, isnan, isinf, isfinite, isnat
 from . import multiarray
 from .multiarray import (array, dragon4_positional, dragon4_scientific,
-                         datetime_as_string, datetime_data, dtype, ndarray,
+                         datetime_as_string, datetime_data, ndarray,
                          set_legacy_print_mode)
 from .fromnumeric import ravel, any
 from .numeric import concatenate, asarray, errstate
 from .numerictypes import (longlong, intc, int_, float_, complex_, bool_,
                            flexible)
+from .overrides import array_function_dispatch
 import warnings
 import contextlib
 
@@ -496,6 +497,16 @@ def _array2string(a, options, separator=' ', prefix=""):
     return lst
 
 
+def _array2string_dispatcher(
+        a, max_line_width=None, precision=None,
+        suppress_small=None, separator=None, prefix=None,
+        style=None, formatter=None, threshold=None,
+        edgeitems=None, sign=None, floatmode=None, suffix=None,
+        **kwarg):
+    return (a,)
+
+
+@array_function_dispatch(_array2string_dispatcher)
 def array2string(a, max_line_width=None, precision=None,
                  suppress_small=None, separator=' ', prefix="",
                  style=np._NoValue, formatter=None, threshold=None,
@@ -1370,6 +1381,12 @@ def dtype_short_repr(dtype):
     return typename
 
 
+def _array_repr_dispatcher(
+        arr, max_line_width=None, precision=None, suppress_small=None):
+    return (arr,)
+
+
+@array_function_dispatch(_array_repr_dispatcher)
 def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
     """
     Return the string representation of an array.
@@ -1454,8 +1471,16 @@ def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
 
     return arr_str + spacer + dtype_str
 
+
 _guarded_str = _recursive_guard()(str)
 
+
+def _array_str_dispatcher(
+        a, max_line_width=None, precision=None, suppress_small=None):
+    return (a,)
+
+
+@array_function_dispatch(_array_str_dispatcher)
 def array_str(a, max_line_width=None, precision=None, suppress_small=None):
     """
     Return a string representation of the data in an array.
diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt
index 43c32eac6..c8b998bfc 100644
--- a/numpy/core/code_generators/cversions.txt
+++ b/numpy/core/code_generators/cversions.txt
@@ -43,3 +43,5 @@
 # PyArray_SetWritebackIfCopyBase and deprecated PyArray_SetUpdateIfCopyBase.
 0x0000000c = a1bc756c5782853ec2e3616cf66869d8
 
+# Version 13 (Numpy 1.16) Added fields core_dim_flags and core_dim_sizes to PyUFuncObject
+0x0000000d = a1bc756c5782853ec2e3616cf66869d8
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 6e5cb25af..13231de29 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -2594,8 +2594,7 @@ add_newdoc('numpy.core.umath', 'multiply',
     Returns
     -------
     y : ndarray
-        The product of `x1` and `x2`, element-wise. Returns a scalar if
-        both `x1` and `x2` are scalars.
+        The product of `x1` and `x2`, element-wise.
         $OUT_SCALAR_2
 
     Notes
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 6d0a0add5..0a8c7bbec 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -22,6 +22,7 @@ from .numerictypes import string_, unicode_, integer, object_, bool_, character
 from .numeric import ndarray, compare_chararrays
 from .numeric import array as narray
 from numpy.core.multiarray import _vec_string
+from numpy.core.overrides import array_function_dispatch
 from numpy.compat import asbytes, long
 import numpy
 
@@ -95,6 +96,11 @@ def _get_num_chars(a):
     return a.itemsize
 
 
+def _binary_op_dispatcher(x1, x2):
+    return (x1, x2)
+
+
+@array_function_dispatch(_binary_op_dispatcher)
 def equal(x1, x2):
     """
     Return (x1 == x2) element-wise.
@@ -119,6 +125,8 @@ def equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '==', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def not_equal(x1, x2):
     """
     Return (x1 != x2) element-wise.
@@ -143,6 +151,8 @@ def not_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '!=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def greater_equal(x1, x2):
     """
     Return (x1 >= x2) element-wise.
@@ -168,6 +178,8 @@ def greater_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '>=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def less_equal(x1, x2):
     """
     Return (x1 <= x2) element-wise.
@@ -192,6 +204,8 @@ def less_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '<=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def greater(x1, x2):
     """
     Return (x1 > x2) element-wise.
@@ -216,6 +230,8 @@ def greater(x1, x2):
     """
     return compare_chararrays(x1, x2, '>', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def less(x1, x2):
     """
     Return (x1 < x2) element-wise.
@@ -240,6 +256,12 @@ def less(x1, x2):
     """
     return compare_chararrays(x1, x2, '<', True)
 
+
+def _unary_op_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_unary_op_dispatcher)
 def str_len(a):
     """
     Return len(a) element-wise.
@@ -259,6 +281,8 @@ def str_len(a):
     """
     return _vec_string(a, integer, '__len__')
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def add(x1, x2):
     """
     Return element-wise string concatenation for two arrays of str or unicode.
@@ -285,6 +309,12 @@ def add(x1, x2):
     dtype = _use_unicode(arr1, arr2)
     return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
 
+
+def _multiply_dispatcher(a, i):
+    return (a,)
+
+
+@array_function_dispatch(_multiply_dispatcher)
 def multiply(a, i):
     """
     Return (a * i), that is string multiple concatenation,
@@ -313,6 +343,12 @@ def multiply(a, i):
     return _vec_string(
         a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
 
+
+def _mod_dispatcher(a, values):
+    return (a, values)
+
+
+@array_function_dispatch(_mod_dispatcher)
 def mod(a, values):
     """
     Return (a % i), that is pre-Python 2.6 string formatting
@@ -339,6 +375,8 @@ def mod(a, values):
     return _to_string_or_unicode_array(
         _vec_string(a, object_, '__mod__', (values,)))
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def capitalize(a):
     """
     Return a copy of `a` with only the first character of each element
@@ -377,6 +415,11 @@ def capitalize(a):
     return _vec_string(a_arr, a_arr.dtype, 'capitalize')
 
 
+def _center_dispatcher(a, width, fillchar=None):
+    return (a,)
+
+
+@array_function_dispatch(_center_dispatcher)
 def center(a, width, fillchar=' '):
     """
     Return a copy of `a` with its elements centered in a string of
@@ -413,6 +456,11 @@ def center(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
 
 
+def _count_dispatcher(a, sub, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_count_dispatcher)
 def count(a, sub, start=0, end=None):
     """
     Returns an array with the number of non-overlapping occurrences of
@@ -459,6 +507,11 @@ def count(a, sub, start=0, end=None):
     return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
 
 
+def _code_dispatcher(a, encoding=None, errors=None):
+    return (a,)
+
+
+@array_function_dispatch(_code_dispatcher)
 def decode(a, encoding=None, errors=None):
     """
     Calls `str.decode` element-wise.
@@ -505,6 +558,7 @@ def decode(a, encoding=None, errors=None):
         _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
 
 
+@array_function_dispatch(_code_dispatcher)
 def encode(a, encoding=None, errors=None):
     """
     Calls `str.encode` element-wise.
@@ -540,6 +594,11 @@ def encode(a, encoding=None, errors=None):
         _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
 
 
+def _endswith_dispatcher(a, suffix, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_endswith_dispatcher)
 def endswith(a, suffix, start=0, end=None):
     """
     Returns a boolean array which is `True` where the string element
@@ -584,6 +643,11 @@ def endswith(a, suffix, start=0, end=None):
         a, bool_, 'endswith', [suffix, start] + _clean_args(end))
 
 
+def _expandtabs_dispatcher(a, tabsize=None):
+    return (a,)
+
+
+@array_function_dispatch(_expandtabs_dispatcher)
 def expandtabs(a, tabsize=8):
     """
     Return a copy of each string element where all tab characters are
@@ -619,6 +683,7 @@ def expandtabs(a, tabsize=8):
         _vec_string(a, object_, 'expandtabs', (tabsize,)))
 
 
+@array_function_dispatch(_count_dispatcher)
 def find(a, sub, start=0, end=None):
     """
     For each element, return the lowest index in the string where
@@ -654,6 +719,7 @@ def find(a, sub, start=0, end=None):
         a, integer, 'find', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_count_dispatcher)
 def index(a, sub, start=0, end=None):
     """
     Like `find`, but raises `ValueError` when the substring is not found.
@@ -681,6 +747,8 @@ def index(a, sub, start=0, end=None):
     return _vec_string(
         a, integer, 'index', [sub, start] + _clean_args(end))
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isalnum(a):
     """
     Returns true for each element if all characters in the string are
@@ -705,6 +773,8 @@ def isalnum(a):
     """
     return _vec_string(a, bool_, 'isalnum')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isalpha(a):
     """
     Returns true for each element if all characters in the string are
@@ -729,6 +799,8 @@ def isalpha(a):
     """
     return _vec_string(a, bool_, 'isalpha')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isdigit(a):
     """
     Returns true for each element if all characters in the string are
@@ -753,6 +825,8 @@ def isdigit(a):
     """
     return _vec_string(a, bool_, 'isdigit')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def islower(a):
     """
     Returns true for each element if all cased characters in the
@@ -778,6 +852,8 @@ def islower(a):
     """
     return _vec_string(a, bool_, 'islower')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isspace(a):
     """
     Returns true for each element if there are only whitespace
@@ -803,6 +879,8 @@ def isspace(a):
     """
     return _vec_string(a, bool_, 'isspace')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def istitle(a):
     """
     Returns true for each element if the element is a titlecased
@@ -827,6 +905,8 @@ def istitle(a):
     """
     return _vec_string(a, bool_, 'istitle')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isupper(a):
     """
     Returns true for each element if all cased characters in the
@@ -852,6 +932,12 @@ def isupper(a):
     """
     return _vec_string(a, bool_, 'isupper')
 
+
+def _join_dispatcher(sep, seq):
+    return (sep, seq)
+
+
+@array_function_dispatch(_join_dispatcher)
 def join(sep, seq):
     """
     Return a string which is the concatenation of the strings in the
@@ -877,6 +963,12 @@ def join(sep, seq):
         _vec_string(sep, object_, 'join', (seq,)))
 
 
+
+def _just_dispatcher(a, width, fillchar=None):
+    return (a,)
+
+
+@array_function_dispatch(_just_dispatcher)
 def ljust(a, width, fillchar=' '):
     """
     Return an array with the elements of `a` left-justified in a
@@ -912,6 +1004,7 @@ def ljust(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def lower(a):
     """
     Return an array with the elements converted to lowercase.
@@ -948,6 +1041,11 @@ def lower(a):
     return _vec_string(a_arr, a_arr.dtype, 'lower')
 
 
+def _strip_dispatcher(a, chars=None):
+    return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
 def lstrip(a, chars=None):
     """
     For each element in `a`, return a copy with the leading characters
@@ -1005,6 +1103,11 @@ def lstrip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
 
 
+def _partition_dispatcher(a, sep):
+    return (a,)
+
+
+@array_function_dispatch(_partition_dispatcher)
 def partition(a, sep):
     """
     Partition each element in `a` around `sep`.
@@ -1040,6 +1143,11 @@ def partition(a, sep):
         _vec_string(a, object_, 'partition', (sep,)))
 
 
+def _replace_dispatcher(a, old, new, count=None):
+    return (a,)
+
+
+@array_function_dispatch(_replace_dispatcher)
 def replace(a, old, new, count=None):
     """
     For each element in `a`, return a copy of the string with all
@@ -1072,6 +1180,7 @@ def replace(a, old, new, count=None):
             a, object_, 'replace', [old, new] + _clean_args(count)))
 
 
+@array_function_dispatch(_count_dispatcher)
 def rfind(a, sub, start=0, end=None):
     """
     For each element in `a`, return the highest index in the string
@@ -1104,6 +1213,7 @@ def rfind(a, sub, start=0, end=None):
         a, integer, 'rfind', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_count_dispatcher)
 def rindex(a, sub, start=0, end=None):
     """
     Like `rfind`, but raises `ValueError` when the substring `sub` is
@@ -1133,6 +1243,7 @@ def rindex(a, sub, start=0, end=None):
         a, integer, 'rindex', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_just_dispatcher)
 def rjust(a, width, fillchar=' '):
     """
     Return an array with the elements of `a` right-justified in a
@@ -1168,6 +1279,7 @@ def rjust(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
 
 
+@array_function_dispatch(_partition_dispatcher)
 def rpartition(a, sep):
     """
     Partition (split) each element around the right-most separator.
@@ -1203,6 +1315,11 @@ def rpartition(a, sep):
         _vec_string(a, object_, 'rpartition', (sep,)))
 
 
+def _split_dispatcher(a, sep=None, maxsplit=None):
+    return (a,)
+
+
+@array_function_dispatch(_split_dispatcher)
 def rsplit(a, sep=None, maxsplit=None):
     """
     For each element in `a`, return a list of the words in the
@@ -1240,6 +1357,11 @@ def rsplit(a, sep=None, maxsplit=None):
         a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
 
 
+# rstrip shares `_strip_dispatcher`, already defined above for lstrip, so the
+# dispatcher is not redefined here.
+
+
+@array_function_dispatch(_strip_dispatcher)
 def rstrip(a, chars=None):
     """
     For each element in `a`, return a copy with the trailing
@@ -1284,6 +1406,7 @@ def rstrip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
 
 
+@array_function_dispatch(_split_dispatcher)
 def split(a, sep=None, maxsplit=None):
     """
     For each element in `a`, return a list of the words in the
@@ -1318,6 +1441,11 @@ def split(a, sep=None, maxsplit=None):
         a, object_, 'split', [sep] + _clean_args(maxsplit))
 
 
+def _splitlines_dispatcher(a, keepends=None):
+    return (a,)
+
+
+@array_function_dispatch(_splitlines_dispatcher)
 def splitlines(a, keepends=None):
     """
     For each element in `a`, return a list of the lines in the
@@ -1347,6 +1475,11 @@ def splitlines(a, keepends=None):
         a, object_, 'splitlines', _clean_args(keepends))
 
 
+def _startswith_dispatcher(a, prefix, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_startswith_dispatcher)
 def startswith(a, prefix, start=0, end=None):
     """
     Returns a boolean array which is `True` where the string element
@@ -1378,6 +1511,7 @@ def startswith(a, prefix, start=0, end=None):
         a, bool_, 'startswith', [prefix, start] + _clean_args(end))
 
 
+@array_function_dispatch(_strip_dispatcher)
 def strip(a, chars=None):
     """
     For each element in `a`, return a copy with the leading and
@@ -1426,6 +1560,7 @@ def strip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def swapcase(a):
     """
     Return element-wise a copy of the string with
@@ -1463,6 +1598,7 @@ def swapcase(a):
     return _vec_string(a_arr, a_arr.dtype, 'swapcase')
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def title(a):
     """
     Return element-wise title cased version of string or unicode.
@@ -1502,6 +1638,11 @@ def title(a):
     return _vec_string(a_arr, a_arr.dtype, 'title')
 
 
+def _translate_dispatcher(a, table, deletechars=None):
+    return (a,)
+
+
+@array_function_dispatch(_translate_dispatcher)
 def translate(a, table, deletechars=None):
     """
     For each element in `a`, return a copy of the string where all
@@ -1538,6 +1679,7 @@ def translate(a, table, deletechars=None):
             a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def upper(a):
     """
     Return an array with the elements converted to uppercase.
@@ -1574,6 +1716,11 @@ def upper(a):
     return _vec_string(a_arr, a_arr.dtype, 'upper')
 
 
+def _zfill_dispatcher(a, width):
+    return (a,)
+
+
+@array_function_dispatch(_zfill_dispatcher)
 def zfill(a, width):
     """
     Return the numeric string left-filled with zeros
@@ -1604,6 +1751,7 @@ def zfill(a, width):
         a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def isnumeric(a):
     """
     For each element, return True if there are only numeric
@@ -1635,6 +1783,7 @@ def isnumeric(a):
     return _vec_string(a, bool_, 'isnumeric')
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def isdecimal(a):
     """
     For each element, return True if there are only decimal
diff --git a/numpy/core/einsumfunc.py b/numpy/core/einsumfunc.py
index 5b8689235..1281b3c98 100644
--- a/numpy/core/einsumfunc.py
+++ b/numpy/core/einsumfunc.py
@@ -8,7 +8,7 @@ import itertools
 
 from numpy.compat import basestring
 from numpy.core.multiarray import c_einsum
-from numpy.core.numeric import asarray, asanyarray, result_type, tensordot, dot
+from numpy.core.numeric import asanyarray, tensordot
 
 __all__ = ['einsum', 'einsum_path']
 
@@ -1373,7 +1373,7 @@ def einsum(*operands, **kwargs):
 
             # Find indices to contract over
             left_pos, right_pos = [], []
-            for s in idx_rm:
+            for s in sorted(idx_rm):
                 left_pos.append(input_left.find(s))
                 right_pos.append(input_right.find(s))
 
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index b9cc98cae..2fdbf3e23 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -12,6 +12,7 @@ from . import multiarray as mu
 from . import umath as um
 from . import numerictypes as nt
 from .numeric import asarray, array, asanyarray, concatenate
+from .overrides import array_function_dispatch
 from . import _methods
 
 _dt_ = nt.sctype2char
@@ -83,6 +84,11 @@ def _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs):
     return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
 
 
+def _take_dispatcher(a, indices, axis=None, out=None, mode=None):
+    return (a, out)
+
+
+@array_function_dispatch(_take_dispatcher)
 def take(a, indices, axis=None, out=None, mode='raise'):
     """
     Take elements from an array along an axis.
@@ -181,7 +187,12 @@ def take(a, indices, axis=None, out=None, mode='raise'):
     return _wrapfunc(a, 'take', indices, axis=axis, out=out, mode=mode)
 
 
+def _reshape_dispatcher(a, newshape, order=None):
+    return (a,)
+
+
 # not deprecated --- copy if necessary, view otherwise
+@array_function_dispatch(_reshape_dispatcher)
 def reshape(a, newshape, order='C'):
     """
     Gives a new shape to an array without changing its data.
@@ -279,6 +290,14 @@ def reshape(a, newshape, order='C'):
     return _wrapfunc(a, 'reshape', newshape, order=order)
 
 
+def _choose_dispatcher(a, choices, out=None, mode=None):
+    yield a
+    for c in choices:
+        yield c
+    yield out
+
+
+@array_function_dispatch(_choose_dispatcher)
 def choose(a, choices, out=None, mode='raise'):
     """
     Construct an array from an index array and a set of arrays to choose from.
@@ -401,6 +420,11 @@ def choose(a, choices, out=None, mode='raise'):
     return _wrapfunc(a, 'choose', choices, out=out, mode=mode)
 
 
+def _repeat_dispatcher(a, repeats, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_repeat_dispatcher)
 def repeat(a, repeats, axis=None):
     """
     Repeat elements of an array.
@@ -445,6 +469,11 @@ def repeat(a, repeats, axis=None):
     return _wrapfunc(a, 'repeat', repeats, axis=axis)
 
 
+def _put_dispatcher(a, ind, v, mode=None):
+    return (a, ind, v)
+
+
+@array_function_dispatch(_put_dispatcher)
 def put(a, ind, v, mode='raise'):
     """
     Replaces specified elements of an array with given values.
@@ -503,6 +532,11 @@ def put(a, ind, v, mode='raise'):
     return put(ind, v, mode=mode)
 
 
+def _swapaxes_dispatcher(a, axis1, axis2):
+    return (a,)
+
+
+@array_function_dispatch(_swapaxes_dispatcher)
 def swapaxes(a, axis1, axis2):
     """
     Interchange two axes of an array.
@@ -549,6 +583,11 @@ def swapaxes(a, axis1, axis2):
     return _wrapfunc(a, 'swapaxes', axis1, axis2)
 
 
+def _transpose_dispatcher(a, axes=None):
+    return (a,)
+
+
+@array_function_dispatch(_transpose_dispatcher)
 def transpose(a, axes=None):
     """
     Permute the dimensions of an array.
@@ -598,6 +637,11 @@ def transpose(a, axes=None):
     return _wrapfunc(a, 'transpose', axes)
 
 
+def _partition_dispatcher(a, kth, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_partition_dispatcher)
 def partition(a, kth, axis=-1, kind='introselect', order=None):
     """
     Return a partitioned copy of an array.
@@ -689,6 +733,11 @@ def partition(a, kth, axis=-1, kind='introselect', order=None):
     return a
 
 
+def _argpartition_dispatcher(a, kth, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_argpartition_dispatcher)
 def argpartition(a, kth, axis=-1, kind='introselect', order=None):
     """
     Perform an indirect partition along the given axis using the
@@ -757,6 +806,11 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None):
     return _wrapfunc(a, 'argpartition', kth, axis=axis, kind=kind, order=order)
 
 
+def _sort_dispatcher(a, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_sort_dispatcher)
 def sort(a, axis=-1, kind='quicksort', order=None):
     """
     Return a sorted copy of an array.
@@ -879,6 +933,11 @@ def sort(a, axis=-1, kind='quicksort', order=None):
     return a
 
 
+def _argsort_dispatcher(a, axis=None, kind=None, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_argsort_dispatcher)
 def argsort(a, axis=-1, kind='quicksort', order=None):
     """
     Returns the indices that would sort an array.
@@ -973,6 +1032,11 @@ def argsort(a, axis=-1, kind='quicksort', order=None):
     return _wrapfunc(a, 'argsort', axis=axis, kind=kind, order=order)
 
 
+def _argmax_dispatcher(a, axis=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_argmax_dispatcher)
 def argmax(a, axis=None, out=None):
     """
     Returns the indices of the maximum values along an axis.
@@ -1007,10 +1071,10 @@ def argmax(a, axis=None, out=None):
 
     Examples
     --------
-    >>> a = np.arange(6).reshape(2,3)
+    >>> a = np.arange(6).reshape(2,3) + 10
     >>> a
-    array([[0, 1, 2],
-           [3, 4, 5]])
+    array([[10, 11, 12],
+           [13, 14, 15]])
     >>> np.argmax(a)
     5
     >>> np.argmax(a, axis=0)
@@ -1024,7 +1088,7 @@ def argmax(a, axis=None, out=None):
     >>> ind
     (1, 2)
     >>> a[ind]
-    5
+    15
 
     >>> b = np.arange(6)
     >>> b[1] = 5
@@ -1037,6 +1101,11 @@ def argmax(a, axis=None, out=None):
     return _wrapfunc(a, 'argmax', axis=axis, out=out)
 
 
+def _argmin_dispatcher(a, axis=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_argmin_dispatcher)
 def argmin(a, axis=None, out=None):
     """
     Returns the indices of the minimum values along an axis.
@@ -1071,10 +1140,10 @@ def argmin(a, axis=None, out=None):
 
     Examples
     --------
-    >>> a = np.arange(6).reshape(2,3)
+    >>> a = np.arange(6).reshape(2,3) + 10
     >>> a
-    array([[0, 1, 2],
-           [3, 4, 5]])
+    array([[10, 11, 12],
+           [13, 14, 15]])
     >>> np.argmin(a)
     0
     >>> np.argmin(a, axis=0)
@@ -1088,12 +1157,12 @@ def argmin(a, axis=None, out=None):
     >>> ind
     (0, 0)
     >>> a[ind]
-    0
+    10
 
-    >>> b = np.arange(6)
-    >>> b[4] = 0
+    >>> b = np.arange(6) + 10
+    >>> b[4] = 10
     >>> b
-    array([0, 1, 2, 3, 0, 5])
+    array([10, 11, 12, 13, 10, 15])
     >>> np.argmin(b)  # Only the first occurrence is returned.
     0
 
@@ -1101,6 +1170,11 @@ def argmin(a, axis=None, out=None):
     return _wrapfunc(a, 'argmin', axis=axis, out=out)
 
 
+def _searchsorted_dispatcher(a, v, side=None, sorter=None):
+    return (a, v, sorter)
+
+
+@array_function_dispatch(_searchsorted_dispatcher)
 def searchsorted(a, v, side='left', sorter=None):
     """
     Find indices where elements should be inserted to maintain order.
@@ -1170,6 +1244,11 @@ def searchsorted(a, v, side='left', sorter=None):
     return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
 
 
+def _resize_dispatcher(a, new_shape):
+    return (a,)
+
+
+@array_function_dispatch(_resize_dispatcher)
 def resize(a, new_shape):
     """
     Return a new array with the specified shape.
@@ -1243,6 +1322,11 @@ def resize(a, new_shape):
     return reshape(a, new_shape)
 
 
+def _squeeze_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_squeeze_dispatcher)
 def squeeze(a, axis=None):
     """
     Remove single-dimensional entries from the shape of an array.
@@ -1301,6 +1385,12 @@ def squeeze(a, axis=None):
     else:
         return squeeze(axis=axis)
 
+
+def _diagonal_dispatcher(a, offset=None, axis1=None, axis2=None):
+    return (a,)
+
+
+@array_function_dispatch(_diagonal_dispatcher)
 def diagonal(a, offset=0, axis1=0, axis2=1):
     """
     Return specified diagonals.
@@ -1415,6 +1505,12 @@ def diagonal(a, offset=0, axis1=0, axis2=1):
         return asanyarray(a).diagonal(offset=offset, axis1=axis1, axis2=axis2)
 
 
+def _trace_dispatcher(
+        a, offset=None, axis1=None, axis2=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_trace_dispatcher)
 def trace(a, offset=0, axis1=0, axis2=1, dtype=None, out=None):
     """
     Return the sum along diagonals of the array.
@@ -1478,6 +1574,11 @@ def trace(a, offset=0, axis1=0, axis2=1, dtype=None, out=None):
         return asanyarray(a).trace(offset=offset, axis1=axis1, axis2=axis2, dtype=dtype, out=out)
 
 
+def _ravel_dispatcher(a, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_ravel_dispatcher)
 def ravel(a, order='C'):
     """Return a contiguous flattened array.
 
@@ -1584,6 +1685,11 @@ def ravel(a, order='C'):
         return asanyarray(a).ravel(order=order)
 
 
+def _nonzero_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_nonzero_dispatcher)
 def nonzero(a):
     """
     Return the indices of the elements that are non-zero.
@@ -1670,6 +1776,11 @@ def nonzero(a):
     return _wrapfunc(a, 'nonzero')
 
 
+def _shape_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_shape_dispatcher)
 def shape(a):
     """
     Return the shape of an array.
@@ -1715,6 +1826,11 @@ def shape(a):
     return result
 
 
+def _compress_dispatcher(condition, a, axis=None, out=None):
+    return (condition, a, out)
+
+
+@array_function_dispatch(_compress_dispatcher)
 def compress(condition, a, axis=None, out=None):
     """
     Return selected slices of an array along given axis.
@@ -1778,6 +1894,11 @@ def compress(condition, a, axis=None, out=None):
     return _wrapfunc(a, 'compress', condition, axis=axis, out=out)
 
 
+def _clip_dispatcher(a, a_min, a_max, out=None):
+    return (a, a_min, a_max, out)  # `out` may define __array_function__ too
+
+
+@array_function_dispatch(_clip_dispatcher)
 def clip(a, a_min, a_max, out=None):
     """
     Clip (limit) the values in an array.
@@ -1835,6 +1956,12 @@ def clip(a, a_min, a_max, out=None):
     return _wrapfunc(a, 'clip', a_min, a_max, out=out)
 
 
+def _sum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
+                    initial=None):
+    return (a, out)
+
+
+@array_function_dispatch(_sum_dispatcher)
 def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
     """
     Sum of array elements over a given axis.
@@ -1934,7 +2061,7 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._No
         # 2018-02-25, 1.15.0
         warnings.warn(
             "Calling np.sum(generator) is deprecated, and in the future will give a different result. "
-            "Use np.sum(np.from_iter(generator)) or the python sum builtin instead.",
+            "Use np.sum(np.fromiter(generator)) or the python sum builtin instead.",
             DeprecationWarning, stacklevel=2)
 
         res = _sum_(a)
@@ -1947,6 +2074,11 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._No
                           initial=initial)
 
 
+def _any_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_any_dispatcher)
 def any(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Test whether any array element along a given axis evaluates to True.
@@ -2030,6 +2162,11 @@ def any(a, axis=None, out=None, keepdims=np._NoValue):
     return _wrapreduction(a, np.logical_or, 'any', axis, None, out, keepdims=keepdims)
 
 
+def _all_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_all_dispatcher)
 def all(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Test whether all array elements along a given axis evaluate to True.
@@ -2106,6 +2243,11 @@ def all(a, axis=None, out=None, keepdims=np._NoValue):
     return _wrapreduction(a, np.logical_and, 'all', axis, None, out, keepdims=keepdims)
 
 
+def _cumsum_dispatcher(a, axis=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_cumsum_dispatcher)
 def cumsum(a, axis=None, dtype=None, out=None):
     """
     Return the cumulative sum of the elements along a given axis.
@@ -2173,6 +2315,11 @@ def cumsum(a, axis=None, dtype=None, out=None):
     return _wrapfunc(a, 'cumsum', axis=axis, dtype=dtype, out=out)
 
 
+def _ptp_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_ptp_dispatcher)
 def ptp(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Range of values (maximum - minimum) along an axis.
@@ -2241,6 +2388,11 @@ def ptp(a, axis=None, out=None, keepdims=np._NoValue):
     return _methods._ptp(a, axis=axis, out=out, **kwargs)
 
 
+def _amax_dispatcher(a, axis=None, out=None, keepdims=None, initial=None):
+    return (a, out)
+
+
+@array_function_dispatch(_amax_dispatcher)
 def amax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
     """
     Return the maximum of an array or maximum along an axis.
@@ -2351,6 +2503,11 @@ def amax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
                           initial=initial)
 
 
+def _amin_dispatcher(a, axis=None, out=None, keepdims=None, initial=None):
+    return (a, out)
+
+
+@array_function_dispatch(_amin_dispatcher)
 def amin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
     """
     Return the minimum of an array or minimum along an axis.
@@ -2459,6 +2616,11 @@ def amin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
                           initial=initial)
 
 
+def _alen_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_alen_dispatcher)
 def alen(a):
     """
     Return the length of the first dimension of the input array.
@@ -2492,6 +2654,12 @@ def alen(a):
         return len(array(a, ndmin=1))
 
 
+def _prod_dispatcher(
+        a, axis=None, dtype=None, out=None, keepdims=None, initial=None):
+    return (a, out)
+
+
+@array_function_dispatch(_prod_dispatcher)
 def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
     """
     Return the product of array elements over a given axis.
@@ -2602,6 +2770,11 @@ def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._N
                           initial=initial)
 
 
+def _cumprod_dispatcher(a, axis=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_cumprod_dispatcher)
 def cumprod(a, axis=None, dtype=None, out=None):
     """
     Return the cumulative product of elements along a given axis.
@@ -2665,6 +2838,11 @@ def cumprod(a, axis=None, dtype=None, out=None):
     return _wrapfunc(a, 'cumprod', axis=axis, dtype=dtype, out=out)
 
 
+def _ndim_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_ndim_dispatcher)
 def ndim(a):
     """
     Return the number of dimensions of an array.
@@ -2702,6 +2880,11 @@ def ndim(a):
         return asarray(a).ndim
 
 
+def _size_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_size_dispatcher)
 def size(a, axis=None):
     """
     Return the number of elements along a given axis.
@@ -2748,6 +2931,11 @@ def size(a, axis=None):
             return asarray(a).shape[axis]
 
 
+def _around_dispatcher(a, decimals=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_around_dispatcher)
 def around(a, decimals=0, out=None):
     """
     Evenly round to the given number of decimals.
@@ -2817,20 +3005,11 @@ def around(a, decimals=0, out=None):
     return _wrapfunc(a, 'round', decimals=decimals, out=out)
 
 
-def round_(a, decimals=0, out=None):
-    """
-    Round an array to the given number of decimals.
-
-    Refer to `around` for full documentation.
-
-    See Also
-    --------
-    around : equivalent function
-
-    """
-    return around(a, decimals=decimals, out=out)
+def _mean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+    return (a, out)
 
 
+@array_function_dispatch(_mean_dispatcher)
 def mean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Compute the arithmetic mean along the specified axis.
@@ -2937,6 +3116,12 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
                           out=out, **kwargs)
 
 
+def _std_dispatcher(
+        a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_std_dispatcher)
 def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     """
     Compute the standard deviation along the specified axis.
@@ -3055,6 +3240,12 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
                          **kwargs)
 
 
+def _var_dispatcher(
+        a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_var_dispatcher)
 def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     """
     Compute the variance along the specified axis.
@@ -3177,6 +3368,19 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
 # Aliases of other functions. These have their own definitions only so that
 # they can have unique docstrings.
 
+@array_function_dispatch(_around_dispatcher)
+def round_(a, decimals=0, out=None):
+    """
+    Round an array to the given number of decimals.
+
+    See Also
+    --------
+    around : equivalent function; see for details.
+    """
+    return around(a, decimals=decimals, out=out)
+
+
+@array_function_dispatch(_prod_dispatcher, verify=False)
 def product(*args, **kwargs):
     """
     Return the product of array elements over a given axis.
@@ -3188,6 +3392,7 @@ def product(*args, **kwargs):
     return prod(*args, **kwargs)
 
 
+@array_function_dispatch(_cumprod_dispatcher, verify=False)
 def cumproduct(*args, **kwargs):
     """
     Return the cumulative product over the given axis.
@@ -3199,6 +3404,7 @@ def cumproduct(*args, **kwargs):
     return cumprod(*args, **kwargs)
 
 
+@array_function_dispatch(_any_dispatcher, verify=False)
 def sometrue(*args, **kwargs):
     """
     Check whether some values are true.
@@ -3212,6 +3418,7 @@ def sometrue(*args, **kwargs):
     return any(*args, **kwargs)
 
 
+@array_function_dispatch(_all_dispatcher, verify=False)
 def alltrue(*args, **kwargs):
     """
     Check if all elements of input array are true.
@@ -3223,6 +3430,7 @@ def alltrue(*args, **kwargs):
     return all(*args, **kwargs)
 
 
+@array_function_dispatch(_ndim_dispatcher)
 def rank(a):
     """
     Return the number of dimensions of an array.
diff --git a/numpy/core/getlimits.py b/numpy/core/getlimits.py
index e450a660d..0e3c58793 100644
--- a/numpy/core/getlimits.py
+++ b/numpy/core/getlimits.py
@@ -513,7 +513,7 @@ class iinfo(object):
         self.bits = self.dtype.itemsize * 8
         self.key = "%s%d" % (self.kind, self.bits)
         if self.kind not in 'iu':
-            raise ValueError("Invalid integer data type.")
+            raise ValueError("Invalid integer data type %r." % (self.kind,))
 
     def min(self):
         """Minimum value of given dtype."""
diff --git a/numpy/core/include/numpy/ndarrayobject.h b/numpy/core/include/numpy/ndarrayobject.h
index 12fc7098c..45f008b1d 100644
--- a/numpy/core/include/numpy/ndarrayobject.h
+++ b/numpy/core/include/numpy/ndarrayobject.h
@@ -5,13 +5,7 @@
 #ifndef NPY_NDARRAYOBJECT_H
 #define NPY_NDARRAYOBJECT_H
 #ifdef __cplusplus
-#define CONFUSE_EMACS {
-#define CONFUSE_EMACS2 }
-extern "C" CONFUSE_EMACS
-#undef CONFUSE_EMACS
-#undef CONFUSE_EMACS2
-/* ... otherwise a semi-smart identer (like emacs) tries to indent
-       everything when you're typing */
+extern "C" {
 #endif
 
 #include <Python.h>
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index ec2893b21..da006909a 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1670,7 +1670,7 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags)
 #define PyTypeNum_ISOBJECT(type) ((type) == NPY_OBJECT)
 
 
-#define PyDataType_ISBOOL(obj) PyTypeNum_ISBOOL(_PyADt(obj))
+#define PyDataType_ISBOOL(obj) PyTypeNum_ISBOOL(((PyArray_Descr*)(obj))->type_num)
 #define PyDataType_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(((PyArray_Descr*)(obj))->type_num)
 #define PyDataType_ISSIGNED(obj) PyTypeNum_ISSIGNED(((PyArray_Descr*)(obj))->type_num)
 #define PyDataType_ISINTEGER(obj) PyTypeNum_ISINTEGER(((PyArray_Descr*)(obj))->type_num )
diff --git a/numpy/core/include/numpy/npy_1_7_deprecated_api.h b/numpy/core/include/numpy/npy_1_7_deprecated_api.h
index 4c318bc47..76b57b748 100644
--- a/numpy/core/include/numpy/npy_1_7_deprecated_api.h
+++ b/numpy/core/include/numpy/npy_1_7_deprecated_api.h
@@ -9,11 +9,11 @@
 #define _WARN___STR2__(x) #x
 #define _WARN___STR1__(x) _WARN___STR2__(x)
 #define _WARN___LOC__ __FILE__ "(" _WARN___STR1__(__LINE__) ") : Warning Msg: "
-#pragma message(_WARN___LOC__"Using deprecated NumPy API, disable it by " \
-                         "#defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION")
+#pragma message(_WARN___LOC__"Using deprecated NumPy API, disable it with " \
+                         "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION")
 #elif defined(__GNUC__)
-#warning "Using deprecated NumPy API, disable it by " \
-         "#defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION"
+#warning "Using deprecated NumPy API, disable it with " \
+         "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION"
 #endif
 /* TODO: How to do this warning message for other compilers? */
 
diff --git a/numpy/core/include/numpy/npy_3kcompat.h b/numpy/core/include/numpy/npy_3kcompat.h
index 2d0ccd3b9..a3c69f44e 100644
--- a/numpy/core/include/numpy/npy_3kcompat.h
+++ b/numpy/core/include/numpy/npy_3kcompat.h
@@ -69,6 +69,16 @@ static NPY_INLINE int PyInt_Check(PyObject *op) {
     #define Npy_EnterRecursiveCall(x) Py_EnterRecursiveCall(x)
 #endif
 
+/* Py_SETREF was added in 3.5.2, and only if Py_LIMITED_API is absent */
+#if PY_VERSION_HEX < 0x03050200
+    #define Py_SETREF(op, op2)                      \
+        do {                                        \
+            PyObject *_py_tmp = (PyObject *)(op);   \
+            (op) = (op2);                           \
+            Py_DECREF(_py_tmp);                     \
+        } while (0)
+#endif
+
 /*
  * PyString -> PyBytes
  */
@@ -141,20 +151,14 @@ static NPY_INLINE int PyInt_Check(PyObject *op) {
 static NPY_INLINE void
 PyUnicode_ConcatAndDel(PyObject **left, PyObject *right)
 {
-    PyObject *newobj;
-    newobj = PyUnicode_Concat(*left, right);
-    Py_DECREF(*left);
+    Py_SETREF(*left, PyUnicode_Concat(*left, right));
     Py_DECREF(right);
-    *left = newobj;
 }
 
 static NPY_INLINE void
 PyUnicode_Concat2(PyObject **left, PyObject *right)
 {
-    PyObject *newobj;
-    newobj = PyUnicode_Concat(*left, right);
-    Py_DECREF(*left);
-    *left = newobj;
+    Py_SETREF(*left, PyUnicode_Concat(*left, right));
 }
 
 /*
@@ -378,6 +382,45 @@ npy_PyFile_CloseFile(PyObject *file)
     return 0;
 }
 
+
+/* This is a copy of _PyErr_ChainExceptions, with:
+ *  - a minimal implementation for python 2
+ *  - __cause__ used instead of __context__
+ *
+ * When exc is non-NULL, the references to exc, val and tb are consumed.
+ */
+static NPY_INLINE void
+npy_PyErr_ChainExceptionsCause(PyObject *exc, PyObject *val, PyObject *tb)
+{
+    if (exc == NULL)
+        return;
+
+    if (PyErr_Occurred()) {
+        /* only py3 supports this anyway */
+        #ifdef NPY_PY3K
+            PyObject *exc2, *val2, *tb2;
+            PyErr_Fetch(&exc2, &val2, &tb2);
+            PyErr_NormalizeException(&exc, &val, &tb);
+            if (tb != NULL) {
+                PyException_SetTraceback(val, tb);
+                Py_DECREF(tb);
+            }
+            Py_DECREF(exc);
+            PyErr_NormalizeException(&exc2, &val2, &tb2);
+            PyException_SetCause(val2, val);
+            PyErr_Restore(exc2, val2, tb2);
+        #else
+            /* python 2 cannot chain: keep the newer error, release the old */
+            Py_DECREF(exc);
+            Py_XDECREF(val);
+            Py_XDECREF(tb);
+        #endif
+    }
+    else {
+        PyErr_Restore(exc, val, tb);
+    }
+}
+
 /*
  * PyObject_Cmp
  */
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index 5faff4385..64aaaacff 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -14,7 +14,7 @@
  * using static inline modifiers when defining npy_math functions
  * allows the compiler to make optimizations when possible
  */
-#if NPY_INTERNAL_BUILD
+#if defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD
 #ifndef NPY_INLINE_MATH
 #define NPY_INLINE_MATH 1
 #endif
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 4b1b3d325..85f8a6c08 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -209,9 +209,32 @@ typedef struct _tagPyUFuncObject {
          * set by nditer object.
          */
         npy_uint32 iter_flags;
+
+        /* New in NPY_API_VERSION 0x0000000D and above */
+
+        /*
+         * for each of the core_num_dim_ix distinct dimension names,
+         * the possible "frozen" size (-1 if not frozen).
+         */
+        npy_intp *core_dim_sizes;
+
+        /*
+         * for each distinct core dimension, a set of UFUNC_CORE_DIM* flags
+         */
+        npy_uint32 *core_dim_flags;
+
+
+
 } PyUFuncObject;
 
 #include "arrayobject.h"
+/* Generalized ufunc; 0x0001 reserved for possible use as CORE_ENABLED */
+/* the core dimension's size will be determined by the operands. */
+#define UFUNC_CORE_DIM_SIZE_INFERRED 0x0002
+/* the core dimension may be absent */
+#define UFUNC_CORE_DIM_CAN_IGNORE 0x0004
+/* flags inferred during execution */
+#define UFUNC_CORE_DIM_MISSING 0x00040000
 
 #define UFUNC_ERR_IGNORE 0
 #define UFUNC_ERR_WARN   1
@@ -314,22 +337,6 @@ typedef struct _loop1d_info {
                                 &(arg)->first))) \
                 goto fail;} while (0)
 
-
-/* keep in sync with ieee754.c.src */
-#if defined(sun) || defined(__BSD__) || defined(__OpenBSD__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version < 502114)) || \
-      defined(__NetBSD__) || \
-      defined(__GLIBC__) || defined(__APPLE__) || \
-      defined(__CYGWIN__) || defined(__MINGW32__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version >= 502114)) || \
-      defined(_AIX) || \
-      defined(_MSC_VER) || \
-      defined(__osf__) && defined(__alpha)
-#else
-#define NO_FLOATING_POINT_SUPPORT
-#endif
-
-
 /*
  * THESE MACROS ARE DEPRECATED.
  * Use npy_set_floatstatus_* in the npymath library.
diff --git a/numpy/core/memmap.py b/numpy/core/memmap.py
index 536fa6094..8269f537f 100644
--- a/numpy/core/memmap.py
+++ b/numpy/core/memmap.py
@@ -2,7 +2,9 @@ from __future__ import division, absolute_import, print_function
 
 import numpy as np
 from .numeric import uint8, ndarray, dtype
-from numpy.compat import long, basestring, is_pathlib_path
+from numpy.compat import (
+    long, basestring, is_pathlib_path, contextlib_nullcontext
+)
 
 __all__ = ['memmap']
 
@@ -211,78 +213,72 @@ class memmap(ndarray):
                 raise ValueError("mode must be one of %s" %
                                  (valid_filemodes + list(mode_equivalents.keys())))
 
-        if hasattr(filename, 'read'):
-            fid = filename
-            own_file = False
-        elif is_pathlib_path(filename):
-            fid = filename.open((mode == 'c' and 'r' or mode)+'b')
-            own_file = True
-        else:
-            fid = open(filename, (mode == 'c' and 'r' or mode)+'b')
-            own_file = True
-
-        if (mode == 'w+') and shape is None:
+        if mode == 'w+' and shape is None:
             raise ValueError("shape must be given")
 
-        fid.seek(0, 2)
-        flen = fid.tell()
-        descr = dtypedescr(dtype)
-        _dbytes = descr.itemsize
-
-        if shape is None:
-            bytes = flen - offset
-            if (bytes % _dbytes):
-                fid.close()
-                raise ValueError("Size of available data is not a "
-                        "multiple of the data-type size.")
-            size = bytes // _dbytes
-            shape = (size,)
-        else:
-            if not isinstance(shape, tuple):
-                shape = (shape,)
-            size = np.intp(1)  # avoid default choice of np.int_, which might overflow
-            for k in shape:
-                size *= k
-
-        bytes = long(offset + size*_dbytes)
-
-        if mode == 'w+' or (mode == 'r+' and flen < bytes):
-            fid.seek(bytes - 1, 0)
-            fid.write(b'\0')
-            fid.flush()
-
-        if mode == 'c':
-            acc = mmap.ACCESS_COPY
-        elif mode == 'r':
-            acc = mmap.ACCESS_READ
-        else:
-            acc = mmap.ACCESS_WRITE
-
-        start = offset - offset % mmap.ALLOCATIONGRANULARITY
-        bytes -= start
-        array_offset = offset - start
-        mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start)
-
-        self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
-                               offset=array_offset, order=order)
-        self._mmap = mm
-        self.offset = offset
-        self.mode = mode
-
-        if isinstance(filename, basestring):
-            self.filename = os.path.abspath(filename)
+        if hasattr(filename, 'read'):
+            f_ctx = contextlib_nullcontext(filename)
         elif is_pathlib_path(filename):
-            self.filename = filename.resolve()
-        # py3 returns int for TemporaryFile().name
-        elif (hasattr(filename, "name") and
-              isinstance(filename.name, basestring)):
-            self.filename = os.path.abspath(filename.name)
-        # same as memmap copies (e.g. memmap + 1)
+            f_ctx = filename.open(('r' if mode == 'c' else mode)+'b')
         else:
-            self.filename = None
-
-        if own_file:
-            fid.close()
+            f_ctx = open(filename, ('r' if mode == 'c' else mode)+'b')
+
+        with f_ctx as fid:
+            fid.seek(0, 2)
+            flen = fid.tell()
+            descr = dtypedescr(dtype)
+            _dbytes = descr.itemsize
+
+            if shape is None:
+                bytes = flen - offset
+                if bytes % _dbytes:
+                    raise ValueError("Size of available data is not a "
+                            "multiple of the data-type size.")
+                size = bytes // _dbytes
+                shape = (size,)
+            else:
+                if not isinstance(shape, tuple):
+                    shape = (shape,)
+                size = np.intp(1)  # avoid default choice of np.int_, which might overflow
+                for k in shape:
+                    size *= k
+
+            bytes = long(offset + size*_dbytes)
+
+            if mode == 'w+' or (mode == 'r+' and flen < bytes):
+                fid.seek(bytes - 1, 0)
+                fid.write(b'\0')
+                fid.flush()
+
+            if mode == 'c':
+                acc = mmap.ACCESS_COPY
+            elif mode == 'r':
+                acc = mmap.ACCESS_READ
+            else:
+                acc = mmap.ACCESS_WRITE
+
+            start = offset - offset % mmap.ALLOCATIONGRANULARITY
+            bytes -= start
+            array_offset = offset - start
+            mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start)
+
+            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
+                                   offset=array_offset, order=order)
+            self._mmap = mm
+            self.offset = offset
+            self.mode = mode
+
+            if isinstance(filename, basestring):
+                self.filename = os.path.abspath(filename)
+            elif is_pathlib_path(filename):
+                self.filename = filename.resolve()
+            # py3 returns int for TemporaryFile().name
+            elif (hasattr(filename, "name") and
+                  isinstance(filename.name, basestring)):
+                self.filename = os.path.abspath(filename.name)
+            # same as memmap copies (e.g. memmap + 1)
+            else:
+                self.filename = None
 
         return self
 
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index 8b07bccf2..4dbd3b0fd 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -3,32 +3,1151 @@ Create the numpy.core.multiarray namespace for backward compatibility. In v1.16
 the multiarray and umath c-extension modules were merged into a single
 _multiarray_umath extension module. So we replicate the old namespace
 by importing from the extension module.
+
 """
 
 from . import _multiarray_umath
+from .overrides import array_function_dispatch
+import numpy as np
 from numpy.core._multiarray_umath import *
-from numpy.core._multiarray_umath import (_fastCopyAndTranspose, _flagdict, _insert,
-     _reconstruct, _vec_string, _ARRAY_API, _monotonicity)
+from numpy.core._multiarray_umath import (
+    _fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string,
+    _ARRAY_API, _monotonicity
+    )
 
-__all__ = ['_ARRAY_API', 'ALLOW_THREADS', 'BUFSIZE', 'CLIP', 'DATETIMEUNITS',
+__all__ = [
+    '_ARRAY_API', 'ALLOW_THREADS', 'BUFSIZE', 'CLIP', 'DATETIMEUNITS',
     'ITEM_HASOBJECT', 'ITEM_IS_POINTER', 'LIST_PICKLE', 'MAXDIMS',
     'MAY_SHARE_BOUNDS', 'MAY_SHARE_EXACT', 'NEEDS_INIT', 'NEEDS_PYAPI',
-    'RAISE', 'USE_GETITEM', 'USE_SETITEM', 'WRAP',
-    '_fastCopyAndTranspose', '_flagdict', '_insert', '_reconstruct',
-    '_vec_string', '_monotonicity',
-    'add_docstring', 'arange', 'array', 'bincount', 'broadcast', 'busday_count',
-    'busday_offset', 'busdaycalendar', 'can_cast', 'compare_chararrays',
-    'concatenate', 'copyto', 'correlate', 'correlate2', 'count_nonzero',
-    'c_einsum', 'datetime_as_string', 'datetime_data', 'digitize', 'dot',
-    'dragon4_positional', 'dragon4_scientific', 'dtype', 'empty', 'empty_like',
-    'error', 'flagsobj', 'flatiter', 'format_longfloat', 'frombuffer',
-    'fromfile', 'fromiter', 'fromstring', 'getbuffer', 'inner', 'int_asbuffer',
-    'interp', 'interp_complex', 'is_busday', 'lexsort', 'matmul',
-    'may_share_memory', 'min_scalar_type', 'ndarray', 'nditer', 'nested_iters',
-    'newbuffer', 'normalize_axis_index', 'packbits', 'promote_types',
-    'putmask', 'ravel_multi_index', 'result_type', 'scalar',
+    'RAISE', 'USE_GETITEM', 'USE_SETITEM', 'WRAP', '_fastCopyAndTranspose',
+    '_flagdict', '_insert', '_reconstruct', '_vec_string', '_monotonicity',
+    'add_docstring', 'arange', 'array', 'bincount', 'broadcast',
+    'busday_count', 'busday_offset', 'busdaycalendar', 'can_cast',
+    'compare_chararrays', 'concatenate', 'copyto', 'correlate', 'correlate2',
+    'count_nonzero', 'c_einsum', 'datetime_as_string', 'datetime_data',
+    'digitize', 'dot', 'dragon4_positional', 'dragon4_scientific', 'dtype',
+    'empty', 'empty_like', 'error', 'flagsobj', 'flatiter', 'format_longfloat',
+    'frombuffer', 'fromfile', 'fromiter', 'fromstring', 'getbuffer', 'inner',
+    'int_asbuffer', 'interp', 'interp_complex', 'is_busday', 'lexsort',
+    'matmul', 'may_share_memory', 'min_scalar_type', 'ndarray', 'nditer',
+    'nested_iters', 'newbuffer', 'normalize_axis_index', 'packbits',
+    'promote_types', 'putmask', 'ravel_multi_index', 'result_type', 'scalar',
     'set_datetimeparse_function', 'set_legacy_print_mode', 'set_numeric_ops',
     'set_string_function', 'set_typeDict', 'shares_memory', 'test_interrupt',
     'tracemalloc_domain', 'typeinfo', 'unpackbits', 'unravel_index', 'vdot',
     'where', 'zeros']
 
+
+def _empty_like_dispatcher(prototype, dtype=None, order=None, subok=None):
+    return (prototype,)
+
+
+@array_function_dispatch(_empty_like_dispatcher)
+def empty_like(prototype, dtype=None, order='K', subok=True):
+    """Return a new array with the same shape and type as a given array.
+
+    Parameters
+    ----------
+    prototype : array_like
+        The shape and data-type of `prototype` define these same attributes
+        of the returned array.
+    dtype : data-type, optional
+        Overrides the data type of the result.
+
+        .. versionadded:: 1.6.0
+    order : {'C', 'F', 'A', or 'K'}, optional
+        Overrides the memory layout of the result. 'C' means C-order,
+        'F' means F-order, 'A' means 'F' if ``prototype`` is Fortran
+        contiguous, 'C' otherwise. 'K' means match the layout of ``prototype``
+        as closely as possible.
+
+        .. versionadded:: 1.6.0
+    subok : bool, optional
+        If True, then the newly created array will use the sub-class
+        type of `prototype`, otherwise it will be a base-class array.
+        Defaults to True.
+
+    Returns
+    -------
+    out : ndarray
+        Array of uninitialized (arbitrary) data with the same
+        shape and type as `prototype`.
+
+    See Also
+    --------
+    ones_like : Return an array of ones with shape and type of input.
+    zeros_like : Return an array of zeros with shape and type of input.
+    full_like : Return a new array with shape of input filled with value.
+    empty : Return a new uninitialized array.
+
+    Notes
+    -----
+    This function does *not* initialize the returned array; to do that use
+    `zeros_like` or `ones_like` instead.  It may be marginally faster than
+    the functions that do set the array values.
+
+    Examples
+    --------
+    >>> a = ([1,2,3], [4,5,6])                         # a is array-like
+    >>> np.empty_like(a)
+    array([[-1073741821, -1073741821,           3],    #random
+           [          0,           0, -1073741821]])
+    >>> a = np.array([[1., 2., 3.],[4.,5.,6.]])
+    >>> np.empty_like(a)
+    array([[ -2.00000715e+000,   1.48219694e-323,  -2.00000572e+000],#random
+           [  4.38791518e-305,  -2.00000715e+000,   4.17269252e-309]])
+
+    """
+    return _multiarray_umath.empty_like(prototype, dtype, order, subok)
+
+
+def _concatenate_dispatcher(arrays, axis=None, out=None):
+    for array in arrays:
+        yield array
+    yield out
+
+
+@array_function_dispatch(_concatenate_dispatcher)
+def concatenate(arrays, axis=0, out=None):
+    """
+    concatenate((a1, a2, ...), axis=0, out=None)
+
+    Join a sequence of arrays along an existing axis.
+
+    Parameters
+    ----------
+    a1, a2, ... : sequence of array_like
+        The arrays must have the same shape, except in the dimension
+        corresponding to `axis` (the first, by default).
+    axis : int, optional
+        The axis along which the arrays will be joined.  If axis is None,
+        arrays are flattened before use.  Default is 0.
+    out : ndarray, optional
+        If provided, the destination to place the result. The shape must be
+        correct, matching that of what concatenate would have returned if no
+        out argument were specified.
+
+    Returns
+    -------
+    res : ndarray
+        The concatenated array.
+
+    See Also
+    --------
+    ma.concatenate : Concatenate function that preserves input masks.
+    array_split : Split an array into multiple sub-arrays of equal or
+                  near-equal size.
+    split : Split array into a list of multiple sub-arrays of equal size.
+    hsplit : Split array into multiple sub-arrays horizontally (column wise)
+    vsplit : Split array into multiple sub-arrays vertically (row wise)
+    dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
+    stack : Stack a sequence of arrays along a new axis.
+    hstack : Stack arrays in sequence horizontally (column wise)
+    vstack : Stack arrays in sequence vertically (row wise)
+    dstack : Stack arrays in sequence depth wise (along third dimension)
+    block : Assemble arrays from blocks.
+
+    Notes
+    -----
+    When one or more of the arrays to be concatenated is a MaskedArray,
+    this function will return a MaskedArray object instead of an ndarray,
+    but the input masks are *not* preserved. In cases where a MaskedArray
+    is expected as input, use the ma.concatenate function from the masked
+    array module instead.
+
+    Examples
+    --------
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> b = np.array([[5, 6]])
+    >>> np.concatenate((a, b), axis=0)
+    array([[1, 2],
+           [3, 4],
+           [5, 6]])
+    >>> np.concatenate((a, b.T), axis=1)
+    array([[1, 2, 5],
+           [3, 4, 6]])
+    >>> np.concatenate((a, b), axis=None)
+    array([1, 2, 3, 4, 5, 6])
+
+    This function will not preserve masking of MaskedArray inputs.
+
+    >>> a = np.ma.arange(3)
+    >>> a[1] = np.ma.masked
+    >>> b = np.arange(2, 5)
+    >>> a
+    masked_array(data=[0, --, 2],
+                 mask=[False,  True, False],
+           fill_value=999999)
+    >>> b
+    array([2, 3, 4])
+    >>> np.concatenate([a, b])
+    masked_array(data=[0, 1, 2, 2, 3, 4],
+                 mask=False,
+           fill_value=999999)
+    >>> np.ma.concatenate([a, b])
+    masked_array(data=[0, --, 2, 2, 3, 4],
+                 mask=[False,  True, False, False, False, False],
+           fill_value=999999)
+
+    """
+    return _multiarray_umath.concatenate(arrays, axis, out)
+
+
+def _inner_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_inner_dispatcher)
+def inner(a, b):
+    """
+    Inner product of two arrays.
+
+    Ordinary inner product of vectors for 1-D arrays (without complex
+    conjugation), in higher dimensions a sum product over the last axes.
+
+    Parameters
+    ----------
+    a, b : array_like
+        If `a` and `b` are nonscalar, their last dimensions must match.
+
+    Returns
+    -------
+    out : ndarray
+        `out.shape = a.shape[:-1] + b.shape[:-1]`
+
+    Raises
+    ------
+    ValueError
+        If the last dimension of `a` and `b` has different size.
+
+    See Also
+    --------
+    tensordot : Sum products over arbitrary axes.
+    dot : Generalised matrix product, using second last dimension of `b`.
+    einsum : Einstein summation convention.
+
+    Notes
+    -----
+    For vectors (1-D arrays) it computes the ordinary inner-product::
+
+        np.inner(a, b) = sum(a[:]*b[:])
+
+    More generally, if `ndim(a) = r > 0` and `ndim(b) = s > 0`::
+
+        np.inner(a, b) = np.tensordot(a, b, axes=(-1,-1))
+
+    or explicitly::
+
+        np.inner(a, b)[i0,...,ir-1,j0,...,js-1]
+             = sum(a[i0,...,ir-1,:]*b[j0,...,js-1,:])
+
+    In addition `a` or `b` may be scalars, in which case::
+
+       np.inner(a,b) = a*b
+
+    Examples
+    --------
+    Ordinary inner product for vectors:
+
+    >>> a = np.array([1,2,3])
+    >>> b = np.array([0,1,0])
+    >>> np.inner(a, b)
+    2
+
+    A multidimensional example:
+
+    >>> a = np.arange(24).reshape((2,3,4))
+    >>> b = np.arange(4)
+    >>> np.inner(a, b)
+    array([[ 14,  38,  62],
+           [ 86, 110, 134]])
+
+    An example where `b` is a scalar:
+
+    >>> np.inner(np.eye(2), 7)
+    array([[ 7.,  0.],
+           [ 0.,  7.]])
+
+    """
+    return _multiarray_umath.inner(a, b)
+
+
+def _where_dispatcher(condition, x=None, y=None):
+    return (condition, x, y)
+
+
+@array_function_dispatch(_where_dispatcher)
+def where(condition, x=np._NoValue, y=np._NoValue):
+    """
+    where(condition, [x, y])
+
+    Return elements chosen from `x` or `y` depending on `condition`.
+
+    .. note::
+        When only `condition` is provided, this function is a shorthand for
+        ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
+        preferred, as it behaves correctly for subclasses. The rest of this
+        documentation covers only the case where all three arguments are
+        provided.
+
+    Parameters
+    ----------
+    condition : array_like, bool
+        Where True, yield `x`, otherwise yield `y`.
+    x, y : array_like
+        Values from which to choose. `x`, `y` and `condition` need to be
+        broadcastable to some shape.
+
+    Returns
+    -------
+    out : ndarray
+        An array with elements from `x` where `condition` is True, and elements
+        from `y` elsewhere.
+
+    See Also
+    --------
+    choose
+    nonzero : The function that is called when x and y are omitted
+
+    Notes
+    -----
+    If all the arrays are 1-D, `where` is equivalent to::
+
+        [xv if c else yv
+         for c, xv, yv in zip(condition, x, y)]
+
+    Examples
+    --------
+    >>> a = np.arange(10)
+    >>> a
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> np.where(a < 5, a, 10*a)
+    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
+
+    This can be used on multidimensional arrays too:
+
+    >>> np.where([[True, False], [True, True]],
+    ...          [[1, 2], [3, 4]],
+    ...          [[9, 8], [7, 6]])
+    array([[1, 8],
+           [3, 4]])
+
+    The shapes of x, y, and the condition are broadcast together:
+
+    >>> x, y = np.ogrid[:3, :4]
+    >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
+    array([[10,  0,  0,  0],
+           [10, 11,  1,  1],
+           [10, 11, 12,  2]])
+
+    >>> a = np.array([[0, 1, 2],
+    ...               [0, 2, 4],
+    ...               [0, 3, 6]])
+    >>> np.where(a < 4, a, -1)  # -1 is broadcast
+    array([[ 0,  1,  2],
+           [ 0,  2, -1],
+           [ 0,  3, -1]])
+    """
+    # _multiarray_umath.where only accepts positional arguments
+    args = tuple(a for a in (x, y) if a is not np._NoValue)
+    return _multiarray_umath.where(condition, *args)
+
+
+def _lexsort_dispatcher(keys, axis=None):
+    if isinstance(keys, tuple):
+        return keys
+    else:
+        return (keys,)
+
+
+@array_function_dispatch(_lexsort_dispatcher)
+def lexsort(keys, axis=-1):
+    """
+    Perform an indirect stable sort using a sequence of keys.
+
+    Given multiple sorting keys, which can be interpreted as columns in a
+    spreadsheet, lexsort returns an array of integer indices that describes
+    the sort order by multiple columns. The last key in the sequence is used
+    for the primary sort order, the second-to-last key for the secondary sort
+    order, and so on. The keys argument must be a sequence of objects that
+    can be converted to arrays of the same shape. If a 2D array is provided
+    for the keys argument, its rows are interpreted as the sorting keys and
+    sorting is according to the last row, second last row etc.
+
+    Parameters
+    ----------
+    keys : (k, N) array or tuple containing k (N,)-shaped sequences
+        The `k` different "columns" to be sorted.  The last column (or row if
+        `keys` is a 2D array) is the primary sort key.
+    axis : int, optional
+        Axis to be indirectly sorted.  By default, sort over the last axis.
+
+    Returns
+    -------
+    indices : (N,) ndarray of ints
+        Array of indices that sort the keys along the specified axis.
+
+    See Also
+    --------
+    argsort : Indirect sort.
+    ndarray.sort : In-place sort.
+    sort : Return a sorted copy of an array.
+
+    Examples
+    --------
+    Sort names: first by surname, then by name.
+
+    >>> surnames =    ('Hertz',    'Galilei', 'Hertz')
+    >>> first_names = ('Heinrich', 'Galileo', 'Gustav')
+    >>> ind = np.lexsort((first_names, surnames))
+    >>> ind
+    array([1, 2, 0])
+
+    >>> [surnames[i] + ", " + first_names[i] for i in ind]
+    ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich']
+
+    Sort two columns of numbers:
+
+    >>> a = [1,5,1,4,3,4,4] # First column
+    >>> b = [9,4,0,4,0,2,1] # Second column
+    >>> ind = np.lexsort((b,a)) # Sort by a, then by b
+    >>> print(ind)
+    [2 0 4 6 5 3 1]
+
+    >>> [(a[i],b[i]) for i in ind]
+    [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)]
+
+    Note that sorting is first according to the elements of ``a``.
+    Secondary sorting is according to the elements of ``b``.
+
+    A normal ``argsort`` would have yielded:
+
+    >>> [(a[i],b[i]) for i in np.argsort(a)]
+    [(1, 9), (1, 0), (3, 0), (4, 4), (4, 2), (4, 1), (5, 4)]
+
+    Structured arrays are sorted lexically by ``argsort``:
+
+    >>> x = np.array([(1,9), (5,4), (1,0), (4,4), (3,0), (4,2), (4,1)],
+    ...              dtype=np.dtype([('x', int), ('y', int)]))
+
+    >>> np.argsort(x) # or np.argsort(x, order=('x', 'y'))
+    array([2, 0, 4, 6, 5, 3, 1])
+
+    """
+    return _multiarray_umath.lexsort(keys, axis)
+
+
+def _can_cast_dispatcher(from_, to, casting=None):
+    return (from_,)
+
+
+@array_function_dispatch(_can_cast_dispatcher)
+def can_cast(from_, to, casting='safe'):
+    """
+    Returns True if cast between data types can occur according to the
+    casting rule.  If `from_` is a scalar or array scalar, also returns
+    True if the scalar value can be cast without overflow or truncation
+    to an integer.
+
+    Parameters
+    ----------
+    from_ : dtype, dtype specifier, scalar, or array
+        Data type, scalar, or array to cast from.
+    to : dtype or dtype specifier
+        Data type to cast to.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+        Controls what kind of data casting may occur.
+
+          * 'no' means the data types should not be cast at all.
+          * 'equiv' means only byte-order changes are allowed.
+          * 'safe' means only casts which can preserve values are allowed.
+          * 'same_kind' means only safe casts or casts within a kind,
+            like float64 to float32, are allowed.
+          * 'unsafe' means any data conversions may be done.
+
+    Returns
+    -------
+    out : bool
+        True if cast can occur according to the casting rule.
+
+    Notes
+    -----
+    Starting in NumPy 1.9, can_cast function now returns False in 'safe'
+    casting mode for integer/float dtype and string dtype if the string dtype
+    length is not long enough to store the max integer/float value converted
+    to a string. Previously can_cast in 'safe' mode returned True for
+    integer/float dtype and a string dtype of any length.
+
+    See Also
+    --------
+    dtype, result_type
+
+    Examples
+    --------
+    Basic examples
+
+    >>> np.can_cast(np.int32, np.int64)
+    True
+    >>> np.can_cast(np.float64, complex)
+    True
+    >>> np.can_cast(complex, float)
+    False
+
+    >>> np.can_cast('i8', 'f8')
+    True
+    >>> np.can_cast('i8', 'f4')
+    False
+    >>> np.can_cast('i4', 'S4')
+    False
+
+    Casting scalars
+
+    >>> np.can_cast(100, 'i1')
+    True
+    >>> np.can_cast(150, 'i1')
+    False
+    >>> np.can_cast(150, 'u1')
+    True
+
+    >>> np.can_cast(3.5e100, np.float32)
+    False
+    >>> np.can_cast(1000.0, np.float32)
+    True
+
+    Array scalar checks the value, array does not
+
+    >>> np.can_cast(np.array(1000.0), np.float32)
+    True
+    >>> np.can_cast(np.array([1000.0]), np.float32)
+    False
+
+    Using the casting rules
+
+    >>> np.can_cast('i8', 'i8', 'no')
+    True
+    >>> np.can_cast('<i8', '>i8', 'no')
+    False
+
+    >>> np.can_cast('<i8', '>i8', 'equiv')
+    True
+    >>> np.can_cast('<i4', '>i8', 'equiv')
+    False
+
+    >>> np.can_cast('<i4', '>i8', 'safe')
+    True
+    >>> np.can_cast('<i8', '>i4', 'safe')
+    False
+
+    >>> np.can_cast('<i8', '>i4', 'same_kind')
+    True
+    >>> np.can_cast('<i8', '>u4', 'same_kind')
+    False
+
+    >>> np.can_cast('<i8', '>u4', 'unsafe')
+    True
+
+    """
+    return _multiarray_umath.can_cast(from_, to, casting)
+
+
+def _min_scalar_type_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_min_scalar_type_dispatcher)
+def min_scalar_type(a):
+    """
+    For scalar ``a``, returns the data type with the smallest size
+    and smallest scalar kind which can hold its value.  For non-scalar
+    array ``a``, returns the vector's dtype unmodified.
+
+    Floating point values are not demoted to integers,
+    and complex values are not demoted to floats.
+
+    Parameters
+    ----------
+    a : scalar or array_like
+        The value whose minimal data type is to be found.
+
+    Returns
+    -------
+    out : dtype
+        The minimal data type.
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    See Also
+    --------
+    result_type, promote_types, dtype, can_cast
+
+    Examples
+    --------
+    >>> np.min_scalar_type(10)
+    dtype('uint8')
+
+    >>> np.min_scalar_type(-260)
+    dtype('int16')
+
+    >>> np.min_scalar_type(3.1)
+    dtype('float16')
+
+    >>> np.min_scalar_type(1e50)
+    dtype('float64')
+
+    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
+    dtype('float64')
+
+    """
+    return _multiarray_umath.min_scalar_type(a)
+
+
+def _result_type_dispatcher(*arrays_and_dtypes):
+    return arrays_and_dtypes
+
+
+@array_function_dispatch(_result_type_dispatcher)
+def result_type(*arrays_and_dtypes):
+    """
+    Returns the type that results from applying the NumPy
+    type promotion rules to the arguments.
+
+    Type promotion in NumPy works similarly to the rules in languages
+    like C++, with some slight differences.  When both scalars and
+    arrays are used, the array's type takes precedence and the actual value
+    of the scalar is taken into account.
+
+    For example, calculating 3*a, where a is an array of 32-bit floats,
+    intuitively should result in a 32-bit float output.  If the 3 is a
+    32-bit integer, the NumPy rules indicate it can't convert losslessly
+    into a 32-bit float, so a 64-bit float should be the result type.
+    By examining the value of the constant, '3', we see that it fits in
+    an 8-bit integer, which can be cast losslessly into the 32-bit float.
+
+    Parameters
+    ----------
+    arrays_and_dtypes : list of arrays and dtypes
+        The operands of some operation whose result type is needed.
+
+    Returns
+    -------
+    out : dtype
+        The result type.
+
+    See Also
+    --------
+    dtype, promote_types, min_scalar_type, can_cast
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    The specific algorithm used is as follows.
+
+    Categories are determined by first checking which of boolean,
+    integer (int/uint), or floating point (float/complex) the maximum
+    kind of all the arrays and the scalars are.
+
+    If there are only scalars or the maximum category of the scalars
+    is higher than the maximum category of the arrays,
+    the data types are combined with :func:`promote_types`
+    to produce the return value.
+
+    Otherwise, `min_scalar_type` is called on each array, and
+    the resulting data types are all combined with :func:`promote_types`
+    to produce the return value.
+
+    The set of int values is not a subset of the uint values for types
+    with the same number of bits, something not reflected in
+    :func:`min_scalar_type`, but handled as a special case in `result_type`.
+
+    Examples
+    --------
+    >>> np.result_type(3, np.arange(7, dtype='i1'))
+    dtype('int8')
+
+    >>> np.result_type('i4', 'c8')
+    dtype('complex128')
+
+    >>> np.result_type(3.0, -2)
+    dtype('float64')
+
+    """
+    return _multiarray_umath.result_type(*arrays_and_dtypes)
+
+
+def _dot_dispatcher(a, b, out=None):
+    return (a, b, out)
+
+
+@array_function_dispatch(_dot_dispatcher)
+def dot(a, b, out=None):
+    """
+    Dot product of two arrays. Specifically,
+
+    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
+      (without complex conjugation).
+
+    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
+      but using :func:`matmul` or ``a @ b`` is preferred.
+
+    - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
+      and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
+
+    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
+      the last axis of `a` and `b`.
+
+    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
+      sum product over the last axis of `a` and the second-to-last axis of `b`::
+
+        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
+
+    Parameters
+    ----------
+    a : array_like
+        First argument.
+    b : array_like
+        Second argument.
+    out : ndarray, optional
+        Output argument. This must have the exact kind that would be returned
+        if it was not used. In particular, it must have the right type, must be
+        C-contiguous, and its dtype must be the dtype that would be returned
+        for `dot(a,b)`. This is a performance feature. Therefore, if these
+        conditions are not met, an exception is raised, instead of attempting
+        to be flexible.
+
+    Returns
+    -------
+    output : ndarray
+        Returns the dot product of `a` and `b`.  If `a` and `b` are both
+        scalars or both 1-D arrays then a scalar is returned; otherwise
+        an array is returned.
+        If `out` is given, then it is returned.
+
+    Raises
+    ------
+    ValueError
+        If the last dimension of `a` is not the same size as
+        the second-to-last dimension of `b`.
+
+    See Also
+    --------
+    vdot : Complex-conjugating dot product.
+    tensordot : Sum products over arbitrary axes.
+    einsum : Einstein summation convention.
+    matmul : '@' operator as method with out parameter.
+
+    Examples
+    --------
+    >>> np.dot(3, 4)
+    12
+
+    Neither argument is complex-conjugated:
+
+    >>> np.dot([2j, 3j], [2j, 3j])
+    (-13+0j)
+
+    For 2-D arrays it is the matrix product:
+
+    >>> a = [[1, 0], [0, 1]]
+    >>> b = [[4, 1], [2, 2]]
+    >>> np.dot(a, b)
+    array([[4, 1],
+           [2, 2]])
+
+    >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
+    >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
+    >>> np.dot(a, b)[2,3,2,1,2,2]
+    499128
+    >>> sum(a[2,3,2,:] * b[1,2,:,2])
+    499128
+
+    """
+    return _multiarray_umath.dot(a, b, out)
+
+
+def _vdot_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_vdot_dispatcher)
+def vdot(a, b):
+    """
+    Return the dot product of two vectors.
+
+    The vdot(`a`, `b`) function handles complex numbers differently than
+    dot(`a`, `b`).  If the first argument is complex the complex conjugate
+    of the first argument is used for the calculation of the dot product.
+
+    Note that `vdot` handles multidimensional arrays differently than `dot`:
+    it does *not* perform a matrix product, but flattens input arguments
+    to 1-D vectors first. Consequently, it should only be used for vectors.
+
+    Parameters
+    ----------
+    a : array_like
+        If `a` is complex the complex conjugate is taken before calculation
+        of the dot product.
+    b : array_like
+        Second argument to the dot product.
+
+    Returns
+    -------
+    output : ndarray
+        Dot product of `a` and `b`.  Can be an int, float, or
+        complex depending on the types of `a` and `b`.
+
+    See Also
+    --------
+    dot : Return the dot product without using the complex conjugate of the
+          first argument.
+
+    Examples
+    --------
+    >>> a = np.array([1+2j,3+4j])
+    >>> b = np.array([5+6j,7+8j])
+    >>> np.vdot(a, b)
+    (70-8j)
+    >>> np.vdot(b, a)
+    (70+8j)
+
+    Note that higher-dimensional arrays are flattened!
+
+    >>> a = np.array([[1, 4], [5, 6]])
+    >>> b = np.array([[4, 1], [2, 2]])
+    >>> np.vdot(a, b)
+    30
+    >>> np.vdot(b, a)
+    30
+    >>> 1*4 + 4*1 + 5*2 + 6*2
+    30
+
+    """
+    return _multiarray_umath.vdot(a, b)
+
+
+def _is_busday_dispatcher(
+        dates, weekmask=None, holidays=None, busdaycal=None, out=None):
+    return (dates, weekmask, holidays, out)
+
+
+@array_function_dispatch(_is_busday_dispatcher)
+def is_busday(dates, weekmask=None, holidays=None, busdaycal=None,
+              out=None):
+    """
+    is_busday(dates, weekmask='1111100', holidays=None, busdaycal=None, out=None)
+
+    Calculates which of the given dates are valid days, and which are not.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    dates : array_like of datetime64[D]
+        The array of dates to process.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of bool, optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of bool
+        An array with the same shape as ``dates``, containing True for
+        each valid day, and False for each invalid day.
+
+    See Also
+    --------
+    busdaycalendar: An object that specifies a custom set of valid days.
+    busday_offset : Applies an offset counted in valid days.
+    busday_count : Counts how many valid days are in a half-open date range.
+
+    Examples
+    --------
+    >>> # The weekdays are Friday, Saturday, and Monday
+    ... np.is_busday(['2011-07-01', '2011-07-02', '2011-07-18'],
+    ...                 holidays=['2011-07-01', '2011-07-04', '2011-07-17'])
+    array([False, False,  True])
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.is_busday(dates, **kwargs)
+
+
+def _busday_offset_dispatcher(dates, offsets, roll=None, weekmask=None,
+                              holidays=None, busdaycal=None, out=None):
+    return (dates, offsets, weekmask, holidays, out)
+
+
+@array_function_dispatch(_busday_offset_dispatcher)
+def busday_offset(dates, offsets, roll='raise', weekmask=None,
+                  holidays=None, busdaycal=None, out=None):
+    """
+    busday_offset(dates, offsets, roll='raise', weekmask='1111100', holidays=None, busdaycal=None, out=None)
+
+    First adjusts the date to fall on a valid day according to
+    the ``roll`` rule, then applies offsets to the given dates
+    counted in valid days.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    dates : array_like of datetime64[D]
+        The array of dates to process.
+    offsets : array_like of int
+        The array of offsets, which is broadcast with ``dates``.
+    roll : {'raise', 'nat', 'forward', 'following', 'backward', 'preceding', 'modifiedfollowing', 'modifiedpreceding'}, optional
+        How to treat dates that do not fall on a valid day. The default
+        is 'raise'.
+
+          * 'raise' means to raise an exception for an invalid day.
+          * 'nat' means to return a NaT (not-a-time) for an invalid day.
+          * 'forward' and 'following' mean to take the first valid day
+            later in time.
+          * 'backward' and 'preceding' mean to take the first valid day
+            earlier in time.
+          * 'modifiedfollowing' means to take the first valid day
+            later in time unless it is across a Month boundary, in which
+            case to take the first valid day earlier in time.
+          * 'modifiedpreceding' means to take the first valid day
+            earlier in time unless it is across a Month boundary, in which
+            case to take the first valid day later in time.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of datetime64[D], optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of datetime64[D]
+        An array with a shape from broadcasting ``dates`` and ``offsets``
+        together, containing the dates with offsets applied.
+
+    See Also
+    --------
+    busdaycalendar : An object that specifies a custom set of valid days.
+    is_busday : Returns a boolean array indicating valid days.
+    busday_count : Counts how many valid days are in a half-open date range.
+
+    Examples
+    --------
+    >>> # First business day in October 2011 (not accounting for holidays)
+    ... np.busday_offset('2011-10', 0, roll='forward')
+    numpy.datetime64('2011-10-03','D')
+    >>> # Last business day in February 2012 (not accounting for holidays)
+    ... np.busday_offset('2012-03', -1, roll='forward')
+    numpy.datetime64('2012-02-29','D')
+    >>> # Third Wednesday in January 2011
+    ... np.busday_offset('2011-01', 2, roll='forward', weekmask='Wed')
+    numpy.datetime64('2011-01-19','D')
+    >>> # 2012 Mother's Day in Canada and the U.S.
+    ... np.busday_offset('2012-05', 1, roll='forward', weekmask='Sun')
+    numpy.datetime64('2012-05-13','D')
+
+    >>> # First business day on or after a date
+    ... np.busday_offset('2011-03-20', 0, roll='forward')
+    numpy.datetime64('2011-03-21','D')
+    >>> np.busday_offset('2011-03-22', 0, roll='forward')
+    numpy.datetime64('2011-03-22','D')
+    >>> # First business day after a date
+    ... np.busday_offset('2011-03-20', 1, roll='backward')
+    numpy.datetime64('2011-03-21','D')
+    >>> np.busday_offset('2011-03-22', 1, roll='backward')
+    numpy.datetime64('2011-03-23','D')
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.busday_offset(dates, offsets, roll, **kwargs)
+
+
+def _busday_count_dispatcher(begindates, enddates, weekmask=None,
+                             holidays=None, busdaycal=None, out=None):
+    return (begindates, enddates, weekmask, holidays, out)
+
+
+@array_function_dispatch(_busday_count_dispatcher)
+def busday_count(begindates, enddates, weekmask=None, holidays=None,
+                 busdaycal=None, out=None):
+    """
+    busday_count(begindates, enddates, weekmask='1111100', holidays=None, busdaycal=None, out=None)
+
+    Counts the number of valid days between `begindates` and
+    `enddates`, not including the day of `enddates`.
+
+    If ``enddates`` specifies a date value that is earlier than the
+    corresponding ``begindates`` date value, the count will be negative.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    begindates : array_like of datetime64[D]
+        The array of the first dates for counting.
+    enddates : array_like of datetime64[D]
+        The array of the end dates for counting, which are excluded
+        from the count themselves.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of int, optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of int
+        An array with a shape from broadcasting ``begindates`` and ``enddates``
+        together, containing the number of valid days between
+        the begin and end dates.
+
+    See Also
+    --------
+    busdaycalendar : An object that specifies a custom set of valid days.
+    is_busday : Returns a boolean array indicating valid days.
+    busday_offset : Applies an offset counted in valid days.
+
+    Examples
+    --------
+    >>> # Number of weekdays in January 2011
+    ... np.busday_count('2011-01', '2011-02')
+    21
+    >>> # Number of weekdays in 2011
+    ... np.busday_count('2011', '2012')
+    260
+    >>> # Number of Saturdays in 2011
+    ... np.busday_count('2011', '2012', weekmask='Sat')
+    53
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.busday_count(begindates, enddates, **kwargs)
+
+
+def _datetime_as_string_dispatcher(
+        arr, unit=None, timezone=None, casting=None):
+    return (arr,)
+
+
+@array_function_dispatch(_datetime_as_string_dispatcher)
+def datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind'):
+    """
+    Convert an array of datetimes into an array of strings.
+
+    Parameters
+    ----------
+    arr : array_like of datetime64
+        The array of UTC timestamps to format.
+    unit : str, optional
+        One of None, 'auto', or a :ref:`datetime unit <arrays.dtypes.dateunits>`.
+    timezone : {'naive', 'UTC', 'local'} or tzinfo, optional
+        Timezone information to use when displaying the datetime. If 'UTC', end
+        with a Z to indicate UTC time. If 'local', convert to the local timezone
+        first, and suffix with a +-#### timezone offset. If a tzinfo object,
+        then do as with 'local', but use the specified timezone.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+        Casting to allow when changing between datetime units.
+
+    Returns
+    -------
+    str_arr : ndarray
+        An array of strings the same shape as `arr`.
+
+    Examples
+    --------
+    >>> d = np.arange('2002-10-27T04:30', 4*60, 60, dtype='M8[m]')
+    >>> d
+    array(['2002-10-27T04:30', '2002-10-27T05:30', '2002-10-27T06:30',
+           '2002-10-27T07:30'], dtype='datetime64[m]')
+
+    Setting the timezone to UTC shows the same information, but with a Z suffix
+
+    >>> np.datetime_as_string(d, timezone='UTC')
+    array(['2002-10-27T04:30Z', '2002-10-27T05:30Z', '2002-10-27T06:30Z',
+           '2002-10-27T07:30Z'], dtype='<U35')
+
+    Note that we picked datetimes that cross a DST boundary. Passing in a
+    ``pytz`` timezone object will print the appropriate offset
+
+    >>> np.datetime_as_string(d, timezone=pytz.timezone('US/Eastern'))
+    array(['2002-10-27T00:30-0400', '2002-10-27T01:30-0400',
+           '2002-10-27T01:30-0500', '2002-10-27T02:30-0500'], dtype='<U39')
+
+    Passing in a unit will change the precision
+
+    >>> np.datetime_as_string(d, unit='h')
+    array(['2002-10-27T04', '2002-10-27T05', '2002-10-27T06', '2002-10-27T07'],
+          dtype='<U32')
+    >>> np.datetime_as_string(d, unit='s')
+    array(['2002-10-27T04:30:00', '2002-10-27T05:30:00', '2002-10-27T06:30:00',
+           '2002-10-27T07:30:00'], dtype='<U38')
+
+    'casting' can be used to specify whether precision can be changed
+
+    >>> np.datetime_as_string(d, unit='h', casting='safe')
+    TypeError: Cannot create a datetime string as units 'h' from a NumPy
+    datetime with units 'm' according to the rule 'safe'
+    """
+    return _multiarray_umath.datetime_as_string(arr, unit, timezone, casting)
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index 1b4818b76..6e4e585c3 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -18,7 +18,7 @@ from .multiarray import (
     _fastCopyAndTranspose as fastCopyAndTranspose, ALLOW_THREADS,
     BUFSIZE, CLIP, MAXDIMS, MAY_SHARE_BOUNDS, MAY_SHARE_EXACT, RAISE,
     WRAP, arange, array, broadcast, can_cast, compare_chararrays,
-    concatenate, copyto, count_nonzero, dot, dtype, empty,
+    concatenate, copyto, dot, dtype, empty,
     empty_like, flatiter, frombuffer, fromfile, fromiter, fromstring,
     inner, int_asbuffer, lexsort, matmul, may_share_memory,
     min_scalar_type, ndarray, nditer, nested_iters, promote_types,
@@ -28,6 +28,7 @@ if sys.version_info[0] < 3:
     from .multiarray import newbuffer, getbuffer
 
 from . import umath
+from .overrides import array_function_dispatch
 from .umath import (multiply, invert, sin, UFUNC_BUFSIZE_DEFAULT,
                     ERR_IGNORE, ERR_WARN, ERR_RAISE, ERR_CALL, ERR_PRINT,
                     ERR_LOG, ERR_DEFAULT, PINF, NAN)
@@ -40,7 +41,13 @@ ufunc = type(sin)
 newaxis = None
 
 if sys.version_info[0] >= 3:
-    import pickle
+    if sys.version_info[1] in (6, 7):
+        try:
+            import pickle5 as pickle
+        except ImportError:
+            import pickle
+    else:
+        import pickle
     basestring = str
     import builtins
 else:
@@ -91,6 +98,11 @@ class ComplexWarning(RuntimeWarning):
     pass
 
 
+def _zeros_like_dispatcher(a, dtype=None, order=None, subok=None):
+    return (a,)
+
+
+@array_function_dispatch(_zeros_like_dispatcher)
 def zeros_like(a, dtype=None, order='K', subok=True):
     """
     Return an array of zeros with the same shape and type as a given array.
@@ -205,6 +217,11 @@ def ones(shape, dtype=None, order='C'):
     return a
 
 
+def _ones_like_dispatcher(a, dtype=None, order=None, subok=None):
+    return (a,)
+
+
+@array_function_dispatch(_ones_like_dispatcher)
 def ones_like(a, dtype=None, order='K', subok=True):
     """
     Return an array of ones with the same shape and type as a given array.
@@ -311,6 +328,11 @@ def full(shape, fill_value, dtype=None, order='C'):
     return a
 
 
+def _full_like_dispatcher(a, fill_value, dtype=None, order=None, subok=None):
+    return (a,)
+
+
+@array_function_dispatch(_full_like_dispatcher)
 def full_like(a, fill_value, dtype=None, order='K', subok=True):
     """
     Return a full array with the same shape and type as a given array.
@@ -368,6 +390,11 @@ def full_like(a, fill_value, dtype=None, order='K', subok=True):
     return res
 
 
+def _count_nonzero_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_count_nonzero_dispatcher)
 def count_nonzero(a, axis=None):
     """
     Counts the number of non-zero values in the array ``a``.
@@ -787,6 +814,11 @@ def isfortran(a):
     return a.flags.fnc
 
 
+def _argwhere_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_argwhere_dispatcher)
 def argwhere(a):
     """
     Find the indices of array elements that are non-zero, grouped by element.
@@ -828,6 +860,11 @@ def argwhere(a):
     return transpose(nonzero(a))
 
 
+def _flatnonzero_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_flatnonzero_dispatcher)
 def flatnonzero(a):
     """
     Return indices that are non-zero in the flattened version of a.
@@ -879,6 +916,11 @@ def _mode_from_name(mode):
     return mode
 
 
+def _correlate_dispatcher(a, v, mode=None):
+    return (a, v)
+
+
+@array_function_dispatch(_correlate_dispatcher)
 def correlate(a, v, mode='valid'):
     """
     Cross-correlation of two 1-dimensional sequences.
@@ -947,6 +989,11 @@ def correlate(a, v, mode='valid'):
     return multiarray.correlate2(a, v, mode)
 
 
+def _convolve_dispatcher(a, v, mode=None):
+    return (a, v)
+
+
+@array_function_dispatch(_convolve_dispatcher)
 def convolve(a, v, mode='full'):
     """
     Returns the discrete, linear convolution of two one-dimensional sequences.
@@ -1046,6 +1093,11 @@ def convolve(a, v, mode='full'):
     return multiarray.correlate(a, v[::-1], mode)
 
 
+def _outer_dispatcher(a, b, out=None):
+    return (a, b, out)
+
+
+@array_function_dispatch(_outer_dispatcher)
 def outer(a, b, out=None):
     """
     Compute the outer product of two vectors.
@@ -1130,6 +1182,11 @@ def outer(a, b, out=None):
     return multiply(a.ravel()[:, newaxis], b.ravel()[newaxis, :], out)
 
 
+def _tensordot_dispatcher(a, b, axes=None):
+    return (a, b)
+
+
+@array_function_dispatch(_tensordot_dispatcher)
 def tensordot(a, b, axes=2):
     """
     Compute tensor dot product along specified axes for arrays >= 1-D.
@@ -1316,6 +1373,11 @@ def tensordot(a, b, axes=2):
     return res.reshape(olda + oldb)
 
 
+def _roll_dispatcher(a, shift, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_roll_dispatcher)
 def roll(a, shift, axis=None):
     """
     Roll array elements along a given axis.
@@ -1405,6 +1467,11 @@ def roll(a, shift, axis=None):
         return result
 
 
+def _rollaxis_dispatcher(a, axis, start=None):
+    return (a,)
+
+
+@array_function_dispatch(_rollaxis_dispatcher)
 def rollaxis(a, axis, start=0):
     """
     Roll the specified axis backwards, until it lies in a given position.
@@ -1525,6 +1592,11 @@ def normalize_axis_tuple(axis, ndim, argname=None, allow_duplicate=False):
     return axis
 
 
+def _moveaxis_dispatcher(a, source, destination):
+    return (a,)
+
+
+@array_function_dispatch(_moveaxis_dispatcher)
 def moveaxis(a, source, destination):
     """
     Move axes of an array to new positions.
@@ -1601,6 +1673,11 @@ def _move_axis_to_0(a, axis):
     return moveaxis(a, axis, 0)
 
 
+def _cross_dispatcher(a, b, axisa=None, axisb=None, axisc=None, axis=None):
+    return (a, b)
+
+
+@array_function_dispatch(_cross_dispatcher)
 def cross(a, b, axisa=-1, axisb=-1, axisc=-1, axis=None):
     """
     Return the cross product of two (arrays of) vectors.
@@ -1928,6 +2005,10 @@ def fromfunction(function, shape, **kwargs):
     return function(*args, **kwargs)
 
 
+def _frombuffer(buf, dtype, shape, order):
+    return frombuffer(buf, dtype=dtype).reshape(shape, order=order)
+
+
 def isscalar(num):
     """
     Returns True if the type of `num` is a scalar type.
@@ -1942,10 +2023,46 @@ def isscalar(num):
     val : bool
         True if `num` is a scalar type, False if it is not.
 
+    See Also
+    --------
+    ndim : Get the number of dimensions of an array
+
+    Notes
+    -----
+    In almost all cases ``np.ndim(x) == 0`` should be used instead of this
+    function, as that will also return true for 0d arrays. This is how
+    numpy overloads functions in the style of the ``dx`` arguments to `gradient`
+    and the ``bins`` argument to `histogram`. Some key differences:
+
+    +--------------------------------------+---------------+-------------------+
+    | x                                    |``isscalar(x)``|``np.ndim(x) == 0``|
+    +======================================+===============+===================+
+    | PEP 3141 numeric objects (including  | ``True``      | ``True``          |
+    | builtins)                            |               |                   |
+    +--------------------------------------+---------------+-------------------+
+    | builtin string and buffer objects    | ``True``      | ``True``          |
+    +--------------------------------------+---------------+-------------------+
+    | other builtin objects, like          | ``False``     | ``True``          |
+    | `pathlib.Path`, `Exception`,         |               |                   |
+    | the result of `re.compile`           |               |                   |
+    +--------------------------------------+---------------+-------------------+
+    | third-party objects like             | ``False``     | ``True``          |
+    | `matplotlib.figure.Figure`           |               |                   |
+    +--------------------------------------+---------------+-------------------+
+    | zero-dimensional numpy arrays        | ``False``     | ``True``          |
+    +--------------------------------------+---------------+-------------------+
+    | other numpy arrays                   | ``False``     | ``False``         |
+    +--------------------------------------+---------------+-------------------+
+    | `list`, `tuple`, and other sequence  | ``False``     | ``False``         |
+    | objects                              |               |                   |
+    +--------------------------------------+---------------+-------------------+
+
     Examples
     --------
     >>> np.isscalar(3.1)
     True
+    >>> np.isscalar(np.array(3.1))
+    False
     >>> np.isscalar([3.1])
     False
     >>> np.isscalar(False)
@@ -2204,6 +2321,11 @@ def identity(n, dtype=None):
     return eye(n, dtype=dtype)
 
 
+def _allclose_dispatcher(a, b, rtol=None, atol=None, equal_nan=None):
+    return (a, b)
+
+
+@array_function_dispatch(_allclose_dispatcher)
 def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
     """
     Returns True if two arrays are element-wise equal within a tolerance.
@@ -2275,6 +2397,11 @@ def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
     return bool(res)
 
 
+def _isclose_dispatcher(a, b, rtol=None, atol=None, equal_nan=None):
+    return (a, b)
+
+
+@array_function_dispatch(_isclose_dispatcher)
 def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
     """
     Returns a boolean array where two arrays are element-wise equal within a
@@ -2390,6 +2517,11 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
         return cond[()]  # Flatten 0d arrays to scalars
 
 
+def _array_equal_dispatcher(a1, a2):
+    return (a1, a2)
+
+
+@array_function_dispatch(_array_equal_dispatcher)
 def array_equal(a1, a2):
     """
     True if two arrays have the same shape and elements, False otherwise.
@@ -2432,6 +2564,11 @@ def array_equal(a1, a2):
     return bool(asarray(a1 == a2).all())
 
 
+def _array_equiv_dispatcher(a1, a2):
+    return (a1, a2)
+
+
+@array_function_dispatch(_array_equiv_dispatcher)
 def array_equiv(a1, a2):
     """
     Returns True if input arrays are shape consistent and all elements equal.
diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py
index 817af4c7b..2fb841f7c 100644
--- a/numpy/core/numerictypes.py
+++ b/numpy/core/numerictypes.py
@@ -92,7 +92,6 @@ from numpy.core.multiarray import (
         datetime_as_string, busday_offset, busday_count, is_busday,
         busdaycalendar
         )
-from numpy._globals import VisibleDeprecationWarning
 
 # we add more at the bottom
 __all__ = ['sctypeDict', 'sctypeNA', 'typeDict', 'typeNA', 'sctypes',
@@ -102,6 +101,23 @@ __all__ = ['sctypeDict', 'sctypeNA', 'typeDict', 'typeNA', 'sctypes',
            'busday_offset', 'busday_count', 'is_busday', 'busdaycalendar',
            ]
 
+# we don't need all these imports, but we need to keep them for compatibility
+# for users using np.core.numerictypes.UPPER_TABLE
+from ._string_helpers import (
+    english_lower, english_upper, english_capitalize, LOWER_TABLE, UPPER_TABLE
+)
+
+from ._type_aliases import (
+    sctypeDict,
+    sctypeNA,
+    allTypes,
+    bitname,
+    sctypes,
+    _concrete_types,
+    _concrete_typeinfo,
+    _bits_of,
+)
+from ._dtype import _kind_name
 
 # we don't export these for import *, but we do want them accessible
 # as numerictypes.bool, etc.
@@ -112,387 +128,9 @@ else:
     from __builtin__ import bool, int, float, complex, object, unicode, str
 
 
-# String-handling utilities to avoid locale-dependence.
-
-# "import string" is costly to import!
-# Construct the translation tables directly
-#   "A" = chr(65), "a" = chr(97)
-_all_chars = [chr(_m) for _m in range(256)]
-_ascii_upper = _all_chars[65:65+26]
-_ascii_lower = _all_chars[97:97+26]
-LOWER_TABLE = "".join(_all_chars[:65] + _ascii_lower + _all_chars[65+26:])
-UPPER_TABLE = "".join(_all_chars[:97] + _ascii_upper + _all_chars[97+26:])
-
-
-def english_lower(s):
-    """ Apply English case rules to convert ASCII strings to all lower case.
-
-    This is an internal utility function to replace calls to str.lower() such
-    that we can avoid changing behavior with changing locales. In particular,
-    Turkish has distinct dotted and dotless variants of the Latin letter "I" in
-    both lowercase and uppercase. Thus, "I".lower() != "i" in a "tr" locale.
-
-    Parameters
-    ----------
-    s : str
-
-    Returns
-    -------
-    lowered : str
-
-    Examples
-    --------
-    >>> from numpy.core.numerictypes import english_lower
-    >>> english_lower('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_')
-    'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789_'
-    >>> english_lower('')
-    ''
-    """
-    lowered = s.translate(LOWER_TABLE)
-    return lowered
-
-def english_upper(s):
-    """ Apply English case rules to convert ASCII strings to all upper case.
-
-    This is an internal utility function to replace calls to str.upper() such
-    that we can avoid changing behavior with changing locales. In particular,
-    Turkish has distinct dotted and dotless variants of the Latin letter "I" in
-    both lowercase and uppercase. Thus, "i".upper() != "I" in a "tr" locale.
-
-    Parameters
-    ----------
-    s : str
-
-    Returns
-    -------
-    uppered : str
-
-    Examples
-    --------
-    >>> from numpy.core.numerictypes import english_upper
-    >>> english_upper('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_')
-    'ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
-    >>> english_upper('')
-    ''
-    """
-    uppered = s.translate(UPPER_TABLE)
-    return uppered
-
-def english_capitalize(s):
-    """ Apply English case rules to convert the first character of an ASCII
-    string to upper case.
-
-    This is an internal utility function to replace calls to str.capitalize()
-    such that we can avoid changing behavior with changing locales.
-
-    Parameters
-    ----------
-    s : str
-
-    Returns
-    -------
-    capitalized : str
-
-    Examples
-    --------
-    >>> from numpy.core.numerictypes import english_capitalize
-    >>> english_capitalize('int8')
-    'Int8'
-    >>> english_capitalize('Int8')
-    'Int8'
-    >>> english_capitalize('')
-    ''
-    """
-    if s:
-        return english_upper(s[0]) + s[1:]
-    else:
-        return s
-
-
-sctypeDict = {}      # Contains all leaf-node scalar types with aliases
-class TypeNADict(dict):
-    def __getitem__(self, key):
-        # 2018-06-24, 1.16
-        warnings.warn('sctypeNA and typeNA will be removed in v1.18 '
-                      'of numpy', VisibleDeprecationWarning, stacklevel=2)
-        return dict.__getitem__(self, key)
-    def get(self, key, default=None):
-        # 2018-06-24, 1.16
-        warnings.warn('sctypeNA and typeNA will be removed in v1.18 '
-                      'of numpy', VisibleDeprecationWarning, stacklevel=2)
-        return dict.get(self, key, default)
-
-sctypeNA = TypeNADict()  # Contails all leaf-node types -> numarray type equivalences
-allTypes = {}            # Collect the types we will add to the module here
-
-
-# separate the actual type info from the abtract base classes
-_abstract_types = {}
-_concrete_typeinfo = {}
-for k, v in typeinfo.items():
-    # make all the keys lowercase too
-    k = english_lower(k)
-    if isinstance(v, type):
-        _abstract_types[k] = v
-    else:
-        _concrete_typeinfo[k] = v
-
-
-def _evalname(name):
-    k = 0
-    for ch in name:
-        if ch in '0123456789':
-            break
-        k += 1
-    try:
-        bits = int(name[k:])
-    except ValueError:
-        bits = 0
-    base = name[:k]
-    return base, bits
-
-def bitname(obj):
-    """Return a bit-width name for a given type object"""
-    name = obj.__name__
-    base = ''
-    char = ''
-    try:
-        if name[-1] == '_':
-            newname = name[:-1]
-        else:
-            newname = name
-        info = _concrete_typeinfo[english_lower(newname)]
-        assert(info.type == obj)  # sanity check
-        bits = info.bits
-
-    except KeyError:     # bit-width name
-        base, bits = _evalname(name)
-        char = base[0]
-
-    if name == 'bool_':
-        char = 'b'
-        base = 'bool'
-    elif name == 'void':
-        char = 'V'
-        base = 'void'
-    elif name == 'object_':
-        char = 'O'
-        base = 'object'
-        bits = 0
-    elif name == 'datetime64':
-        char = 'M'
-    elif name == 'timedelta64':
-        char = 'm'
-
-    if sys.version_info[0] >= 3:
-        if name == 'bytes_':
-            char = 'S'
-            base = 'bytes'
-        elif name == 'str_':
-            char = 'U'
-            base = 'str'
-    else:
-        if name == 'string_':
-            char = 'S'
-            base = 'string'
-        elif name == 'unicode_':
-            char = 'U'
-            base = 'unicode'
-
-    bytes = bits // 8
-
-    if char != '' and bytes != 0:
-        char = "%s%d" % (char, bytes)
-
-    return base, bits, char
-
-
-def _add_types():
-    for name, info in _concrete_typeinfo.items():
-        # define C-name and insert typenum and typechar references also
-        allTypes[name] = info.type
-        sctypeDict[name] = info.type
-        sctypeDict[info.char] = info.type
-        sctypeDict[info.num] = info.type
-
-    for name, cls in _abstract_types.items():
-        allTypes[name] = cls
-_add_types()
-
-# This is the priority order used to assign the bit-sized NPY_INTxx names, which
-# must match the order in npy_common.h in order for NPY_INTxx and np.intxx to be
-# consistent.
-# If two C types have the same size, then the earliest one in this list is used
-# as the sized name.
-_int_ctypes = ['long', 'longlong', 'int', 'short', 'byte']
-_uint_ctypes = list('u' + t for t in _int_ctypes)
-
-def _add_aliases():
-    for name, info in _concrete_typeinfo.items():
-        # these are handled by _add_integer_aliases
-        if name in _int_ctypes or name in _uint_ctypes:
-            continue
-
-        # insert bit-width version for this class (if relevant)
-        base, bit, char = bitname(info.type)
-
-        assert base != ''
-        myname = "%s%d" % (base, bit)
-
-        # ensure that (c)longdouble does not overwrite the aliases assigned to
-        # (c)double
-        if name in ('longdouble', 'clongdouble') and myname in allTypes:
-            continue
-
-        base_capitalize = english_capitalize(base)
-        if base == 'complex':
-            na_name = '%s%d' % (base_capitalize, bit//2)
-        elif base == 'bool':
-            na_name = base_capitalize
-        else:
-            na_name = "%s%d" % (base_capitalize, bit)
-
-        allTypes[myname] = info.type
-
-        # add mapping for both the bit name and the numarray name
-        sctypeDict[myname] = info.type
-        sctypeDict[na_name] = info.type
-
-        # add forward, reverse, and string mapping to numarray
-        sctypeNA[na_name] = info.type
-        sctypeNA[info.type] = na_name
-        sctypeNA[info.char] = na_name
-
-        assert char != ''
-        sctypeDict[char] = info.type
-        sctypeNA[char] = na_name
-_add_aliases()
-
-def _add_integer_aliases():
-    seen_bits = set()
-    for i_ctype, u_ctype in zip(_int_ctypes, _uint_ctypes):
-        i_info = _concrete_typeinfo[i_ctype]
-        u_info = _concrete_typeinfo[u_ctype]
-        bits = i_info.bits  # same for both
-
-        for info, charname, intname, Intname in [
-                (i_info,'i%d' % (bits//8,), 'int%d' % bits, 'Int%d' % bits),
-                (u_info,'u%d' % (bits//8,), 'uint%d' % bits, 'UInt%d' % bits)]:
-            if bits not in seen_bits:
-                # sometimes two different types have the same number of bits
-                # if so, the one iterated over first takes precedence
-                allTypes[intname] = info.type
-                sctypeDict[intname] = info.type
-                sctypeDict[Intname] = info.type
-                sctypeDict[charname] = info.type
-                sctypeNA[Intname] = info.type
-                sctypeNA[charname] = info.type
-            sctypeNA[info.type] = Intname
-            sctypeNA[info.char] = Intname
-
-        seen_bits.add(bits)
-
-_add_integer_aliases()
-
-# We use these later
-void = allTypes['void']
+# We use this later
 generic = allTypes['generic']
 
-#
-# Rework the Python names (so that float and complex and int are consistent
-#                            with Python usage)
-#
-def _set_up_aliases():
-    type_pairs = [('complex_', 'cdouble'),
-                  ('int0', 'intp'),
-                  ('uint0', 'uintp'),
-                  ('single', 'float'),
-                  ('csingle', 'cfloat'),
-                  ('singlecomplex', 'cfloat'),
-                  ('float_', 'double'),
-                  ('intc', 'int'),
-                  ('uintc', 'uint'),
-                  ('int_', 'long'),
-                  ('uint', 'ulong'),
-                  ('cfloat', 'cdouble'),
-                  ('longfloat', 'longdouble'),
-                  ('clongfloat', 'clongdouble'),
-                  ('longcomplex', 'clongdouble'),
-                  ('bool_', 'bool'),
-                  ('bytes_', 'string'),
-                  ('string_', 'string'),
-                  ('unicode_', 'unicode'),
-                  ('object_', 'object')]
-    if sys.version_info[0] >= 3:
-        type_pairs.extend([('str_', 'unicode')])
-    else:
-        type_pairs.extend([('str_', 'string')])
-    for alias, t in type_pairs:
-        allTypes[alias] = allTypes[t]
-        sctypeDict[alias] = sctypeDict[t]
-    # Remove aliases overriding python types and modules
-    to_remove = ['ulong', 'object', 'unicode', 'int', 'long', 'float',
-                 'complex', 'bool', 'string', 'datetime', 'timedelta']
-    if sys.version_info[0] >= 3:
-        # Py3K
-        to_remove.append('bytes')
-        to_remove.append('str')
-        to_remove.remove('unicode')
-        to_remove.remove('long')
-    for t in to_remove:
-        try:
-            del allTypes[t]
-            del sctypeDict[t]
-        except KeyError:
-            pass
-_set_up_aliases()
-
-# Now, construct dictionary to lookup character codes from types
-_sctype2char_dict = {}
-def _construct_char_code_lookup():
-    for name, info in _concrete_typeinfo.items():
-        if info.char not in ['p', 'P']:
-            _sctype2char_dict[info.type] = info.char
-_construct_char_code_lookup()
-
-
-sctypes = {'int': [],
-           'uint':[],
-           'float':[],
-           'complex':[],
-           'others':[bool, object, bytes, unicode, void]}
-
-def _add_array_type(typename, bits):
-    try:
-        t = allTypes['%s%d' % (typename, bits)]
-    except KeyError:
-        pass
-    else:
-        sctypes[typename].append(t)
-
-def _set_array_types():
-    ibytes = [1, 2, 4, 8, 16, 32, 64]
-    fbytes = [2, 4, 8, 10, 12, 16, 32, 64]
-    for bytes in ibytes:
-        bits = 8*bytes
-        _add_array_type('int', bits)
-        _add_array_type('uint', bits)
-    for bytes in fbytes:
-        bits = 8*bytes
-        _add_array_type('float', bits)
-        _add_array_type('complex', 2*bits)
-    _gi = dtype('p')
-    if _gi.type not in sctypes['int']:
-        indx = 0
-        sz = _gi.itemsize
-        _lst = sctypes['int']
-        while (indx < len(_lst) and sz >= _lst[indx](0).itemsize):
-            indx += 1
-        sctypes['int'].insert(indx, _gi.type)
-        sctypes['uint'].insert(indx, dtype('P').type)
-_set_array_types()
-
-
 genericTypeRank = ['bool', 'int8', 'uint8', 'int16', 'uint16',
                    'int32', 'uint32', 'int64', 'uint64', 'int128',
                    'uint128', 'float16',
@@ -543,13 +181,11 @@ def maximum_sctype(t):
     if g is None:
         return t
     t = g
-    name = t.__name__
-    base, bits = _evalname(name)
-    if bits == 0:
-        return t
-    else:
+    base = _kind_name(dtype(t))
+    if base in sctypes:
         return sctypes[base][-1]
-
+    else:
+        return t
 
 def issctype(rep):
     """
@@ -855,13 +491,17 @@ def sctype2char(sctype):
     sctype = obj2sctype(sctype)
     if sctype is None:
         raise ValueError("unrecognized type")
-    return _sctype2char_dict[sctype]
+    if sctype not in _concrete_types:
+        # for compatibility
+        raise KeyError(sctype)
+    return dtype(sctype).char
 
 # Create dictionary of casting functions that wrap sequences
 # indexed by type or type character
-
-
 cast = _typedict()
+for key in _concrete_types:
+    cast[key] = lambda x, k=key: array(x, copy=False).astype(k)
+
 try:
     ScalarType = [_types.IntType, _types.FloatType, _types.ComplexType,
                   _types.LongType, _types.BooleanType,
@@ -870,41 +510,9 @@ except AttributeError:
     # Py3K
     ScalarType = [int, float, complex, int, bool, bytes, str, memoryview]
 
-ScalarType.extend(_sctype2char_dict.keys())
+ScalarType.extend(_concrete_types)
 ScalarType = tuple(ScalarType)
-for key in _sctype2char_dict.keys():
-    cast[key] = lambda x, k=key: array(x, copy=False).astype(k)
-
-# Create the typestring lookup dictionary
-_typestr = _typedict()
-for key in _sctype2char_dict.keys():
-    if issubclass(key, allTypes['flexible']):
-        _typestr[key] = _sctype2char_dict[key]
-    else:
-        _typestr[key] = empty((1,), key).dtype.str[1:]
-
-# Make sure all typestrings are in sctypeDict
-for key, val in _typestr.items():
-    if val not in sctypeDict:
-        sctypeDict[val] = key
-
-# Add additional strings to the sctypeDict
-
-if sys.version_info[0] >= 3:
-    _toadd = ['int', 'float', 'complex', 'bool', 'object',
-              'str', 'bytes', 'object', ('a', allTypes['bytes_'])]
-else:
-    _toadd = ['int', 'float', 'complex', 'bool', 'object', 'string',
-              ('str', allTypes['string_']),
-              'unicode', 'object', ('a', allTypes['string_'])]
-
-for name in _toadd:
-    if isinstance(name, tuple):
-        sctypeDict[name[0]] = name[1]
-    else:
-        sctypeDict[name] = allTypes['%s_' % name]
 
-del _toadd, name
 
 # Now add the types we've determined to this module
 for key in allTypes:
diff --git a/numpy/core/overrides.py b/numpy/core/overrides.py
new file mode 100644
index 000000000..77ee9e6ee
--- /dev/null
+++ b/numpy/core/overrides.py
@@ -0,0 +1,154 @@
+"""Preliminary implementation of NEP-18
+
+TODO: rewrite this in C for performance.
+"""
+import collections
+import functools
+
+from numpy.core._multiarray_umath import ndarray
+from numpy.compat._inspect import getargspec
+
+
+_NDARRAY_ARRAY_FUNCTION = ndarray.__array_function__
+
+
+def get_overloaded_types_and_args(relevant_args):
+    """Returns a list of arguments on which to call __array_function__.
+
+    Parameters
+    ----------
+    relevant_args : iterable of array-like
+        Iterable of array-like arguments to check for __array_function__
+        methods.
+
+    Returns
+    -------
+    overloaded_types : collection of types
+        Types of arguments from relevant_args with __array_function__ methods.
+    overloaded_args : list
+        Arguments from relevant_args on which to call __array_function__
+        methods, in the order in which they should be called.
+    """
+    # Runtime is O(num_arguments * num_unique_types)
+    overloaded_types = []
+    overloaded_args = []
+    for arg in relevant_args:
+        arg_type = type(arg)
+        # We only collect arguments if they have a unique type, which ensures
+        # reasonable performance even with a long list of possibly overloaded
+        # arguments.
+        if (arg_type not in overloaded_types and
+                hasattr(arg_type, '__array_function__')):
+
+            overloaded_types.append(arg_type)
+
+            # By default, insert this argument at the end, but if its type
+            # is a subclass of another argument's type, insert it before
+            # that argument. This ensures "subclasses before superclasses".
+            index = len(overloaded_args)
+            for i, old_arg in enumerate(overloaded_args):
+                if issubclass(arg_type, type(old_arg)):
+                    index = i
+                    break
+            overloaded_args.insert(index, arg)
+
+    # Special handling for ndarray.__array_function__
+    overloaded_args = [
+        arg for arg in overloaded_args
+        if type(arg).__array_function__ is not _NDARRAY_ARRAY_FUNCTION
+    ]
+
+    return overloaded_types, overloaded_args
+
+
+def array_function_implementation_or_override(
+        implementation, public_api, relevant_args, args, kwargs):
+    """Implement a function with checks for __array_function__ overrides.
+
+    Arguments
+    ---------
+    implementation : function
+        Function that implements the operation on NumPy array without
+        overrides when called like ``implementation(*args, **kwargs)``.
+    public_api : function
+        Function exposed by NumPy's public API originally called like
+        ``public_api(*args, **kwargs)`` on which arguments are now being
+        checked.
+    relevant_args : iterable
+        Iterable of arguments to check for __array_function__ methods.
+    args : tuple
+        Arbitrary positional arguments originally passed into ``public_api``.
+    kwargs : dict
+        Arbitrary keyword arguments originally passed into ``public_api``.
+
+    Returns
+    -------
+    Result from calling `implementation()` or an `__array_function__`
+    method, as appropriate.
+
+    Raises
+    ------
+    TypeError : if no implementation is found.
+    """
+    # Check for __array_function__ methods.
+    types, overloaded_args = get_overloaded_types_and_args(relevant_args)
+    if not overloaded_args:
+        return implementation(*args, **kwargs)
+
+    # Call overrides
+    for overloaded_arg in overloaded_args:
+        # Use `public_api` instead of `implementation` so __array_function__
+        # implementations can do equality/identity comparisons.
+        result = overloaded_arg.__array_function__(
+            public_api, types, args, kwargs)
+
+        if result is not NotImplemented:
+            return result
+
+    raise TypeError('no implementation found for {} on types that implement '
+                    '__array_function__: {}'
+                    .format(public_api, list(map(type, overloaded_args))))
+
+
+ArgSpec = collections.namedtuple('ArgSpec', 'args varargs keywords defaults')
+
+
+def verify_matching_signatures(implementation, dispatcher):
+    """Verify that a dispatcher function has the right signature."""
+    implementation_spec = ArgSpec(*getargspec(implementation))
+    dispatcher_spec = ArgSpec(*getargspec(dispatcher))
+
+    if (implementation_spec.args != dispatcher_spec.args or
+            implementation_spec.varargs != dispatcher_spec.varargs or
+            implementation_spec.keywords != dispatcher_spec.keywords or
+            (bool(implementation_spec.defaults) !=
+             bool(dispatcher_spec.defaults)) or
+            (implementation_spec.defaults is not None and
+             len(implementation_spec.defaults) !=
+             len(dispatcher_spec.defaults))):
+        raise RuntimeError('implementation and dispatcher for %s have '
+                           'different function signatures' % implementation)
+
+    if implementation_spec.defaults is not None:
+        if dispatcher_spec.defaults != (None,) * len(dispatcher_spec.defaults):
+            raise RuntimeError('dispatcher functions can only use None for '
+                               'default argument values')
+
+
+def array_function_dispatch(dispatcher, verify=True):
+    """Decorator for adding dispatch with the __array_function__ protocol."""
+    def decorator(implementation):
+        # TODO: only do this check when the appropriate flag is enabled or for
+        # a dev install. We want this check for testing but don't want to
+        # slow down all numpy imports.
+        if verify:
+            verify_matching_signatures(implementation, dispatcher)
+
+        @functools.wraps(implementation)
+        def public_api(*args, **kwargs):
+            relevant_args = dispatcher(*args, **kwargs)
+            return array_function_implementation_or_override(
+                implementation, public_api, relevant_args, args, kwargs)
+        return public_api
+
+    return decorator
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 1588a2634..fc15fe59f 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -4,7 +4,6 @@ import os
 import sys
 import pickle
 import copy
-import sysconfig
 import warnings
 import platform
 from os.path import join
@@ -153,7 +152,8 @@ def check_math_capabilities(config, moredefs, mathlibs):
 
     for h in OPTIONAL_HEADERS:
         if config.check_func("", decl=False, call=False, headers=[h]):
-            moredefs.append((fname2def(h).replace(".", "_"), 1))
+            h = h.replace(".", "_").replace(os.path.sep, "_")
+            moredefs.append((fname2def(h), 1))
 
     for tup in OPTIONAL_INTRINSICS:
         headers = None
@@ -737,6 +737,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'ucsnarrow.h'),
             join('src', 'common', 'ufunc_override.h'),
             join('src', 'common', 'umathmodule.h'),
+            join('src', 'common', 'numpyos.h'),
             ]
 
     common_src = [
@@ -746,6 +747,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'templ_common.h.src'),
             join('src', 'common', 'ucsnarrow.c'),
             join('src', 'common', 'ufunc_override.c'),
+            join('src', 'common', 'numpyos.c'),
             ]
 
     blas_info = get_info('blas_opt', 0)
@@ -785,7 +787,6 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'multiarraymodule.h'),
             join('src', 'multiarray', 'nditer_impl.h'),
             join('src', 'multiarray', 'number.h'),
-            join('src', 'multiarray', 'numpyos.h'),
             join('src', 'multiarray', 'refcount.h'),
             join('src', 'multiarray', 'scalartypes.h'),
             join('src', 'multiarray', 'sequence.h'),
@@ -851,7 +852,6 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'nditer_constr.c'),
             join('src', 'multiarray', 'nditer_pywrap.c'),
             join('src', 'multiarray', 'number.c'),
-            join('src', 'multiarray', 'numpyos.c'),
             join('src', 'multiarray', 'refcount.c'),
             join('src', 'multiarray', 'sequence.c'),
             join('src', 'multiarray', 'shape.c'),
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 356482b07..f837df112 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -41,7 +41,8 @@ C_ABI_VERSION = 0x01000009
 # 0x0000000b - 1.13.x
 # 0x0000000c - 1.14.x
 # 0x0000000c - 1.15.x
-C_API_VERSION = 0x0000000c
+# 0x0000000d - 1.16.x
+C_API_VERSION = 0x0000000d
 
 class MismatchCAPIWarning(Warning):
     pass
@@ -110,7 +111,7 @@ OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh",
         "rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow",
         "copysign", "nextafter", "ftello", "fseeko",
         "strtoll", "strtoull", "cbrt", "strtold_l", "fallocate",
-        "backtrace"]
+        "backtrace", "madvise"]
 
 
 OPTIONAL_HEADERS = [
@@ -120,6 +121,7 @@ OPTIONAL_HEADERS = [
                 "features.h",  # for glibc version linux
                 "xlocale.h",  # see GH#8367
                 "dlfcn.h", # dladdr
+                "sys/mman.h", #madvise
 ]
 
 # optional gcc compiler builtins and their call arguments and optional a
diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index 319c25088..fde23076b 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -7,7 +7,14 @@ __all__ = ['atleast_1d', 'atleast_2d', 'atleast_3d', 'block', 'hstack',
 from . import numeric as _nx
 from .numeric import array, asanyarray, newaxis
 from .multiarray import normalize_axis_index
+from .overrides import array_function_dispatch
 
+
+def _atleast_1d_dispatcher(*arys):
+    return arys
+
+
+@array_function_dispatch(_atleast_1d_dispatcher)
 def atleast_1d(*arys):
     """
     Convert inputs to arrays with at least one dimension.
@@ -60,6 +67,12 @@ def atleast_1d(*arys):
     else:
         return res
 
+
+def _atleast_2d_dispatcher(*arys):
+    return arys
+
+
+@array_function_dispatch(_atleast_2d_dispatcher)
 def atleast_2d(*arys):
     """
     View inputs as arrays with at least two dimensions.
@@ -112,6 +125,12 @@ def atleast_2d(*arys):
     else:
         return res
 
+
+def _atleast_3d_dispatcher(*arys):
+    return arys
+
+
+@array_function_dispatch(_atleast_3d_dispatcher)
 def atleast_3d(*arys):
     """
     View inputs as arrays with at least three dimensions.
@@ -179,6 +198,11 @@ def atleast_3d(*arys):
         return res
 
 
+def _vstack_dispatcher(tup):
+    return tup
+
+
+@array_function_dispatch(_vstack_dispatcher)
 def vstack(tup):
     """
     Stack arrays in sequence vertically (row wise).
@@ -233,6 +257,12 @@ def vstack(tup):
     """
     return _nx.concatenate([atleast_2d(_m) for _m in tup], 0)
 
+
+def _hstack_dispatcher(tup):
+    return tup
+
+
+@array_function_dispatch(_hstack_dispatcher)
 def hstack(tup):
     """
     Stack arrays in sequence horizontally (column wise).
@@ -288,6 +318,14 @@ def hstack(tup):
         return _nx.concatenate(arrs, 1)
 
 
+def _stack_dispatcher(arrays, axis=None, out=None):
+    for a in arrays:
+        yield a
+    if out is not None:
+        yield out
+
+
+@array_function_dispatch(_stack_dispatcher)
 def stack(arrays, axis=0, out=None):
     """
     Join a sequence of arrays along a new axis.
@@ -360,6 +398,14 @@ def stack(arrays, axis=0, out=None):
     return _nx.concatenate(expanded_arrays, axis=axis, out=out)
 
 
+def _block_format_index(index):
+    """
+    Convert a list of indices ``[0, 1, 2]`` into ``"arrays[0][1][2]"``.
+    """
+    idx_str = ''.join('[{}]'.format(i) for i in index if i is not None)
+    return 'arrays' + idx_str
+
+
 def _block_check_depths_match(arrays, parent_index=[]):
     """
     Recursive function checking that the depths of nested lists in `arrays`
@@ -370,19 +416,23 @@ def _block_check_depths_match(arrays, parent_index=[]):
     for each innermost list, in case an error needs to be raised, so that
     the index of the offending list can be printed as part of the error.
 
-    The parameter `parent_index` is the full index of `arrays` within the
-    nested lists passed to _block_check_depths_match at the top of the
-    recursion.
-    The return value is a pair. The first item returned is the full index
-    of an element (specifically the first element) from the bottom of the
-    nesting in `arrays`. An empty list at the bottom of the nesting is
-    represented by a `None` index.
-    The second item is the maximum of the ndims of the arrays nested in
-    `arrays`.
+    Parameters
+    ----------
+    arrays : nested list of arrays
+        The arrays to check
+    parent_index : list of int
+        The full index of `arrays` within the nested lists passed to
+        `_block_check_depths_match` at the top of the recursion.
+
+    Returns
+    -------
+    first_index : list of int
+        The full index of an element from the bottom of the nesting in
+        `arrays`. If any element at the bottom is an empty list, this will
+        refer to it, and the last index along the empty axis will be `None`.
+    max_arr_ndim : int
+        The maximum of the ndims of the arrays nested in `arrays`.
     """
-    def format_index(index):
-        idx_str = ''.join('[{}]'.format(i) for i in index if i is not None)
-        return 'arrays' + idx_str
     if type(arrays) is tuple:
         # not strictly necessary, but saves us from:
         #  - more than one way to do things - no point treating tuples like
@@ -393,7 +443,7 @@ def _block_check_depths_match(arrays, parent_index=[]):
             '{} is a tuple. '
             'Only lists can be used to arrange blocks, and np.block does '
             'not allow implicit conversion from tuple to ndarray.'.format(
-                format_index(parent_index)
+                _block_format_index(parent_index)
             )
         )
     elif type(arrays) is list and len(arrays) > 0:
@@ -410,9 +460,12 @@ def _block_check_depths_match(arrays, parent_index=[]):
                     "{}, but there is an element at depth {} ({})".format(
                         len(first_index),
                         len(index),
-                        format_index(index)
+                        _block_format_index(index)
                     )
                 )
+            # propagate our flag that indicates an empty list at the bottom
+            if index[-1] is None:
+                first_index = index
         return first_index, max_arr_ndim
     elif type(arrays) is list and len(arrays) == 0:
         # We've 'bottomed out' on an empty list
@@ -422,7 +475,13 @@ def _block_check_depths_match(arrays, parent_index=[]):
         return parent_index, _nx.ndim(arrays)
 
 
-def _block(arrays, max_depth, result_ndim):
+def _atleast_nd(a, ndim):
+    # Ensures `a` has at least `ndim` dimensions by prepending
+    # ones to `a.shape` as necessary
+    return array(a, ndmin=ndim, copy=False, subok=True)
+
+
+def _block(arrays, max_depth, result_ndim, depth=0):
     """
     Internal implementation of block. `arrays` is the argument passed to
     block. `max_depth` is the depth of nested lists within `arrays` and
@@ -430,31 +489,17 @@ def _block(arrays, max_depth, result_ndim):
     `arrays` and the depth of the lists in `arrays` (see block docstring
     for details).
     """
-    def atleast_nd(a, ndim):
-        # Ensures `a` has at least `ndim` dimensions by prepending
-        # ones to `a.shape` as necessary
-        return array(a, ndmin=ndim, copy=False, subok=True)
-
-    def block_recursion(arrays, depth=0):
-        if depth < max_depth:
-            if len(arrays) == 0:
-                raise ValueError('Lists cannot be empty')
-            arrs = [block_recursion(arr, depth+1) for arr in arrays]
-            return _nx.concatenate(arrs, axis=-(max_depth-depth))
-        else:
-            # We've 'bottomed out' - arrays is either a scalar or an array
-            # type(arrays) is not list
-            return atleast_nd(arrays, result_ndim)
-
-    try:
-        return block_recursion(arrays)
-    finally:
-        # recursive closures have a cyclic reference to themselves, which
-        # requires gc to collect (gh-10620). To avoid this problem, for
-        # performance and PyPy friendliness, we break the cycle:
-        block_recursion = None
+    if depth < max_depth:
+        arrs = [_block(arr, max_depth, result_ndim, depth+1)
+                for arr in arrays]
+        return _nx.concatenate(arrs, axis=-(max_depth-depth))
+    else:
+        # We've 'bottomed out' - arrays is either a scalar or an array
+        # type(arrays) is not list
+        return _atleast_nd(arrays, result_ndim)
 
 
+# TODO: support array_function_dispatch
 def block(arrays):
     """
     Assemble an nd-array from nested lists of blocks.
@@ -605,4 +650,17 @@ def block(arrays):
     """
     bottom_index, arr_ndim = _block_check_depths_match(arrays)
     list_ndim = len(bottom_index)
-    return _block(arrays, list_ndim, max(arr_ndim, list_ndim))
+    if bottom_index and bottom_index[-1] is None:
+        raise ValueError(
+            'List at {} cannot be empty'.format(
+                _block_format_index(bottom_index)
+            )
+        )
+    result = _block(arrays, list_ndim, max(arr_ndim, list_ndim))
+    if list_ndim == 0:
+        # Catch an edge case where _block returns a view because
+        # `arrays` is a single numpy array and not a list of numpy arrays.
+        # This might copy scalars or lists twice, but this isn't a likely
+        # use case for those interested in performance.
+        result = result.copy()
+    return result
diff --git a/numpy/core/src/common/array_assign.c b/numpy/core/src/common/array_assign.c
index a48e245d8..ac3fdbef7 100644
--- a/numpy/core/src/common/array_assign.c
+++ b/numpy/core/src/common/array_assign.c
@@ -84,14 +84,43 @@ broadcast_error: {
 
 /* See array_assign.h for parameter documentation */
 NPY_NO_EXPORT int
-raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment)
+raw_array_is_aligned(int ndim, npy_intp *shape,
+                     char *data, npy_intp *strides, int alignment)
 {
-    if (alignment > 1) {
-        npy_intp align_check = (npy_intp)data;
-        int idim;
 
-        for (idim = 0; idim < ndim; ++idim) {
-            align_check |= strides[idim];
+    /*
+     * The code below expects the following:
+     *  * that alignment is a power of two, as required by the C standard.
+     *  * that casting from pointer to uintp gives a sensible representation
+     *    we can use bitwise operations on (perhaps *not* req. by C std,
+     *    but assumed by glibc so it should be fine)
+     *  * that casting stride from intp to uintp (to avoid dependence on the
+     *    signed int representation) preserves remainder wrt alignment, so
+     *    stride%a is the same as ((unsigned intp)stride)%a. Req. by C std.
+     *
+     *  The code checks whether the lowest log2(alignment) bits of `data`
+     *  and all `strides` are 0, as this implies that
+     *  (data + n*stride)%alignment == 0 for all integers n.
+     */
+
+    if (alignment > 1) {
+        npy_uintp align_check = (npy_uintp)data;
+        int i;
+
+        for (i = 0; i < ndim; i++) {
+#if NPY_RELAXED_STRIDES_CHECKING
+            /* skip dim == 1 as it is not required to have stride 0 */
+            if (shape[i] > 1) {
+                /* if shape[i] == 1, the stride is never used */
+                align_check |= (npy_uintp)strides[i];
+            }
+            else if (shape[i] == 0) {
+                /* an array with zero elements is always aligned */
+                return 1;
+            }
+#else /* not NPY_RELAXED_STRIDES_CHECKING */
+            align_check |= (npy_uintp)strides[i];
+#endif /* not NPY_RELAXED_STRIDES_CHECKING */
         }
 
         return npy_is_aligned((void *)align_check, alignment);
@@ -101,6 +130,23 @@ raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment)
     }
 }
 
+NPY_NO_EXPORT int
+IsAligned(PyArrayObject *ap)
+{
+    return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
+                                PyArray_DATA(ap), PyArray_STRIDES(ap),
+                                PyArray_DESCR(ap)->alignment);
+}
+
+NPY_NO_EXPORT int
+IsUintAligned(PyArrayObject *ap)
+{
+    return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
+                                PyArray_DATA(ap), PyArray_STRIDES(ap),
+                                npy_uint_alignment(PyArray_DESCR(ap)->elsize));
+}
+
+
 
 /* Returns 1 if the arrays have overlapping data, 0 otherwise */
 NPY_NO_EXPORT int
diff --git a/numpy/core/src/common/array_assign.h b/numpy/core/src/common/array_assign.h
index 3fecff007..07438c5e8 100644
--- a/numpy/core/src/common/array_assign.h
+++ b/numpy/core/src/common/array_assign.h
@@ -87,10 +87,26 @@ broadcast_strides(int ndim, npy_intp *shape,
 
 /*
  * Checks whether a data pointer + set of strides refers to a raw
- * array which is fully aligned data.
+ * array whose elements are all aligned to a given alignment.
+ * alignment should be a power of two.
  */
 NPY_NO_EXPORT int
-raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment);
+raw_array_is_aligned(int ndim, npy_intp *shape,
+                     char *data, npy_intp *strides, int alignment);
+
+/*
+ * Checks if an array is aligned to its "true alignment"
+ * given by dtype->alignment.
+ */
+NPY_NO_EXPORT int
+IsAligned(PyArrayObject *ap);
+
+/*
+ * Checks if an array is aligned to its "uint alignment"
+ * given by npy_uint_alignment(dtype->elsize).
+ */
+NPY_NO_EXPORT int
+IsUintAligned(PyArrayObject *ap);
 
 /* Returns 1 if the arrays have overlapping data, 0 otherwise */
 NPY_NO_EXPORT int
diff --git a/numpy/core/src/common/lowlevel_strided_loops.h b/numpy/core/src/common/lowlevel_strided_loops.h
index f9c671f77..5f139cffb 100644
--- a/numpy/core/src/common/lowlevel_strided_loops.h
+++ b/numpy/core/src/common/lowlevel_strided_loops.h
@@ -7,7 +7,9 @@
 /*
  * NOTE: This API should remain private for the time being, to allow
  *       for further refinement.  I think the 'aligned' mechanism
- *       needs changing, for example.
+ *       needs changing, for example. 
+ *
+ *       Note: Updated in 2018 to distinguish "true" from "uint" alignment.
  */
 
 /*
@@ -69,8 +71,9 @@ typedef void (PyArray_StridedBinaryOp)(char *dst, npy_intp dst_stride,
  * strided memory.  Returns NULL if there is a problem with the inputs.
  *
  * aligned:
- *      Should be 1 if the src and dst pointers are always aligned,
- *      0 otherwise.
+ *      Should be 1 if the src and dst pointers always point to
+ *      locations at which a uint of equal size to dtype->elsize
+ *      would be aligned, 0 otherwise.
  * src_stride:
  *      Should be the src stride if it will always be the same,
  *      NPY_MAX_INTP otherwise.
@@ -165,8 +168,9 @@ PyArray_GetDTypeCopySwapFn(int aligned,
  * function when the transfer function is no longer required.
  *
  * aligned:
- *      Should be 1 if the src and dst pointers are always aligned,
- *      0 otherwise.
+ *      Should be 1 if the src and dst pointers always point to
+ *      locations at which a uint of equal size to dtype->elsize
+ *      would be aligned, 0 otherwise.
  * src_stride:
  *      Should be the src stride if it will always be the same,
  *      NPY_MAX_INTP otherwise.
diff --git a/numpy/core/src/common/npy_config.h b/numpy/core/src/common/npy_config.h
index 8143e7719..eedfbe364 100644
--- a/numpy/core/src/common/npy_config.h
+++ b/numpy/core/src/common/npy_config.h
@@ -6,22 +6,6 @@
 #include "numpy/npy_cpu.h"
 #include "numpy/npy_os.h"
 
-/*
- * largest alignment the copy loops might require
- * required as string, void and complex types might get copied using larger
- * instructions than required to operate on them. E.g. complex float is copied
- * in 8 byte moves but arithmetic on them only loads in 4 byte moves.
- * the sparc platform may need that alignment for long doubles.
- * amd64 is not harmed much by the bloat as the system provides 16 byte
- * alignment by default.
- */
-#if (defined NPY_CPU_X86 || defined _WIN32 || defined NPY_CPU_ARMEL_AARCH32 ||\
-     defined NPY_CPU_ARMEB_AARCH32)
-#define NPY_MAX_COPY_ALIGNMENT 8
-#else
-#define NPY_MAX_COPY_ALIGNMENT 16
-#endif
-
 /* blacklist */
 
 /* Disable broken Sun Workshop Pro math functions */
@@ -46,7 +30,7 @@
 
 #endif
 
-#if defined(_MSC_VER) && (_MSC_VER == 1900)
+#if defined(_MSC_VER) && (_MSC_VER >= 1900)
 
 #undef HAVE_CASIN
 #undef HAVE_CASINF
@@ -60,6 +44,18 @@
 #undef HAVE_CATANH
 #undef HAVE_CATANHF
 #undef HAVE_CATANHL
+#undef HAVE_CSQRT
+#undef HAVE_CSQRTF
+#undef HAVE_CSQRTL
+#undef HAVE_CLOG
+#undef HAVE_CLOGF
+#undef HAVE_CLOGL
+#undef HAVE_CACOS
+#undef HAVE_CACOSF
+#undef HAVE_CACOSL
+#undef HAVE_CACOSH
+#undef HAVE_CACOSHF
+#undef HAVE_CACOSHL
 
 #endif
 
diff --git a/numpy/core/src/common/npy_longdouble.c b/numpy/core/src/common/npy_longdouble.c
index 508fbceac..561f4b825 100644
--- a/numpy/core/src/common/npy_longdouble.c
+++ b/numpy/core/src/common/npy_longdouble.c
@@ -1,17 +1,11 @@
 #include <Python.h>
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+
 #include "numpy/ndarraytypes.h"
 #include "numpy/npy_math.h"
-
-/* This is a backport of Py_SETREF */
-#define NPY_SETREF(op, op2)                      \
-    do {                                        \
-        PyObject *_py_tmp = (PyObject *)(op);   \
-        (op) = (op2);                           \
-        Py_DECREF(_py_tmp);                     \
-    } while (0)
-
+#include "npy_pycompat.h"
 
 /*
  * Heavily derived from PyLong_FromDouble
@@ -66,7 +60,7 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
         npy_ulonglong chunk = (npy_ulonglong)frac;
         PyObject *l_chunk;
         /* v = v << chunk_size */
-        NPY_SETREF(v, PyNumber_Lshift(v, l_chunk_size));
+        Py_SETREF(v, PyNumber_Lshift(v, l_chunk_size));
         if (v == NULL) {
             goto done;
         }
@@ -77,7 +71,7 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
             goto done;
         }
         /* v = v | chunk */
-        NPY_SETREF(v, PyNumber_Or(v, l_chunk));
+        Py_SETREF(v, PyNumber_Or(v, l_chunk));
         Py_DECREF(l_chunk);
         if (v == NULL) {
             goto done;
@@ -90,7 +84,7 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
 
     /* v = -v */
     if (neg) {
-        NPY_SETREF(v, PyNumber_Negative(v));
+        Py_SETREF(v, PyNumber_Negative(v));
         if (v == NULL) {
             goto done;
         }
diff --git a/numpy/core/src/multiarray/numpyos.c b/numpy/core/src/common/numpyos.c
index 52dcbf3c8..d60b1ca17 100644
--- a/numpy/core/src/multiarray/numpyos.c
+++ b/numpy/core/src/common/numpyos.c
@@ -769,3 +769,31 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value)
     }
     return r;
 }
+
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOLL
+    return strtoll(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoi64(str, endptr, base);
+#else
+    /* ok on 64 bit posix */
+    return PyOS_strtol(str, endptr, base);
+#endif
+}
+
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOULL
+    return strtoull(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoui64(str, endptr, base);
+#else
+    /* ok on 64 bit posix */
+    return PyOS_strtoul(str, endptr, base);
+#endif
+}
+
+
diff --git a/numpy/core/src/multiarray/numpyos.h b/numpy/core/src/common/numpyos.h
index 7ca795a6f..4deed8400 100644
--- a/numpy/core/src/multiarray/numpyos.h
+++ b/numpy/core/src/common/numpyos.h
@@ -31,4 +31,11 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value);
 NPY_NO_EXPORT int
 NumPyOS_ascii_isspace(int c);
 
+/* Convert a string to an npy_longlong in an arbitrary base */
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base);
+
+/* Convert a string to an npy_ulonglong in an arbitrary base */
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base);
 #endif
diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src
index 67c9a333c..6c4d49bd1 100644
--- a/numpy/core/src/multiarray/_multiarray_tests.c.src
+++ b/numpy/core/src/multiarray/_multiarray_tests.c.src
@@ -6,6 +6,7 @@
 #include "numpy/arrayscalars.h"
 #include "numpy/npy_math.h"
 #include "numpy/halffloat.h"
+#include "common.h"
 #include "mem_overlap.h"
 #include "npy_extint128.h"
 #include "common.h"
@@ -1641,6 +1642,42 @@ extint_ceildiv_128_64(PyObject *NPY_UNUSED(self), PyObject *args) {
     return pylong_from_int128(c);
 }
 
+struct TestStruct1 {
+    npy_uint8 a;
+    npy_complex64 b;
+};
+
+struct TestStruct2 {
+    npy_uint32 a;
+    npy_complex64 b;
+};
+
+struct TestStruct3 {
+    npy_uint8 a;
+    struct TestStruct1 b;
+};
+
+static PyObject *
+get_struct_alignments(PyObject *NPY_UNUSED(self), PyObject *args) {
+    PyObject *ret = PyTuple_New(3);
+    PyObject *alignment, *size, *val;
+
+/**begin repeat
+ * #N = 1,2,3#
+ */
+    alignment = PyInt_FromLong(_ALIGN(struct TestStruct@N@));
+    size = PyInt_FromLong(sizeof(struct TestStruct@N@));
+    val = PyTuple_Pack(2, alignment, size);
+    Py_DECREF(alignment);
+    Py_DECREF(size);
+    if (val == NULL) {
+        return NULL;
+    }
+    PyTuple_SET_ITEM(ret, @N@-1, val);
+/**end repeat**/
+    return ret;
+}
+
 
 static char get_fpu_mode_doc[] = (
     "get_fpu_mode()\n"
@@ -1956,6 +1993,9 @@ static PyMethodDef Multiarray_TestsMethods[] = {
     {"format_float_OSprintf_g",
         (PyCFunction)printf_float_g,
         METH_VARARGS , NULL},
+    {"get_struct_alignments",
+        get_struct_alignments,
+        METH_VARARGS, NULL},
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };
 
diff --git a/numpy/core/src/multiarray/alloc.c b/numpy/core/src/multiarray/alloc.c
index ae4b81cf5..6755095d7 100644
--- a/numpy/core/src/multiarray/alloc.c
+++ b/numpy/core/src/multiarray/alloc.c
@@ -22,8 +22,16 @@
 #include "npy_config.h"
 #include "alloc.h"
 
+
 #include <assert.h>
 
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#if defined MADV_HUGEPAGE && defined HAVE_MADVISE
+#define HAVE_MADV_HUGEPAGE
+#endif
+#endif
+
 #define NBUCKETS 1024 /* number of buckets for data*/
 #define NBUCKETS_DIM 16 /* number of buckets for dimensions/strides */
 #define NCACHE 7 /* number of cache entries per bucket */
@@ -35,6 +43,13 @@ typedef struct {
 static cache_bucket datacache[NBUCKETS];
 static cache_bucket dimcache[NBUCKETS_DIM];
 
+/* as the cache is managed in global variables verify the GIL is held */
+#if defined(NPY_PY3K)
+#define NPY_CHECK_GIL_HELD() PyGILState_Check()
+#else
+#define NPY_CHECK_GIL_HELD() 1
+#endif
+
 /*
  * very simplistic small memory block cache to avoid more expensive libc
  * allocations
@@ -45,26 +60,30 @@ static NPY_INLINE void *
 _npy_alloc_cache(npy_uintp nelem, npy_uintp esz, npy_uint msz,
                  cache_bucket * cache, void * (*alloc)(size_t))
 {
+    void * p;
     assert((esz == 1 && cache == datacache) ||
            (esz == sizeof(npy_intp) && cache == dimcache));
+    assert(NPY_CHECK_GIL_HELD());
     if (nelem < msz) {
         if (cache[nelem].available > 0) {
             return cache[nelem].ptrs[--(cache[nelem].available)];
         }
     }
+    p = alloc(nelem * esz);
+    if (p) {
 #ifdef _PyPyGC_AddMemoryPressure
-    {
-        size_t size = nelem * esz;
-        void * ret = alloc(size);
-        if (ret != NULL)
-        {
-            _PyPyPyGC_AddMemoryPressure(size);
+        _PyPyPyGC_AddMemoryPressure(nelem * esz);
+#endif
+#ifdef HAVE_MADV_HUGEPAGE
+        /* hint the kernel to use huge pages for large (>= 4 MiB) blocks */
+        if (NPY_UNLIKELY(nelem * esz >= ((1u<<22u)))) {
+            npy_uintp offset = 4096u - (npy_uintp)p % (4096u);
+            npy_uintp length = nelem * esz - offset;
+            madvise((void*)((npy_uintp)p + offset), length, MADV_HUGEPAGE);
         }
-        return ret;
-    }
-#else
-     return alloc(nelem * esz);
 #endif
+    }
+    return p;
 }
 
 /*
@@ -75,6 +94,7 @@ static NPY_INLINE void
 _npy_free_cache(void * p, npy_uintp nelem, npy_uint msz,
                 cache_bucket * cache, void (*dealloc)(void *))
 {
+    assert(NPY_CHECK_GIL_HELD());
     if (p != NULL && nelem < msz) {
         if (cache[nelem].available < NCACHE) {
             cache[nelem].ptrs[cache[nelem].available++] = p;
diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c
index 74fbb88c2..f692e0307 100644
--- a/numpy/core/src/multiarray/array_assign_array.c
+++ b/numpy/core/src/multiarray/array_assign_array.c
@@ -49,10 +49,10 @@ raw_array_assign_array(int ndim, npy_intp *shape,
     NPY_BEGIN_THREADS_DEF;
 
     /* Check alignment */
-    aligned = raw_array_is_aligned(ndim,
-                        dst_data, dst_strides, dst_dtype->alignment) &&
-              raw_array_is_aligned(ndim,
-                        src_data, src_strides, src_dtype->alignment);
+    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+                                   npy_uint_alignment(dst_dtype->elsize)) &&
+              raw_array_is_aligned(ndim, shape, src_data, src_strides,
+                                   npy_uint_alignment(src_dtype->elsize));
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareTwoRawArrayIter(
@@ -134,10 +134,10 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp *shape,
     NPY_BEGIN_THREADS_DEF;
 
     /* Check alignment */
-    aligned = raw_array_is_aligned(ndim,
-                        dst_data, dst_strides, dst_dtype->alignment) &&
-              raw_array_is_aligned(ndim,
-                        src_data, src_strides, src_dtype->alignment);
+    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+                                   npy_uint_alignment(dst_dtype->elsize)) &&
+              raw_array_is_aligned(ndim, shape, src_data, src_strides,
+                                   npy_uint_alignment(src_dtype->elsize));
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareThreeRawArrayIter(
diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c
index 17de99cb9..841a41850 100644
--- a/numpy/core/src/multiarray/array_assign_scalar.c
+++ b/numpy/core/src/multiarray/array_assign_scalar.c
@@ -46,11 +46,9 @@ raw_array_assign_scalar(int ndim, npy_intp *shape,
     NPY_BEGIN_THREADS_DEF;
 
     /* Check alignment */
-    aligned = raw_array_is_aligned(ndim, dst_data, dst_strides,
-                                    dst_dtype->alignment);
-    if (!npy_is_aligned(src_data, src_dtype->alignment)) {
-        aligned = 0;
-    }
+    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+                                   npy_uint_alignment(dst_dtype->elsize)) &&
+              npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize));
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareOneRawArrayIter(
@@ -119,11 +117,9 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp *shape,
     NPY_BEGIN_THREADS_DEF;
 
     /* Check alignment */
-    aligned = raw_array_is_aligned(ndim, dst_data, dst_strides,
-                                    dst_dtype->alignment);
-    if (!npy_is_aligned(src_data, src_dtype->alignment)) {
-        aligned = 0;
-    }
+    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+                                   npy_uint_alignment(dst_dtype->elsize)) &&
+              npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize));
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareTwoRawArrayIter(
@@ -224,7 +220,7 @@ PyArray_AssignRawScalar(PyArrayObject *dst,
      * we also skip this if 'dst' has an object dtype.
      */
     if ((!PyArray_EquivTypes(PyArray_DESCR(dst), src_dtype) ||
-                !npy_is_aligned(src_data, src_dtype->alignment)) &&
+            !npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize))) &&
                     PyArray_SIZE(dst) > 1 &&
                     !PyDataType_REFCHK(PyArray_DESCR(dst))) {
         char *tmp_src_data;
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 368f5ded7..341682588 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -1218,37 +1218,6 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op)
     }
 }
 
-/* This is a copy of _PyErr_ChainExceptions, with:
- *  - a minimal implementation for python 2
- *  - __cause__ used instead of __context__
- */
-NPY_NO_EXPORT void
-PyArray_ChainExceptionsCause(PyObject *exc, PyObject *val, PyObject *tb)
-{
-    if (exc == NULL)
-        return;
-
-    if (PyErr_Occurred()) {
-        /* only py3 supports this anyway */
-        #ifdef NPY_PY3K
-            PyObject *exc2, *val2, *tb2;
-            PyErr_Fetch(&exc2, &val2, &tb2);
-            PyErr_NormalizeException(&exc, &val, &tb);
-            if (tb != NULL) {
-                PyException_SetTraceback(val, tb);
-                Py_DECREF(tb);
-            }
-            Py_DECREF(exc);
-            PyErr_NormalizeException(&exc2, &val2, &tb2);
-            PyException_SetCause(val2, val);
-            PyErr_Restore(exc2, val2, tb2);
-        #endif
-    }
-    else {
-        PyErr_Restore(exc, val, tb);
-    }
-}
-
 /*
  * Silence the current error and emit a deprecation warning instead.
  *
@@ -1260,7 +1229,7 @@ DEPRECATE_silence_error(const char *msg) {
     PyObject *exc, *val, *tb;
     PyErr_Fetch(&exc, &val, &tb);
     if (DEPRECATE(msg) < 0) {
-        PyArray_ChainExceptionsCause(exc, val, tb);
+        npy_PyErr_ChainExceptionsCause(exc, val, tb);
         return -1;
     }
     Py_XDECREF(exc);
@@ -1377,7 +1346,7 @@ fail:
     /*
      * Reraise the original exception, possibly chaining with a new one.
      */
-    PyArray_ChainExceptionsCause(exc, val, tb);
+    npy_PyErr_ChainExceptionsCause(exc, val, tb);
     return NULL;
 }
 
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index d622effe6..46a3ffb3d 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -150,32 +150,6 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
 
 /**end repeat**/
 
-static npy_longlong
-npy_strtoll(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOLL
-    return strtoll(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoi64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtol(str, endptr, base);
-#endif
-}
-
-static npy_ulonglong
-npy_strtoull(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOULL
-    return strtoull(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoui64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtoul(str, endptr, base);
-#endif
-}
-
 /*
  *****************************************************************************
  **                         GETITEM AND SETITEM                             **
@@ -1489,10 +1463,14 @@ OBJECT_to_@TOTYPE@(void *input, void *output, npy_intp n,
 
     for (i = 0; i < n; i++, ip++, op += skip) {
         if (*ip == NULL) {
-            @TOTYPE@_setitem(Py_False, op, aop);
+            if (@TOTYPE@_setitem(Py_False, op, aop) < 0) {
+                return;
+            }
         }
         else {
-            @TOTYPE@_setitem(*ip, op, aop);
+            if (@TOTYPE@_setitem(*ip, op, aop) < 0) {
+                return;
+            }
         }
     }
 }
@@ -1792,8 +1770,8 @@ BOOL_scan(FILE *fp, npy_bool *ip, void *NPY_UNUSED(ignore),
  * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *         npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *         npy_datetime, npy_timedelta#
- * #func = (PyOS_strtol, PyOS_strtoul)*4, npy_strtoll, npy_strtoull,
- *         npy_strtoll*2#
+ * #func = (PyOS_strtol, PyOS_strtoul)*4, NumPyOS_strtoll, NumPyOS_strtoull,
+ *         NumPyOS_strtoll*2#
  * #btype = (npy_long, npy_ulong)*4, npy_longlong, npy_ulonglong,
  *          npy_longlong*2#
  */
@@ -4322,12 +4300,11 @@ static PyArray_Descr @from@_Descr = {
  *         cfloat, cdouble, clongdouble,
  *         object, datetime, timedelta#
  * #sort = 1*18, 0*1, 1*2#
- * #num = 1*15, 2*3, 1*3#
  * #fromtype = npy_bool,
  *             npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *             npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *             npy_half, npy_float, npy_double, npy_longdouble,
- *             npy_float, npy_double, npy_longdouble,
+ *             npy_cfloat, npy_cdouble, npy_clongdouble,
  *             PyObject *, npy_datetime, npy_timedelta#
  * #NAME = Bool,
  *         Byte, UByte, Short, UShort, Int, UInt,
@@ -4428,10 +4405,9 @@ NPY_NO_EXPORT PyArray_Descr @from@_Descr = {
     /* type_num */
     NPY_@from@,
     /* elsize */
-    @num@ * sizeof(@fromtype@),
+    sizeof(@fromtype@),
     /* alignment */
-    @num@ * _ALIGN(@fromtype@) > NPY_MAX_COPY_ALIGNMENT ?
-        NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@fromtype@),
+    _ALIGN(@fromtype@),
     /* subarray */
     NULL,
     /* fields */
@@ -4786,13 +4762,10 @@ set_typeinfo(PyObject *dict)
      *         CFLOAT, CDOUBLE, CLONGDOUBLE#
      * #Name = Half, Float, Double, LongDouble,
      *         CFloat, CDouble, CLongDouble#
-     * #num  = 1, 1, 1, 1, 2, 2, 2#
      */
     s = PyArray_typeinfo(
         NPY_@name@LTR, NPY_@name@, NPY_BITSOF_@name@,
-        @num@ * _ALIGN(@type@) > NPY_MAX_COPY_ALIGNMENT ?
-            NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@type@),
-        &Py@Name@ArrType_Type
+        _ALIGN(@type@), &Py@Name@ArrType_Type
     );
     if (s == NULL) {
         return -1;
diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c
index c8e3da8bc..9a2750aea 100644
--- a/numpy/core/src/multiarray/buffer.c
+++ b/numpy/core/src/multiarray/buffer.c
@@ -134,7 +134,7 @@ static int
 _append_str(_tmp_string_t *s, char const *p)
 {
     for (; *p != '\0'; p++) {
-        if (_append_char(s, *p) != 0) {
+        if (_append_char(s, *p) < 0) {
             return -1;
         }
     }
@@ -142,6 +142,61 @@ _append_str(_tmp_string_t *s, char const *p)
 }
 
 /*
+ * Append a PEP3118-formatted field name, ":name:", to str.
+ *
+ * Returns 0 on success. Returns -1 with a ValueError set if the name cannot
+ * be read as bytes or contains ':'; also returns -1 if _append_char fails.
+ */
+static int
+_append_field_name(_tmp_string_t *str, PyObject *name)
+{
+    PyObject *encoded;
+    char *chars;
+    Py_ssize_t nchars, i;
+    int status = -1;
+
+#if defined(NPY_PY3K)
+    /* FIXME: XXX -- should it use UTF-8 here? */
+    encoded = PyUnicode_AsUTF8String(name);
+#else
+    Py_INCREF(name);
+    encoded = name;
+#endif
+    if (encoded == NULL ||
+            PyBytes_AsStringAndSize(encoded, &chars, &nchars) < 0) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "invalid field name");
+        goto finish;
+    }
+
+    /* opening delimiter */
+    if (_append_char(str, ':') < 0) {
+        goto finish;
+    }
+    for (i = 0; i < nchars; ++i) {
+        if (chars[i] == ':') {
+            PyErr_SetString(PyExc_ValueError,
+                            "':' is not an allowed character in buffer "
+                            "field names");
+            goto finish;
+        }
+        if (_append_char(str, chars[i]) < 0) {
+            goto finish;
+        }
+    }
+    /* closing delimiter */
+    if (_append_char(str, ':') < 0) {
+        goto finish;
+    }
+    status = 0;
+
+finish:
+    /* encoded may be NULL here if the encoding step failed */
+    Py_XDECREF(encoded);
+    return status;
+}
+
+/*
  * Return non-zero if a type is aligned in each item in the given array,
  * AND, the descr element size is a multiple of the alignment,
  * AND, the array data is positioned to alignment granularity.
@@ -215,37 +262,49 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
             subarray_tuple = Py_BuildValue("(O)", descr->subarray->shape);
         }
 
-        _append_char(str, '(');
+        if (_append_char(str, '(') < 0) {
+            ret = -1;
+            goto subarray_fail;
+        }
         for (k = 0; k < PyTuple_GET_SIZE(subarray_tuple); ++k) {
             if (k > 0) {
-                _append_char(str, ',');
+                if (_append_char(str, ',') < 0) {
+                    ret = -1;
+                    goto subarray_fail;
+                }
             }
             item = PyTuple_GET_ITEM(subarray_tuple, k);
             dim_size = PyNumber_AsSsize_t(item, NULL);
 
             PyOS_snprintf(buf, sizeof(buf), "%ld", (long)dim_size);
-            _append_str(str, buf);
+            if (_append_str(str, buf) < 0) {
+                ret = -1;
+                goto subarray_fail;
+            }
             total_count *= dim_size;
         }
-        _append_char(str, ')');
-
-        Py_DECREF(subarray_tuple);
+        if (_append_char(str, ')') < 0) {
+            ret = -1;
+            goto subarray_fail;
+        }
 
         old_offset = *offset;
         ret = _buffer_format_string(descr->subarray->base, str, obj, offset,
                                     active_byteorder);
         *offset = old_offset + (*offset - old_offset) * total_count;
+
+    subarray_fail:
+        Py_DECREF(subarray_tuple);
         return ret;
     }
     else if (PyDataType_HASFIELDS(descr)) {
         Py_ssize_t base_offset = *offset;
 
-        _append_str(str, "T{");
+        if (_append_str(str, "T{") < 0) return -1;
         for (k = 0; k < PyTuple_GET_SIZE(descr->names); ++k) {
-            PyObject *name, *item, *offset_obj, *tmp;
+            PyObject *name, *item, *offset_obj;
             PyArray_Descr *child;
-            char *p;
-            Py_ssize_t len, new_offset;
+            Py_ssize_t new_offset;
             int ret;
 
             name = PyTuple_GET_ITEM(descr->names, k);
@@ -269,7 +328,7 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
                 return -1;
             }
             while (*offset < new_offset) {
-                _append_char(str, 'x');
+                if (_append_char(str, 'x') < 0) return -1;
                 ++*offset;
             }
 
@@ -281,36 +340,9 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
             }
 
             /* Insert field name */
-#if defined(NPY_PY3K)
-            /* FIXME: XXX -- should it use UTF-8 here? */
-            tmp = PyUnicode_AsUTF8String(name);
-#else
-            tmp = name;
-#endif
-            if (tmp == NULL || PyBytes_AsStringAndSize(tmp, &p, &len) < 0) {
-                PyErr_Clear();
-                PyErr_SetString(PyExc_ValueError, "invalid field name");
-                return -1;
-            }
-            _append_char(str, ':');
-            while (len > 0) {
-                if (*p == ':') {
-                    Py_DECREF(tmp);
-                    PyErr_SetString(PyExc_ValueError,
-                                    "':' is not an allowed character in buffer "
-                                    "field names");
-                    return -1;
-                }
-                _append_char(str, *p);
-                ++p;
-                --len;
-            }
-            _append_char(str, ':');
-#if defined(NPY_PY3K)
-            Py_DECREF(tmp);
-#endif
+            if (_append_field_name(str, name) < 0) return -1;
         }
-        _append_char(str, '}');
+        if (_append_char(str, '}') < 0) return -1;
     }
     else {
         int is_standard_size = 1;
@@ -338,7 +370,7 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
             /* Prefer native types, to cater for Cython */
             is_standard_size = 0;
             if (*active_byteorder != '@') {
-                _append_char(str, '@');
+                if (_append_char(str, '@') < 0) return -1;
                 *active_byteorder = '@';
             }
         }
@@ -346,7 +378,7 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
             /* Data types that have no standard size */
             is_standard_size = 0;
             if (*active_byteorder != '^') {
-                _append_char(str, '^');
+                if (_append_char(str, '^') < 0) return -1;
                 *active_byteorder = '^';
             }
         }
@@ -354,7 +386,7 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
                  descr->byteorder == '=') {
             is_standard_size = 1;
             if (*active_byteorder != descr->byteorder) {
-                _append_char(str, descr->byteorder);
+                if (_append_char(str, descr->byteorder) < 0) return -1;
                 *active_byteorder = descr->byteorder;
             }
 
@@ -372,45 +404,45 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
         }
 
         switch (descr->type_num) {
-        case NPY_BOOL:         if (_append_char(str, '?')) return -1; break;
-        case NPY_BYTE:         if (_append_char(str, 'b')) return -1; break;
-        case NPY_UBYTE:        if (_append_char(str, 'B')) return -1; break;
-        case NPY_SHORT:        if (_append_char(str, 'h')) return -1; break;
-        case NPY_USHORT:       if (_append_char(str, 'H')) return -1; break;
-        case NPY_INT:          if (_append_char(str, 'i')) return -1; break;
-        case NPY_UINT:         if (_append_char(str, 'I')) return -1; break;
+        case NPY_BOOL:         if (_append_char(str, '?') < 0) return -1; break;
+        case NPY_BYTE:         if (_append_char(str, 'b') < 0) return -1; break;
+        case NPY_UBYTE:        if (_append_char(str, 'B') < 0) return -1; break;
+        case NPY_SHORT:        if (_append_char(str, 'h') < 0) return -1; break;
+        case NPY_USHORT:       if (_append_char(str, 'H') < 0) return -1; break;
+        case NPY_INT:          if (_append_char(str, 'i') < 0) return -1; break;
+        case NPY_UINT:         if (_append_char(str, 'I') < 0) return -1; break;
         case NPY_LONG:
             if (is_standard_size && (NPY_SIZEOF_LONG == 8)) {
-                if (_append_char(str, 'q')) return -1;
+                if (_append_char(str, 'q') < 0) return -1;
             }
             else {
-                if (_append_char(str, 'l')) return -1;
+                if (_append_char(str, 'l') < 0) return -1;
             }
             break;
         case NPY_ULONG:
             if (is_standard_size && (NPY_SIZEOF_LONG == 8)) {
-                if (_append_char(str, 'Q')) return -1;
+                if (_append_char(str, 'Q') < 0) return -1;
             }
             else {
-                if (_append_char(str, 'L')) return -1;
+                if (_append_char(str, 'L') < 0) return -1;
             }
             break;
-        case NPY_LONGLONG:     if (_append_char(str, 'q')) return -1; break;
-        case NPY_ULONGLONG:    if (_append_char(str, 'Q')) return -1; break;
-        case NPY_HALF:         if (_append_char(str, 'e')) return -1; break;
-        case NPY_FLOAT:        if (_append_char(str, 'f')) return -1; break;
-        case NPY_DOUBLE:       if (_append_char(str, 'd')) return -1; break;
-        case NPY_LONGDOUBLE:   if (_append_char(str, 'g')) return -1; break;
-        case NPY_CFLOAT:       if (_append_str(str, "Zf")) return -1; break;
-        case NPY_CDOUBLE:      if (_append_str(str, "Zd")) return -1; break;
-        case NPY_CLONGDOUBLE:  if (_append_str(str, "Zg")) return -1; break;
+        case NPY_LONGLONG:     if (_append_char(str, 'q') < 0) return -1; break;
+        case NPY_ULONGLONG:    if (_append_char(str, 'Q') < 0) return -1; break;
+        case NPY_HALF:         if (_append_char(str, 'e') < 0) return -1; break;
+        case NPY_FLOAT:        if (_append_char(str, 'f') < 0) return -1; break;
+        case NPY_DOUBLE:       if (_append_char(str, 'd') < 0) return -1; break;
+        case NPY_LONGDOUBLE:   if (_append_char(str, 'g') < 0) return -1; break;
+        case NPY_CFLOAT:       if (_append_str(str, "Zf") < 0) return -1; break;
+        case NPY_CDOUBLE:      if (_append_str(str, "Zd") < 0) return -1; break;
+        case NPY_CLONGDOUBLE:  if (_append_str(str, "Zg") < 0) return -1; break;
         /* XXX NPY_DATETIME */
         /* XXX NPY_TIMEDELTA */
-        case NPY_OBJECT:       if (_append_char(str, 'O')) return -1; break;
+        case NPY_OBJECT:       if (_append_char(str, 'O') < 0) return -1; break;
         case NPY_STRING: {
             char buf[128];
             PyOS_snprintf(buf, sizeof(buf), "%ds", descr->elsize);
-            if (_append_str(str, buf)) return -1;
+            if (_append_str(str, buf) < 0) return -1;
             break;
         }
         case NPY_UNICODE: {
@@ -418,14 +450,14 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
             char buf[128];
             assert(descr->elsize % 4 == 0);
             PyOS_snprintf(buf, sizeof(buf), "%dw", descr->elsize / 4);
-            if (_append_str(str, buf)) return -1;
+            if (_append_str(str, buf) < 0) return -1;
             break;
         }
         case NPY_VOID: {
             /* Insert padding bytes */
             char buf[128];
             PyOS_snprintf(buf, sizeof(buf), "%dx", descr->elsize);
-            if (_append_str(str, buf)) return -1;
+            if (_append_str(str, buf) < 0) return -1;
             break;
         }
         default:
@@ -491,8 +523,12 @@ _buffer_info_new(PyObject *obj)
          * fields will not hit this code path and are currently unsupported
          * in _buffer_format_string.
          */
-        _append_char(&fmt, 'B');
-        _append_char(&fmt, '\0');
+        if (_append_char(&fmt, 'B') < 0) {
+            goto fail;
+        }
+        if (_append_char(&fmt, '\0') < 0) {
+            goto fail;
+        }
         info->ndim = 1;
         info->shape = malloc(sizeof(Py_ssize_t) * 2);
         if (info->shape == NULL) {
@@ -543,15 +579,17 @@ _buffer_info_new(PyObject *obj)
     err = _buffer_format_string(descr, &fmt, obj, NULL, NULL);
     Py_DECREF(descr);
     if (err != 0) {
-        free(fmt.s);
         goto fail;
     }
-    _append_char(&fmt, '\0');
+    if (_append_char(&fmt, '\0') < 0) {
+        goto fail;
+    }
     info->format = fmt.s;
 
     return info;
 
 fail:
+    free(fmt.s);
     free(info);
     return NULL;
 }
@@ -989,8 +1027,11 @@ _descriptor_from_pep3118_format(char *s)
     Py_DECREF(str);
     Py_DECREF(_numpy_internal);
     if (descr == NULL) {
+        PyObject *exc, *val, *tb;
+        PyErr_Fetch(&exc, &val, &tb);
         PyErr_Format(PyExc_ValueError,
                      "'%s' is not a valid PEP 3118 buffer format string", buf);
+        npy_PyErr_ChainExceptionsCause(exc, val, tb);
         free(buf);
         return NULL;
     }
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 4f695fdc7..5b4611e8a 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -587,50 +587,6 @@ _zerofill(PyArrayObject *ret)
     return 0;
 }
 
-NPY_NO_EXPORT int
-_IsAligned(PyArrayObject *ap)
-{
-    int i;
-    npy_uintp aligned;
-    npy_uintp alignment = PyArray_DESCR(ap)->alignment;
-
-    /* alignment 1 types should have a efficient alignment for copy loops */
-    if (PyArray_ISFLEXIBLE(ap) || PyArray_ISSTRING(ap)) {
-        npy_intp itemsize = PyArray_ITEMSIZE(ap);
-        /* power of two sizes may be loaded in larger moves */
-        if (((itemsize & (itemsize - 1)) == 0)) {
-            alignment = itemsize > NPY_MAX_COPY_ALIGNMENT ?
-                NPY_MAX_COPY_ALIGNMENT : itemsize;
-        }
-        else {
-            /* if not power of two it will be accessed bytewise */
-            alignment = 1;
-        }
-    }
-
-    if (alignment == 1) {
-        return 1;
-    }
-    aligned = (npy_uintp)PyArray_DATA(ap);
-
-    for (i = 0; i < PyArray_NDIM(ap); i++) {
-#if NPY_RELAXED_STRIDES_CHECKING
-        /* skip dim == 1 as it is not required to have stride 0 */
-        if (PyArray_DIM(ap, i) > 1) {
-            /* if shape[i] == 1, the stride is never used */
-            aligned |= (npy_uintp)PyArray_STRIDES(ap)[i];
-        }
-        else if (PyArray_DIM(ap, i) == 0) {
-            /* an array with zero elements is always aligned */
-            return 1;
-        }
-#else /* not NPY_RELAXED_STRIDES_CHECKING */
-        aligned |= (npy_uintp)PyArray_STRIDES(ap)[i];
-#endif /* not NPY_RELAXED_STRIDES_CHECKING */
-    }
-    return npy_is_aligned((void *)aligned, alignment);
-}
-
 NPY_NO_EXPORT npy_bool
 _IsWriteable(PyArrayObject *ap)
 {
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index db0a49920..2b8d3d3a4 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -1,5 +1,6 @@
 #ifndef _NPY_PRIVATE_COMMON_H_
 #define _NPY_PRIVATE_COMMON_H_
+#include "structmember.h"
 #include <numpy/npy_common.h>
 #include <numpy/npy_cpu.h>
 #include <numpy/ndarraytypes.h>
@@ -56,9 +57,6 @@ index2ptr(PyArrayObject *mp, npy_intp i);
 NPY_NO_EXPORT int
 _zerofill(PyArrayObject *ret);
 
-NPY_NO_EXPORT int
-_IsAligned(PyArrayObject *ap);
-
 NPY_NO_EXPORT npy_bool
 _IsWriteable(PyArrayObject *ap);
 
@@ -182,6 +180,15 @@ check_and_adjust_axis(int *axis, int ndim)
     return check_and_adjust_axis_msg(axis, ndim, Py_None);
 }
 
+/* used for some alignment checks */
+#define _ALIGN(type) offsetof(struct {char c; type v;}, v)
+/*
+ * Disable harmless compiler warning "4116: unnamed type definition in
+ * parentheses" which is caused by the _ALIGN macro.
+ */
+#if defined(_MSC_VER)
+#pragma warning(disable:4116)
+#endif
 
 /*
  * return true if pointer is aligned to 'alignment'
@@ -190,15 +197,51 @@ static NPY_INLINE int
 npy_is_aligned(const void * p, const npy_uintp alignment)
 {
     /*
-     * alignment is usually a power of two
-     * the test is faster than a direct modulo
+     * Assumes alignment is a power of two, as required by the C standard.
+     * Assumes cast from pointer to uintp gives a sensible representation we
+     * can use bitwise & on (not required by C standard, but used by glibc).
+     * This test is faster than a direct modulo.
      */
-    if (NPY_LIKELY((alignment & (alignment - 1)) == 0)) {
-        return ((npy_uintp)(p) & ((alignment) - 1)) == 0;
-    }
-    else {
-        return ((npy_uintp)(p) % alignment) == 0;
+    return ((npy_uintp)(p) & ((alignment) - 1)) == 0;
+}
+
+/*
+ * Get equivalent "uint" alignment given an itemsize, for use in copy code.
+ *
+ * Itemsize 1 gives 1; 2, 4 and 8 give _ALIGN of the npy_uint type of that
+ * size; 16 gives _ALIGN(npy_uint64). Any other itemsize gives 0, for which
+ * npy_is_aligned() above masks with all bits set, so only a pointer whose
+ * integer value is 0 would test as aligned.
+ */
+static NPY_INLINE int
+npy_uint_alignment(int itemsize)
+{
+    npy_uintp alignment = 0; /* return value of 0 means unaligned */
+
+    switch(itemsize){
+        case 1:
+            return 1;
+        case 2:
+            alignment = _ALIGN(npy_uint16);
+            break;
+        case 4:
+            alignment = _ALIGN(npy_uint32);
+            break;
+        case 8:
+            alignment = _ALIGN(npy_uint64);
+            break;
+        case 16:
+            /*
+             * 16 byte types are copied using 2 uint64 assignments.
+             * See the strided copy function in lowlevel_strided_loops.c.
+             */
+            alignment = _ALIGN(npy_uint64);
+            break;
+        default:
+            break;
     }
+    
+    return alignment;
 }
 
 /*
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index 1c27f8394..e8380e3bc 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -1156,7 +1156,32 @@ arr_unravel_index(PyObject *self, PyObject *args, PyObject *kwds)
     int i, ret_ndim;
     npy_intp ret_dims[NPY_MAXDIMS], ret_strides[NPY_MAXDIMS];
 
-    char *kwlist[] = {"indices", "dims", "order", NULL};
+    char *kwlist[] = {"indices", "shape", "order", NULL};
+
+    /*
+     * Continue to support the older "dims" argument in place
+     * of the "shape" argument. Issue an appropriate warning
+     * if "dims" is detected in keywords, then replace it with
+     * the new "shape" argument and continue processing as usual.
+     */
+    if (kwds) {
+        PyObject *dims_item, *shape_item;
+        dims_item = PyDict_GetItemString(kwds, "dims");
+        shape_item = PyDict_GetItemString(kwds, "shape");
+        if (dims_item != NULL && shape_item == NULL) {
+            if (DEPRECATE("'shape' argument should be"
+                          " used instead of 'dims'") < 0) {
+                return NULL;
+            }
+            /* dims_item is borrowed; SetItemString takes its own reference */
+            if (PyDict_SetItemString(kwds, "shape", dims_item) < 0) {
+                return NULL;
+            }
+            if (PyDict_DelItemString(kwds, "dims") < 0) {
+                return NULL;
+            }
+        }
+    }
 
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&|O&:unravel_index",
                     kwlist,
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 0d79f294c..33a706412 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -149,11 +149,6 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
 {
     PyArray_DatetimeMetaData *meta;
     int flex_type_num;
-    PyArrayObject *arr = NULL;
-    PyArray_Descr *dtype = NULL;
-    int ndim = 0;
-    npy_intp dims[NPY_MAXDIMS];
-    int result;
 
     if (*flex_dtype == NULL) {
         if (!PyErr_Occurred()) {
@@ -168,7 +163,7 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
 
     /* Flexible types with expandable size */
     if (PyDataType_ISUNSIZED(*flex_dtype)) {
-        /* First replace the flex dtype */
+        /* First replace the flex_dtype */
         PyArray_DESCR_REPLACE(*flex_dtype);
         if (*flex_dtype == NULL) {
             return;
@@ -259,6 +254,11 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
                              * GetArrayParamsFromObject won't iterate over
                              * array.
                              */
+                            PyArray_Descr *dtype = NULL;
+                            PyArrayObject *arr = NULL;
+                            int result;
+                            int ndim = 0;
+                            npy_intp dims[NPY_MAXDIMS];
                             list = PyArray_ToList((PyArrayObject *)data_obj);
                             result = PyArray_GetArrayParamsFromObject(
                                     list,
@@ -273,6 +273,8 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
                                     size = dtype->elsize;
                                 }
                             }
+                            Py_XDECREF(dtype);
+                            Py_XDECREF(arr);
                             Py_DECREF(list);
                         }
                         else if (PyArray_IsPythonScalar(data_obj)) {
@@ -2026,7 +2028,6 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn)
 {
     int i, n, allscalars = 0;
     PyArrayObject **mps = NULL;
-    PyObject *otmp;
     PyArray_Descr *intype = NULL, *stype = NULL;
     PyArray_Descr *newtype = NULL;
     NPY_SCALARKIND scalarkind = NPY_NOSCALAR, intypekind = NPY_NOSCALAR;
@@ -2065,9 +2066,13 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn)
     }
 
     for (i = 0; i < n; i++) {
-        otmp = PySequence_GetItem(op, i);
+        PyObject *otmp = PySequence_GetItem(op, i);
+        if (otmp == NULL) {
+            goto fail;
+        }
         if (!PyArray_CheckAnyScalar(otmp)) {
             newtype = PyArray_DescrFromObject(otmp, intype);
+            Py_DECREF(otmp);
             Py_XDECREF(intype);
             if (newtype == NULL) {
                 goto fail;
@@ -2077,6 +2082,7 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn)
         }
         else {
             newtype = PyArray_DescrFromObject(otmp, stype);
+            Py_DECREF(otmp);
             Py_XDECREF(stype);
             if (newtype == NULL) {
                 goto fail;
@@ -2086,7 +2092,6 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn)
             mps[i] = (PyArrayObject *)Py_None;
             Py_INCREF(Py_None);
         }
-        Py_XDECREF(otmp);
     }
     if (intype == NULL) {
         /* all scalars */
@@ -2110,6 +2115,9 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn)
             newtype = PyArray_PromoteTypes(intype, stype);
             Py_XDECREF(intype);
             intype = newtype;
+            if (newtype == NULL) {
+                goto fail;
+            }
         }
         for (i = 0; i < n; i++) {
             Py_XDECREF(mps[i]);
@@ -2121,8 +2129,9 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn)
     /* Make sure all arrays are actual array objects. */
     for (i = 0; i < n; i++) {
         int flags = NPY_ARRAY_CARRAY;
+        PyObject *otmp = PySequence_GetItem(op, i);
 
-        if ((otmp = PySequence_GetItem(op, i)) == NULL) {
+        if (otmp == NULL) {
             goto fail;
         }
         if (!allscalars && ((PyObject *)(mps[i]) == Py_None)) {
@@ -2131,8 +2140,8 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn)
             Py_DECREF(Py_None);
         }
         Py_INCREF(intype);
-        mps[i] = (PyArrayObject*)
-            PyArray_FromAny(otmp, intype, 0, 0, flags, NULL);
+        mps[i] = (PyArrayObject*)PyArray_FromAny(otmp, intype, 0, 0,
+                                                 flags, NULL);
         Py_DECREF(otmp);
         if (mps[i] == NULL) {
             goto fail;
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index f1b8a0209..aaaaeee82 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -2832,7 +2832,7 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
      * contiguous strides, etc.
      */
     if (PyArray_GetDTypeTransferFunction(
-                    PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst),
+                    IsUintAligned(src) && IsUintAligned(dst),
                     src_stride, dst_stride,
                     PyArray_DESCR(src), PyArray_DESCR(dst),
                     0,
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 1d44cf8be..439980877 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -1439,6 +1439,12 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
         PyObject *obj2;
         obj2 = PyUnicode_AsASCIIString(obj);
         if (obj2 == NULL) {
+            /* Convert the exception into a TypeError */
+            PyObject *err = PyErr_Occurred();
+            if (PyErr_GivenExceptionMatches(err, PyExc_UnicodeEncodeError)) {
+                PyErr_SetString(PyExc_TypeError,
+                        "data type not understood");
+            }
             return NPY_FAIL;
         }
         retval = PyArray_DescrConverter(obj2, at);
@@ -1853,72 +1859,17 @@ arraydescr_protocol_typestr_get(PyArray_Descr *self)
 }
 
 static PyObject *
-arraydescr_typename_get(PyArray_Descr *self)
+arraydescr_name_get(PyArray_Descr *self)
 {
-    static const char np_prefix[] = "numpy.";
-    const int np_prefix_len = sizeof(np_prefix) - 1;
-    PyTypeObject *typeobj = self->typeobj;
+    /* let python handle this */
+    PyObject *_numpy_dtype;
     PyObject *res;
-    char *s;
-    int len;
-    int prefix_len;
-    int suffix_len;
-
-    if (PyTypeNum_ISUSERDEF(self->type_num)) {
-        s = strrchr(typeobj->tp_name, '.');
-        if (s == NULL) {
-            res = PyUString_FromString(typeobj->tp_name);
-        }
-        else {
-            res = PyUString_FromStringAndSize(s + 1, strlen(s) - 1);
-        }
-        return res;
-    }
-    else {
-        /*
-         * NumPy type or subclass
-         *
-         * res is derived from typeobj->tp_name with the following rules:
-         * - if starts with "numpy.", that prefix is removed
-         * - if ends with "_", that suffix is removed
-         */
-        len = strlen(typeobj->tp_name);
-
-        if (! strncmp(typeobj->tp_name, np_prefix, np_prefix_len)) {
-            prefix_len = np_prefix_len;
-        }
-        else {
-            prefix_len = 0;
-        }
-
-        if (typeobj->tp_name[len - 1] == '_') {
-            suffix_len = 1;
-        }
-        else {
-            suffix_len = 0;
-        }
-
-        len -= prefix_len;
-        len -= suffix_len;
-        res = PyUString_FromStringAndSize(typeobj->tp_name+prefix_len, len);
-    }
-    if (PyTypeNum_ISFLEXIBLE(self->type_num) && !PyDataType_ISUNSIZED(self)) {
-        PyObject *p;
-        p = PyUString_FromFormat("%d", self->elsize * 8);
-        PyUString_ConcatAndDel(&res, p);
-    }
-    if (PyDataType_ISDATETIME(self)) {
-        PyArray_DatetimeMetaData *meta;
-
-        meta = get_datetime_metadata_from_dtype(self);
-        if (meta == NULL) {
-            Py_DECREF(res);
-            return NULL;
-        }
-
-        res = append_metastr_to_string(meta, 0, res);
+    _numpy_dtype = PyImport_ImportModule("numpy.core._dtype");
+    if (_numpy_dtype == NULL) {
+        return NULL;
     }
-
+    res = PyObject_CallMethod(_numpy_dtype, "_name_get", "O", self);
+    Py_DECREF(_numpy_dtype);
     return res;
 }
 
@@ -2212,7 +2163,7 @@ static PyGetSetDef arraydescr_getsets[] = {
         (getter)arraydescr_protocol_typestr_get,
         NULL, NULL, NULL},
     {"name",
-        (getter)arraydescr_typename_get,
+        (getter)arraydescr_name_get,
         NULL, NULL, NULL},
     {"base",
         (getter)arraydescr_base_get,
@@ -3166,462 +3117,36 @@ is_dtype_struct_simple_unaligned_layout(PyArray_Descr *dtype)
 }
 
 /*
- * Returns a string representation of a structured array,
- * in a list format.
+ * The general dtype repr function; defers to numpy.core._dtype.__repr__.
  */
 static PyObject *
-arraydescr_struct_list_str(PyArray_Descr *dtype)
+arraydescr_repr(PyArray_Descr *dtype)
 {
-    PyObject *names, *key, *fields, *ret, *tmp, *tup, *title;
-    Py_ssize_t i, names_size;
-    PyArray_Descr *fld_dtype;
-    int fld_offset;
-
-    names = dtype->names;
-    names_size = PyTuple_GET_SIZE(names);
-    fields = dtype->fields;
-
-    /* Build up a string to make the list */
-
-    /* Go through all the names */
-    ret = PyUString_FromString("[");
-    for (i = 0; i < names_size; ++i) {
-        key = PyTuple_GET_ITEM(names, i);
-        tup = PyDict_GetItem(fields, key);
-        if (tup == NULL) {
-            return 0;
-        }
-        title = NULL;
-        if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype, &fld_offset, &title)) {
-            PyErr_Clear();
-            return 0;
-        }
-        PyUString_ConcatAndDel(&ret, PyUString_FromString("("));
-        /* Check for whether to do titles as well */
-        if (title != NULL && title != Py_None) {
-            PyUString_ConcatAndDel(&ret, PyUString_FromString("("));
-            PyUString_ConcatAndDel(&ret, PyObject_Repr(title));
-            PyUString_ConcatAndDel(&ret, PyUString_FromString(", "));
-            PyUString_ConcatAndDel(&ret, PyObject_Repr(key));
-            PyUString_ConcatAndDel(&ret, PyUString_FromString("), "));
-        }
-        else {
-            PyUString_ConcatAndDel(&ret, PyObject_Repr(key));
-            PyUString_ConcatAndDel(&ret, PyUString_FromString(", "));
-        }
-        /* Special case subarray handling here */
-        if (PyDataType_HASSUBARRAY(fld_dtype)) {
-            tmp = arraydescr_construction_repr(
-                            fld_dtype->subarray->base, 0, 1);
-            PyUString_ConcatAndDel(&ret, tmp);
-            PyUString_ConcatAndDel(&ret, PyUString_FromString(", "));
-            PyUString_ConcatAndDel(&ret,
-                            PyObject_Str(fld_dtype->subarray->shape));
-        }
-        else {
-            tmp = arraydescr_construction_repr(fld_dtype, 0, 1);
-            PyUString_ConcatAndDel(&ret, tmp);
-        }
-        PyUString_ConcatAndDel(&ret, PyUString_FromString(")"));
-        if (i != names_size - 1) {
-            PyUString_ConcatAndDel(&ret, PyUString_FromString(", "));
-        }
+    PyObject *_numpy_dtype;
+    PyObject *res;
+    _numpy_dtype = PyImport_ImportModule("numpy.core._dtype");
+    if (_numpy_dtype == NULL) {
+        return NULL;
     }
-    PyUString_ConcatAndDel(&ret, PyUString_FromString("]"));
-
-    return ret;
+    res = PyObject_CallMethod(_numpy_dtype, "__repr__", "O", dtype);
+    Py_DECREF(_numpy_dtype);
+    return res;
 }
-
 /*
- * Returns a string representation of a structured array,
- * in a dict format.
+ * The general dtype str function; defers to numpy.core._dtype.__str__.
  */
 static PyObject *
-arraydescr_struct_dict_str(PyArray_Descr *dtype, int includealignedflag)
-{
-    PyObject *names, *key, *fields, *ret, *tmp, *tup, *title;
-    Py_ssize_t i, names_size;
-    PyArray_Descr *fld_dtype;
-    int fld_offset, has_titles;
-
-    names = dtype->names;
-    names_size = PyTuple_GET_SIZE(names);
-    fields = dtype->fields;
-    has_titles = 0;
-
-    /* Build up a string to make the dictionary */
-
-    /* First, the names */
-    ret = PyUString_FromString("{'names':[");
-    for (i = 0; i < names_size; ++i) {
-        key = PyTuple_GET_ITEM(names, i);
-        PyUString_ConcatAndDel(&ret, PyObject_Repr(key));
-        if (i != names_size - 1) {
-            PyUString_ConcatAndDel(&ret, PyUString_FromString(","));
-        }
-    }
-    /* Second, the formats */
-    PyUString_ConcatAndDel(&ret, PyUString_FromString("], 'formats':["));
-    for (i = 0; i < names_size; ++i) {
-        key = PyTuple_GET_ITEM(names, i);
-        tup = PyDict_GetItem(fields, key);
-        if (tup == NULL) {
-            return 0;
-        }
-        title = NULL;
-        if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype, &fld_offset, &title)) {
-            PyErr_Clear();
-            return 0;
-        }
-        /* Check for whether to do titles as well */
-        if (title != NULL && title != Py_None) {
-            has_titles = 1;
-        }
-        tmp = arraydescr_construction_repr(fld_dtype, 0, 1);
-        PyUString_ConcatAndDel(&ret, tmp);
-        if (i != names_size - 1) {
-            PyUString_ConcatAndDel(&ret, PyUString_FromString(","));
-        }
-    }
-    /* Third, the offsets */
-    PyUString_ConcatAndDel(&ret, PyUString_FromString("], 'offsets':["));
-    for (i = 0; i < names_size; ++i) {
-        key = PyTuple_GET_ITEM(names, i);
-        tup = PyDict_GetItem(fields, key);
-        if (tup == NULL) {
-            return 0;
-        }
-        if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype, &fld_offset, &title)) {
-            PyErr_Clear();
-            return 0;
-        }
-        PyUString_ConcatAndDel(&ret, PyUString_FromFormat("%d", fld_offset));
-        if (i != names_size - 1) {
-            PyUString_ConcatAndDel(&ret, PyUString_FromString(","));
-        }
-    }
-    /* Fourth, the titles */
-    if (has_titles) {
-        PyUString_ConcatAndDel(&ret, PyUString_FromString("], 'titles':["));
-        for (i = 0; i < names_size; ++i) {
-            key = PyTuple_GET_ITEM(names, i);
-            tup = PyDict_GetItem(fields, key);
-            if (tup == NULL) {
-                return 0;
-            }
-            title = Py_None;
-            if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype,
-                                            &fld_offset, &title)) {
-                PyErr_Clear();
-                return 0;
-            }
-            PyUString_ConcatAndDel(&ret, PyObject_Repr(title));
-            if (i != names_size - 1) {
-                PyUString_ConcatAndDel(&ret, PyUString_FromString(","));
-            }
-        }
-    }
-    if (includealignedflag && (dtype->flags&NPY_ALIGNED_STRUCT)) {
-        /* Finally, the itemsize/itemsize and aligned flag */
-        PyUString_ConcatAndDel(&ret,
-                PyUString_FromFormat("], 'itemsize':%d, 'aligned':True}",
-                        (int)dtype->elsize));
-    }
-    else {
-        /* Finally, the itemsize/itemsize*/
-        PyUString_ConcatAndDel(&ret,
-                PyUString_FromFormat("], 'itemsize':%d}", (int)dtype->elsize));
-    }
-
-    return ret;
-}
-
-/* Produces a string representation for a structured dtype */
-static PyObject *
-arraydescr_struct_str(PyArray_Descr *dtype, int includealignflag)
-{
-    PyObject *sub;
-
-    /*
-     * The list str representation can't include the 'align=' flag,
-     * so if it is requested and the struct has the aligned flag set,
-     * we must use the dict str instead.
-     */
-    if (!(includealignflag && (dtype->flags&NPY_ALIGNED_STRUCT)) &&
-                        is_dtype_struct_simple_unaligned_layout(dtype)) {
-        sub = arraydescr_struct_list_str(dtype);
-    }
-    else {
-        sub = arraydescr_struct_dict_str(dtype, includealignflag);
-    }
-
-    /* If the data type isn't the default, void, show it */
-    if (dtype->typeobj != &PyVoidArrType_Type) {
-        /*
-         * Note: We cannot get the type name from dtype->typeobj->tp_name
-         * because its value depends on whether the type is dynamically or
-         * statically allocated.  Instead use __name__ and __module__.
-         * See https://docs.python.org/2/c-api/typeobj.html.
-         */
-
-        PyObject *str_name, *namestr, *str_module, *modulestr, *ret;
-
-        str_name = PyUString_FromString("__name__");
-        namestr = PyObject_GetAttr((PyObject*)(dtype->typeobj), str_name);
-        Py_DECREF(str_name);
-
-        if (namestr == NULL) {
-            /* this should never happen since types always have __name__ */
-            PyErr_Format(PyExc_RuntimeError,
-                         "dtype does not have a __name__ attribute");
-            return NULL;
-        }
-
-        str_module = PyUString_FromString("__module__");
-        modulestr = PyObject_GetAttr((PyObject*)(dtype->typeobj), str_module);
-        Py_DECREF(str_module);
-
-        ret = PyUString_FromString("(");
-        if (modulestr != NULL) {
-            /* Note: if modulestr == NULL, the type is unpicklable */
-            PyUString_ConcatAndDel(&ret, modulestr);
-            PyUString_ConcatAndDel(&ret, PyUString_FromString("."));
-        }
-        PyUString_ConcatAndDel(&ret, namestr);
-        PyUString_ConcatAndDel(&ret, PyUString_FromString(", "));
-        PyUString_ConcatAndDel(&ret, sub);
-        PyUString_ConcatAndDel(&ret, PyUString_FromString(")"));
-        return ret;
-    }
-    else {
-        return sub;
-    }
-}
-
-/* Produces a string representation for a subarray dtype */
-static PyObject *
-arraydescr_subarray_str(PyArray_Descr *dtype)
-{
-    PyObject *p, *ret;
-
-    ret = PyUString_FromString("(");
-    p = arraydescr_construction_repr(dtype->subarray->base, 0, 1);
-    PyUString_ConcatAndDel(&ret, p);
-    PyUString_ConcatAndDel(&ret, PyUString_FromString(", "));
-    PyUString_ConcatAndDel(&ret, PyObject_Str(dtype->subarray->shape));
-    PyUString_ConcatAndDel(&ret, PyUString_FromString(")"));
-
-    return ret;
-}
-
-static PyObject *
 arraydescr_str(PyArray_Descr *dtype)
 {
-    PyObject *sub;
-
-    if (PyDataType_HASFIELDS(dtype)) {
-        sub = arraydescr_struct_str(dtype, 1);
-    }
-    else if (PyDataType_HASSUBARRAY(dtype)) {
-        sub = arraydescr_subarray_str(dtype);
-    }
-    else if (PyDataType_ISFLEXIBLE(dtype) || !PyArray_ISNBO(dtype->byteorder)) {
-        sub = arraydescr_protocol_typestr_get(dtype);
-    }
-    else {
-        sub = arraydescr_typename_get(dtype);
-    }
-    return sub;
-}
-
-/*
- * The dtype repr function specifically for structured arrays.
- */
-static PyObject *
-arraydescr_struct_repr(PyArray_Descr *dtype)
-{
-    PyObject *sub, *s;
-
-    s = PyUString_FromString("dtype(");
-    sub = arraydescr_struct_str(dtype, 0);
-    if (sub == NULL) {
+    PyObject *_numpy_dtype;
+    PyObject *res;
+    _numpy_dtype = PyImport_ImportModule("numpy.core._dtype");
+    if (_numpy_dtype == NULL) {
         return NULL;
     }
-
-    PyUString_ConcatAndDel(&s, sub);
-
-    /* If it's an aligned structure, add the align=True parameter */
-    if (dtype->flags&NPY_ALIGNED_STRUCT) {
-        PyUString_ConcatAndDel(&s, PyUString_FromString(", align=True"));
-    }
-
-    PyUString_ConcatAndDel(&s, PyUString_FromString(")"));
-    return s;
-}
-
-/* See descriptor.h for documentation */
-NPY_NO_EXPORT PyObject *
-arraydescr_construction_repr(PyArray_Descr *dtype, int includealignflag,
-                                int shortrepr)
-{
-    PyObject *ret;
-    PyArray_DatetimeMetaData *meta;
-    char byteorder[2];
-
-    if (PyDataType_HASFIELDS(dtype)) {
-        return arraydescr_struct_str(dtype, includealignflag);
-    }
-    else if (PyDataType_HASSUBARRAY(dtype)) {
-        return arraydescr_subarray_str(dtype);
-    }
-
-    /* Normalize byteorder to '<' or '>' */
-    switch (dtype->byteorder) {
-        case NPY_NATIVE:
-            byteorder[0] = NPY_NATBYTE;
-            break;
-        case NPY_SWAP:
-            byteorder[0] = NPY_OPPBYTE;
-            break;
-        case NPY_IGNORE:
-            byteorder[0] = '\0';
-            break;
-        default:
-            byteorder[0] = dtype->byteorder;
-            break;
-    }
-    byteorder[1] = '\0';
-
-    /* Handle booleans, numbers, and custom dtypes */
-    if (dtype->type_num == NPY_BOOL) {
-        if (shortrepr) {
-            return PyUString_FromString("'?'");
-        }
-        else {
-            return PyUString_FromString("'bool'");
-        }
-    }
-    else if (PyTypeNum_ISNUMBER(dtype->type_num)) {
-        /* Short repr with endianness, like '<f8' */
-        if (shortrepr || (dtype->byteorder != NPY_NATIVE &&
-                          dtype->byteorder != NPY_IGNORE)) {
-            return PyUString_FromFormat("'%s%c%d'", byteorder,
-                                        (int)dtype->kind, dtype->elsize);
-        }
-        /* Longer repr, like 'float64' */
-        else {
-            char *kindstr;
-            switch (dtype->kind) {
-                case 'u':
-                    kindstr = "uint";
-                    break;
-                case 'i':
-                    kindstr = "int";
-                    break;
-                case 'f':
-                    kindstr = "float";
-                    break;
-                case 'c':
-                    kindstr = "complex";
-                    break;
-                default:
-                    PyErr_Format(PyExc_RuntimeError,
-                            "internal dtype repr error, unknown kind '%c'",
-                            (int)dtype->kind);
-                    return NULL;
-            }
-            return PyUString_FromFormat("'%s%d'", kindstr, 8*dtype->elsize);
-        }
-    }
-    else if (PyTypeNum_ISUSERDEF(dtype->type_num)) {
-        char *s = strrchr(dtype->typeobj->tp_name, '.');
-        if (s == NULL) {
-            return PyUString_FromString(dtype->typeobj->tp_name);
-        }
-        else {
-            return PyUString_FromStringAndSize(s + 1, strlen(s) - 1);
-        }
-    }
-
-    /* All the rest which don't fit in the same pattern */
-    switch (dtype->type_num) {
-        /*
-         * The object reference may be different sizes on different
-         * platforms, so it should never include the itemsize here.
-         */
-        case NPY_OBJECT:
-            return PyUString_FromString("'O'");
-
-        case NPY_STRING:
-            if (PyDataType_ISUNSIZED(dtype)) {
-                return PyUString_FromString("'S'");
-            }
-            else {
-                return PyUString_FromFormat("'S%d'", (int)dtype->elsize);
-            }
-
-        case NPY_UNICODE:
-            if (PyDataType_ISUNSIZED(dtype)) {
-                return PyUString_FromFormat("'%sU'", byteorder);
-            }
-            else {
-                return PyUString_FromFormat("'%sU%d'", byteorder,
-                                                (int)dtype->elsize / 4);
-            }
-
-        case NPY_VOID:
-            if (PyDataType_ISUNSIZED(dtype)) {
-                return PyUString_FromString("'V'");
-            }
-            else {
-                return PyUString_FromFormat("'V%d'", (int)dtype->elsize);
-            }
-
-        case NPY_DATETIME:
-            meta = get_datetime_metadata_from_dtype(dtype);
-            if (meta == NULL) {
-                return NULL;
-            }
-            ret = PyUString_FromFormat("'%sM8", byteorder);
-            ret = append_metastr_to_string(meta, 0, ret);
-            PyUString_ConcatAndDel(&ret, PyUString_FromString("'"));
-            return ret;
-
-        case NPY_TIMEDELTA:
-            meta = get_datetime_metadata_from_dtype(dtype);
-            if (meta == NULL) {
-                return NULL;
-            }
-            ret = PyUString_FromFormat("'%sm8", byteorder);
-            ret = append_metastr_to_string(meta, 0, ret);
-            PyUString_ConcatAndDel(&ret, PyUString_FromString("'"));
-            return ret;
-
-        default:
-            PyErr_SetString(PyExc_RuntimeError, "Internal error: NumPy dtype "
-                            "unrecognized type number");
-            return NULL;
-    }
-}
-
-/*
- * The general dtype repr function.
- */
-static PyObject *
-arraydescr_repr(PyArray_Descr *dtype)
-{
-    PyObject *ret;
-
-    if (PyDataType_HASFIELDS(dtype)) {
-        return arraydescr_struct_repr(dtype);
-    }
-    else {
-        ret = PyUString_FromString("dtype(");
-        PyUString_ConcatAndDel(&ret,
-                            arraydescr_construction_repr(dtype, 1, 0));
-        PyUString_ConcatAndDel(&ret, PyUString_FromString(")"));
-        return ret;
-    }
+    res = PyObject_CallMethod(_numpy_dtype, "__str__", "O", dtype);
+    Py_DECREF(_numpy_dtype);
+    return res;
 }
 
 static PyObject *
@@ -3759,10 +3284,19 @@ _check_has_fields(PyArray_Descr *self)
 {
     if (!PyDataType_HASFIELDS(self)) {
         PyObject *astr = arraydescr_str(self);
+        if (astr == NULL) {
+            return -1;
+        }
 #if defined(NPY_PY3K)
-        PyObject *bstr = PyUnicode_AsUnicodeEscapeString(astr);
-        Py_DECREF(astr);
-        astr = bstr;
+        {
+            PyObject *bstr = PyUnicode_AsUnicodeEscapeString(astr);
+            Py_DECREF(astr);
+            if (bstr == NULL) {
+                /* Encoding failed; do not pass NULL to PyBytes_AsString */
+                return -1;
+            }
+            astr = bstr;
+        }
 #endif
         PyErr_Format(PyExc_KeyError,
                 "There are no fields in dtype %s.", PyBytes_AsString(astr));
diff --git a/numpy/core/src/multiarray/descriptor.h b/numpy/core/src/multiarray/descriptor.h
index f95041195..5a3e4b15f 100644
--- a/numpy/core/src/multiarray/descriptor.h
+++ b/numpy/core/src/multiarray/descriptor.h
@@ -14,32 +14,6 @@ _arraydescr_fromobj(PyObject *obj);
 NPY_NO_EXPORT int
 is_dtype_struct_simple_unaligned_layout(PyArray_Descr *dtype);
 
-/*
- * Creates a string repr of the dtype, excluding the 'dtype()' part
- * surrounding the object. This object may be a string, a list, or
- * a dict depending on the nature of the dtype. This
- * is the object passed as the first parameter to the dtype
- * constructor, and if no additional constructor parameters are
- * given, will reproduce the exact memory layout.
- *
- * If 'shortrepr' is non-zero, this creates a shorter repr using
- * 'kind' and 'itemsize', instead of the longer type name.
- *
- * If 'includealignflag' is true, this includes the 'align=True' parameter
- * inside the struct dtype construction dict when needed. Use this flag
- * if you want a proper repr string without the 'dtype()' part around it.
- *
- * If 'includealignflag' is false, this does not preserve the
- * 'align=True' parameter or sticky NPY_ALIGNED_STRUCT flag for
- * struct arrays like the regular repr does, because the 'align'
- * flag is not part of first dtype constructor parameter. This
- * mode is intended for a full 'repr', where the 'align=True' is
- * provided as the second parameter.
- */
-NPY_NO_EXPORT PyObject *
-arraydescr_construction_repr(PyArray_Descr *dtype, int includealignflag,
-                                int shortrepr);
-
 extern NPY_NO_EXPORT char *_datetime_strings[];
 
 #endif
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 2cb1e0a95..97d899ce0 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -2965,6 +2965,10 @@ static void _strided_masked_wrapper_decsrcref_transfer_function(
         dst += subloopsize * dst_stride;
         src += subloopsize * src_stride;
         N -= subloopsize;
+        if (N <= 0) {
+            break;
+        }
+
         /* Process unmasked values */
         mask = (npy_bool*)npy_memchr((char *)mask, 0, mask_stride, N,
                                      &subloopsize, 0);
@@ -3000,6 +3004,10 @@ static void _strided_masked_wrapper_transfer_function(
         dst += subloopsize * dst_stride;
         src += subloopsize * src_stride;
         N -= subloopsize;
+        if (N <= 0) {
+            break;
+        }
+
         /* Process unmasked values */
         mask = (npy_bool*)npy_memchr((char *)mask, 0, mask_stride, N,
                                      &subloopsize, 0);
diff --git a/numpy/core/src/multiarray/flagsobject.c b/numpy/core/src/multiarray/flagsobject.c
index a78bedccb..85ea49fb4 100644
--- a/numpy/core/src/multiarray/flagsobject.c
+++ b/numpy/core/src/multiarray/flagsobject.c
@@ -12,6 +12,7 @@
 #include "npy_config.h"
 
 #include "npy_pycompat.h"
+#include "array_assign.h"
 
 #include "common.h"
 
@@ -64,7 +65,7 @@ PyArray_UpdateFlags(PyArrayObject *ret, int flagmask)
         _UpdateContiguousFlags(ret);
     }
     if (flagmask & NPY_ARRAY_ALIGNED) {
-        if (_IsAligned(ret)) {
+        if (IsAligned(ret)) {
             PyArray_ENABLEFLAGS(ret, NPY_ARRAY_ALIGNED);
         }
         else {
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 141b2d922..de54ca1b3 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -19,6 +19,7 @@
 #include "arrayobject.h"
 #include "ctors.h"
 #include "lowlevel_strided_loops.h"
+#include "array_assign.h"
 
 #include "item_selection.h"
 #include "npy_sort.h"
@@ -809,7 +810,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort,
     npy_intp elsize = (npy_intp)PyArray_ITEMSIZE(op);
     npy_intp astride = PyArray_STRIDE(op, axis);
     int swap = PyArray_ISBYTESWAPPED(op);
-    int needcopy = !PyArray_ISALIGNED(op) || swap || astride != elsize;
+    int needcopy = !IsAligned(op) || swap || astride != elsize;
     int hasrefs = PyDataType_REFCHK(PyArray_DESCR(op));
 
     PyArray_CopySwapNFunc *copyswapn = PyArray_DESCR(op)->f->copyswapn;
@@ -833,8 +834,6 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort,
     }
     size = it->size;
 
-    NPY_BEGIN_THREADS_DESCR(PyArray_DESCR(op));
-
     if (needcopy) {
         buffer = npy_alloc_cache(N * elsize);
         if (buffer == NULL) {
@@ -843,6 +842,8 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort,
         }
     }
 
+    NPY_BEGIN_THREADS_DESCR(PyArray_DESCR(op));
+
     while (size--) {
         char *bufptr = it->dataptr;
 
@@ -917,8 +918,8 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort,
     }
 
 fail:
-    npy_free_cache(buffer, N * elsize);
     NPY_END_THREADS_DESCR(PyArray_DESCR(op));
+    npy_free_cache(buffer, N * elsize);
     if (ret < 0 && !PyErr_Occurred()) {
         /* Out of memory during sorting or buffer creation */
         PyErr_NoMemory();
@@ -937,7 +938,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
     npy_intp elsize = (npy_intp)PyArray_ITEMSIZE(op);
     npy_intp astride = PyArray_STRIDE(op, axis);
     int swap = PyArray_ISBYTESWAPPED(op);
-    int needcopy = !PyArray_ISALIGNED(op) || swap || astride != elsize;
+    int needcopy = !IsAligned(op) || swap || astride != elsize;
     int hasrefs = PyDataType_REFCHK(PyArray_DESCR(op));
     int needidxbuffer;
 
@@ -979,8 +980,6 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
     }
     size = it->size;
 
-    NPY_BEGIN_THREADS_DESCR(PyArray_DESCR(op));
-
     if (needcopy) {
         valbuffer = npy_alloc_cache(N * elsize);
         if (valbuffer == NULL) {
@@ -997,6 +996,8 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
         }
     }
 
+    NPY_BEGIN_THREADS_DESCR(PyArray_DESCR(op));
+
     while (size--) {
         char *valptr = it->dataptr;
         npy_intp *idxptr = (npy_intp *)rit->dataptr;
@@ -1080,9 +1081,9 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
     }
 
 fail:
+    NPY_END_THREADS_DESCR(PyArray_DESCR(op));
     npy_free_cache(valbuffer, N * elsize);
     npy_free_cache(idxbuffer, N * sizeof(npy_intp));
-    NPY_END_THREADS_DESCR(PyArray_DESCR(op));
     if (ret < 0) {
         if (!PyErr_Occurred()) {
             /* Out of memory during sorting or buffer creation */
@@ -1498,13 +1499,13 @@ PyArray_LexSort(PyObject *sort_keys, int axis)
         char *valbuffer, *indbuffer;
         int *swaps;
 
-        valbuffer = npy_alloc_cache(N * maxelsize);
+        valbuffer = PyDataMem_NEW(N * maxelsize);
         if (valbuffer == NULL) {
             goto fail;
         }
-        indbuffer = npy_alloc_cache(N * sizeof(npy_intp));
+        indbuffer = PyDataMem_NEW(N * sizeof(npy_intp));
         if (indbuffer == NULL) {
-            npy_free_cache(indbuffer, N * sizeof(npy_intp));
+            PyDataMem_FREE(valbuffer);  /* indbuffer is NULL here */
             goto fail;
         }
         swaps = malloc(n*sizeof(int));
@@ -1547,8 +1548,8 @@ PyArray_LexSort(PyObject *sort_keys, int axis)
                                          sizeof(npy_intp), N, sizeof(npy_intp));
             PyArray_ITER_NEXT(rit);
         }
-        npy_free_cache(valbuffer, N * maxelsize);
-        npy_free_cache(indbuffer, N * sizeof(npy_intp));
+        PyDataMem_FREE(valbuffer);
+        PyDataMem_FREE(indbuffer);
         free(swaps);
     }
     else {
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index b25b4a8b6..159bb4103 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -10,7 +10,6 @@
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
-#include "structmember.h"
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
@@ -19,16 +18,7 @@
 #include <numpy/halffloat.h>
 
 #include "lowlevel_strided_loops.h"
-
-/* used for some alignment checks */
-#define _ALIGN(type) offsetof(struct {char c; type v;}, v)
-/*
- * Disable harmless compiler warning "4116: unnamed type definition in
- * parentheses" which is caused by the _ALIGN macro.
- */
-#if defined(_MSC_VER)
-#pragma warning(disable:4116)
-#endif
+#include "array_assign.h"
 
 
 /*
@@ -1385,7 +1375,7 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind,
 
     npy_intp itersize;
 
-    int is_aligned = PyArray_ISALIGNED(self) && PyArray_ISALIGNED(result);
+    int is_aligned = IsUintAligned(self) && IsUintAligned(result);
     int needs_api = PyDataType_REFCHK(PyArray_DESCR(self));
 
     PyArray_CopySwapFunc *copyswap = PyArray_DESCR(self)->f->copyswap;
@@ -1518,7 +1508,7 @@ mapiter_@name@(PyArrayMapIterObject *mit)
      * could also check extra_op is buffered, but it should rarely matter.
      */
 
-    is_aligned = PyArray_ISALIGNED(array) && PyArray_ISALIGNED(mit->extra_op);
+    is_aligned = IsUintAligned(array) && IsUintAligned(mit->extra_op);
 
     if (mit->size == 0) {
        return 0;
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 2fdb3ebf6..d371ae762 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -20,6 +20,7 @@
 #include "lowlevel_strided_loops.h"
 #include "item_selection.h"
 #include "mem_overlap.h"
+#include "array_assign.h"
 
 
 #define HAS_INTEGER 1
@@ -1063,7 +1064,7 @@ array_boolean_subscript(PyArrayObject *self,
 
         /* Get a dtype transfer function */
         NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
-        if (PyArray_GetDTypeTransferFunction(PyArray_ISALIGNED(self),
+        if (PyArray_GetDTypeTransferFunction(IsUintAligned(self),
                         fixed_strides[0], itemsize,
                         dtype, dtype,
                         0,
@@ -1125,10 +1126,10 @@ array_boolean_subscript(PyArrayObject *self,
         ret = (PyArrayObject *)PyArray_NewFromDescrAndBase(
                 Py_TYPE(self), dtype,
                 1, &size, PyArray_STRIDES(ret), PyArray_BYTES(ret),
-                PyArray_FLAGS(self), (PyObject *)self, (PyObject *)self);
+                PyArray_FLAGS(self), (PyObject *)self, (PyObject *)tmp);
 
+        Py_DECREF(tmp);
         if (ret == NULL) {
-            Py_DECREF(tmp);
             return NULL;
         }
     }
@@ -1252,7 +1253,7 @@ array_assign_boolean_subscript(PyArrayObject *self,
         /* Get a dtype transfer function */
         NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
         if (PyArray_GetDTypeTransferFunction(
-                        PyArray_ISALIGNED(self) && PyArray_ISALIGNED(v),
+                        IsUintAligned(self) && IsUintAligned(v),
                         v_stride, fixed_strides[0],
                         PyArray_DESCR(v), PyArray_DESCR(self),
                         0,
@@ -1723,7 +1724,7 @@ array_subscript(PyArrayObject *self, PyObject *op)
                 /* Check if the type is equivalent to INTP */
                 PyArray_ITEMSIZE(ind) == sizeof(npy_intp) &&
                 PyArray_DESCR(ind)->kind == 'i' &&
-                PyArray_ISALIGNED(ind) &&
+                IsUintAligned(ind) &&
                 PyDataType_ISNOTSWAPPED(PyArray_DESCR(ind))) {
 
             Py_INCREF(PyArray_DESCR(self));
@@ -2086,7 +2087,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
                 /* Check if the type is equivalent to INTP */
                 PyArray_ITEMSIZE(ind) == sizeof(npy_intp) &&
                 PyArray_DESCR(ind)->kind == 'i' &&
-                PyArray_ISALIGNED(ind) &&
+                IsUintAligned(ind) &&
                 PyDataType_ISNOTSWAPPED(PyArray_DESCR(ind))) {
 
             /* trivial_set checks the index for us */
@@ -2606,7 +2607,7 @@ PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit)
                 /* Check if the type is equivalent to INTP */
                 PyArray_ITEMSIZE(op) == sizeof(npy_intp) &&
                 PyArray_DESCR(op)->kind == 'i' &&
-                PyArray_ISALIGNED(op) &&
+                IsUintAligned(op) &&
                 PyDataType_ISNOTSWAPPED(PyArray_DESCR(op))) {
             char *data;
             npy_intp stride;
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 3d2cce5e1..23b0bfd24 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -21,6 +21,7 @@
 #include "conversion_utils.h"
 #include "shape.h"
 #include "strfuncs.h"
+#include "array_assign.h"
 
 #include "methods.h"
 #include "alloc.h"
@@ -355,6 +356,7 @@ PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, int offset)
     PyObject *ret = NULL;
     PyObject *safe;
     static PyObject *checkfunc = NULL;
+    int self_elsize, typed_elsize;
 
     /* check that we are not reinterpreting memory containing Objects. */
     if (_may_have_objects(PyArray_DESCR(self)) || _may_have_objects(typed)) {
@@ -372,6 +374,22 @@ PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, int offset)
         }
         Py_DECREF(safe);
     }
+    self_elsize = PyArray_ITEMSIZE(self);
+    typed_elsize = typed->elsize;
+
+    /* check that values are valid */
+    if (typed_elsize > self_elsize) {
+        PyErr_SetString(PyExc_ValueError, "new type is larger than original type");
+        return NULL;
+    }
+    if (offset < 0) {
+        PyErr_SetString(PyExc_ValueError, "offset is negative");
+        return NULL;
+    }
+    if (offset > self_elsize - typed_elsize) {
+        PyErr_SetString(PyExc_ValueError, "new type plus offset is larger than original type");
+        return NULL;
+    }
 
     ret = PyArray_NewFromDescr_int(
             Py_TYPE(self), typed,
@@ -1022,6 +1040,13 @@ cleanup:
 
 
 static PyObject *
+array_function(PyArrayObject *self, PyObject *args, PyObject *kwds)
+{
+    NPY_FORWARD_NDARRAY_METHOD("_array_function");
+}
+
+
+static PyObject *
 array_copy(PyArrayObject *self, PyObject *args, PyObject *kwds)
 {
     NPY_ORDER order = NPY_CORDER;
@@ -1594,6 +1619,8 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
 
        Notice because Python does not describe a mechanism to write
        raw data to the pickle, this performs a copy to a string first
+       This issue is now addressed in protocol 5, where a buffer is serialized
+       instead of a string.
     */
 
     state = PyTuple_New(5);
@@ -1627,6 +1654,151 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
 }
 
+/*
+ * ndarray.__reduce_ex__(protocol).
+ *
+ * With protocol 5, an exact ndarray that is C- or F-contiguous and has
+ * neither an object nor an unsized dtype is reduced to
+ * (numpy.core.numeric._frombuffer, (PickleBuffer, dtype, shape, order)).
+ * Every other case is forwarded to self.__reduce__().
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+array_reduce_ex(PyArrayObject *self, PyObject *args)
+{
+    int protocol;
+    PyObject *ret = NULL, *numeric_mod = NULL, *from_buffer_func = NULL;
+    PyObject *buffer_tuple = NULL, *pickle_module = NULL, *pickle_class = NULL;
+    PyObject *subclass_array_reduce = NULL, *shape = NULL, *order_str = NULL;
+    PyObject *buffer = NULL, *picklebuf_arg = NULL;
+    PyArray_Descr *descr = NULL;
+    char order;
+
+    if (!PyArg_ParseTuple(args, "i", &protocol)) {
+        return NULL;
+    }
+
+    descr = PyArray_DESCR(self);
+    if ((protocol < 5) ||
+        (!PyArray_IS_C_CONTIGUOUS(self) && !PyArray_IS_F_CONTIGUOUS(self)) ||
+        PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
+        (PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
+         ((PyObject*)self)->ob_type != &PyArray_Type) ||
+        PyDataType_ISUNSIZED(descr)) {
+        /*
+         * The PickleBuffer class from version 5 of the pickle protocol
+         * can only be used for arrays backed by a contiguous data buffer.
+         * For all other cases we fall back to the generic array_reduce
+         * method that involves using a temporary bytes allocation. However
+         * we do not call array_reduce directly but instead look up and call
+         * the __reduce__ method to make sure that it's possible to customize
+         * pickling in sub-classes.
+         */
+        subclass_array_reduce = PyObject_GetAttrString((PyObject *)self,
+                                                       "__reduce__");
+        if (subclass_array_reduce == NULL) {
+            return NULL;
+        }
+        ret = PyObject_CallObject(subclass_array_reduce, NULL);
+        Py_DECREF(subclass_array_reduce);
+        return ret;
+    }
+    if (protocol != 5) {
+        PyErr_Format(PyExc_ValueError,
+                     "cannot call __reduce_ex__ with protocol >= %d",
+                     5);
+        return NULL;
+    }
+
+    /*
+     * If the python version is below 3.8, the pickle module does not provide
+     * built-in support for protocol 5. We try importing the pickle5
+     * backport instead.
+     */
+#if PY_VERSION_HEX >= 0x03080000
+    pickle_module = PyImport_ImportModule("pickle");
+#elif PY_VERSION_HEX < 0x03080000 && PY_VERSION_HEX >= 0x03060000
+    pickle_module = PyImport_ImportModule("pickle5");
+    if (pickle_module == NULL) {
+        /* for protocol 5, raise a clear ImportError if pickle5 is not found */
+        PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
+                "requires the pickle5 module for python versions >=3.6 "
+                "and <3.8");
+        return NULL;
+    }
+#else
+    PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
+                                       "for python versions < 3.6");
+    return NULL;
+#endif
+    if (pickle_module == NULL) {
+        return NULL;
+    }
+
+    pickle_class = PyObject_GetAttrString(pickle_module, "PickleBuffer");
+    if (pickle_class == NULL) {
+        goto cleanup;
+    }
+
+    if (!PyArray_IS_C_CONTIGUOUS(self) && PyArray_IS_F_CONTIGUOUS(self)) {
+        /*
+         * If the array is Fortran-contiguous and not C-contiguous,
+         * the PickleBuffer instance will hold a view on the transpose
+         * of the initial array, that is C-contiguous.
+         */
+        order = 'F';
+        picklebuf_arg = PyArray_Transpose(self, NULL);
+        if (picklebuf_arg == NULL) {
+            goto cleanup;
+        }
+    }
+    else {
+        order = 'C';
+        Py_INCREF(self);
+        picklebuf_arg = (PyObject *)self;
+    }
+
+    buffer = PyObject_CallFunctionObjArgs(pickle_class, picklebuf_arg, NULL);
+    if (buffer == NULL) {
+        goto cleanup;
+    }
+
+    numeric_mod = PyImport_ImportModule("numpy.core.numeric");
+    if (numeric_mod == NULL) {
+        goto cleanup;
+    }
+    from_buffer_func = PyObject_GetAttrString(numeric_mod, "_frombuffer");
+    if (from_buffer_func == NULL) {
+        goto cleanup;
+    }
+
+    shape = PyObject_GetAttrString((PyObject *)self, "shape");
+    order_str = PyUnicode_FromStringAndSize(&order, (Py_ssize_t)1);
+    if (shape == NULL || order_str == NULL) {
+        goto cleanup;
+    }
+
+    /* PyTuple_Pack takes its own references; ours are released below. */
+    buffer_tuple = PyTuple_Pack(4, buffer, (PyObject *)descr,
+                                shape, order_str);
+    if (buffer_tuple == NULL) {
+        goto cleanup;
+    }
+    ret = PyTuple_Pack(2, from_buffer_func, buffer_tuple);
+
+cleanup:
+    Py_XDECREF(pickle_module);
+    Py_XDECREF(pickle_class);
+    Py_XDECREF(picklebuf_arg);
+    Py_XDECREF(buffer);
+    Py_XDECREF(numeric_mod);
+    Py_XDECREF(from_buffer_func);
+    Py_XDECREF(shape);
+    Py_XDECREF(order_str);
+    Py_XDECREF(buffer_tuple);
+    return ret;
+}
+
 static PyObject *
 array_setstate(PyArrayObject *self, PyObject *args)
 {
     PyObject *shape;
@@ -1778,11 +1931,11 @@ array_setstate(PyArrayObject *self, PyObject *args)
         fa->data = datastr;
 #ifndef NPY_PY3K
         /* Check that the string is not interned */
-        if (!_IsAligned(self) || swap || PyString_CHECK_INTERNED(rawdata)) {
+        if (!IsAligned(self) || swap || PyString_CHECK_INTERNED(rawdata)) {
 #else
         /* Bytes should always be considered immutable, but we just grab the
          * pointer if they are large, to save memory. */
-        if (!_IsAligned(self) || swap || (len <= 1000)) {
+        if (!IsAligned(self) || swap || (len <= 1000)) {
 #endif
             npy_intp num = PyArray_NBYTES(self);
             fa->data = PyDataMem_NEW(num);
@@ -2274,7 +2427,7 @@ array_setflags(PyArrayObject *self, PyObject *args, PyObject *kwds)
         if (PyObject_Not(align_flag)) {
             PyArray_CLEARFLAGS(self, NPY_ARRAY_ALIGNED);
         }
-        else if (_IsAligned(self)) {
+        else if (IsAligned(self)) {
             PyArray_ENABLEFLAGS(self, NPY_ARRAY_ALIGNED);
         }
         else {
@@ -2472,6 +2625,9 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
     {"__array_ufunc__",
         (PyCFunction)array_ufunc,
         METH_VARARGS | METH_KEYWORDS, NULL},
+    {"__array_function__",
+        (PyCFunction)array_function,
+        METH_VARARGS | METH_KEYWORDS, NULL},
 
 #ifndef NPY_PY3K
     {"__unicode__",
@@ -2496,6 +2652,9 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
     {"__reduce__",
         (PyCFunction) array_reduce,
         METH_VARARGS, NULL},
+    {"__reduce_ex__",
+        (PyCFunction) array_reduce_ex,
+        METH_VARARGS, NULL},
     {"__setstate__",
         (PyCFunction) array_setstate,
         METH_VARARGS, NULL},
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index c56376f58..dbb24f26b 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -17,8 +17,7 @@
 
 #include "arrayobject.h"
 #include "templ_common.h"
-#include "mem_overlap.h"
-
+#include "array_assign.h"
 
 /* Internal helper functions private to this file */
 static int
@@ -1133,7 +1132,7 @@ npyiter_prepare_one_operand(PyArrayObject **op,
         /* Check if the operand is aligned */
         if (op_flags & NPY_ITER_ALIGNED) {
             /* Check alignment */
-            if (!PyArray_ISALIGNED(*op)) {
+            if (!IsUintAligned(*op)) {
                 NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
                                     "because of NPY_ITER_ALIGNED\n");
                 *op_itflags |= NPY_OP_ITFLAG_CAST;
@@ -2975,7 +2974,7 @@ npyiter_allocate_arrays(NpyIter *iter,
              * If the operand is aligned, any buffering can use aligned
              * optimizations.
              */
-            if (PyArray_ISALIGNED(op[iop])) {
+            if (IsUintAligned(op[iop])) {
                 op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
             }
         }
diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c
index f71d39405..dabbae064 100644
--- a/numpy/core/src/multiarray/number.c
+++ b/numpy/core/src/multiarray/number.c
@@ -119,7 +119,7 @@ PyArray_SetNumericOps(PyObject *dict)
     return 0;
 }
 
-/* FIXME - macro contains goto */
+/* Note - macro contains goto */
 #define GET(op) if (n_ops.op &&                                         \
                     (PyDict_SetItemString(dict, #op, n_ops.op)==-1))    \
         goto fail;
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index fdd4d7878..6dd8b1a29 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -3748,30 +3748,21 @@ static PyMappingMethods gentype_as_mapping = {
  * #CNAME = FLOAT, DOUBLE, LONGDOUBLE#
  */
 #if NPY_BITSOF_@CNAME@ == 16
-#define _THIS_SIZE2 "16"
-#define _THIS_SIZE1 "32"
+#define _THIS_SIZE "32"
 #elif NPY_BITSOF_@CNAME@ == 32
-#define _THIS_SIZE2 "32"
-#define _THIS_SIZE1 "64"
+#define _THIS_SIZE "64"
 #elif NPY_BITSOF_@CNAME@ == 64
-#define _THIS_SIZE2 "64"
-#define _THIS_SIZE1 "128"
+#define _THIS_SIZE "128"
 #elif NPY_BITSOF_@CNAME@ == 80
-#define _THIS_SIZE2 "80"
-#define _THIS_SIZE1 "160"
+#define _THIS_SIZE "160"
 #elif NPY_BITSOF_@CNAME@ == 96
-#define _THIS_SIZE2 "96"
-#define _THIS_SIZE1 "192"
+#define _THIS_SIZE "192"
 #elif NPY_BITSOF_@CNAME@ == 128
-#define _THIS_SIZE2 "128"
-#define _THIS_SIZE1 "256"
+#define _THIS_SIZE "256"
 #elif NPY_BITSOF_@CNAME@ == 256
-#define _THIS_SIZE2 "256"
-#define _THIS_SIZE1 "512"
+#define _THIS_SIZE "512"
 #endif
 
-#define _THIS_DOC "Composed of two " _THIS_SIZE2 " bit floats"
-
 NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = {
 #if defined(NPY_PY3K)
     PyVarObject_HEAD_INIT(0, 0)
@@ -3779,7 +3770,7 @@ NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = {
     PyObject_HEAD_INIT(0)
     0,                                          /* ob_size */
 #endif
-    "numpy.@name@" _THIS_SIZE1,                 /* tp_name*/
+    "numpy.@name@" _THIS_SIZE,                  /* tp_name*/
     sizeof(Py@NAME@ScalarObject),               /* tp_basicsize*/
     0,                                          /* tp_itemsize*/
     0,                                          /* tp_dealloc*/
@@ -3802,7 +3793,7 @@ NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = {
     0,                                          /* tp_setattro*/
     0,                                          /* tp_as_buffer*/
     Py_TPFLAGS_DEFAULT,                         /* tp_flags*/
-    _THIS_DOC,                                  /* tp_doc */
+    0,                                          /* tp_doc */
     0,                                          /* tp_traverse */
     0,                                          /* tp_clear */
     0,                                          /* tp_richcompare */
@@ -3830,9 +3821,7 @@ NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = {
     0,                                          /* tp_del */
     0,                                          /* tp_version_tag */
 };
-#undef _THIS_SIZE1
-#undef _THIS_SIZE2
-#undef _THIS_DOC
+#undef _THIS_SIZE
 
 /**end repeat**/
 
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 3ac71e285..30820737e 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -89,11 +89,19 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
             return NULL;
         }
 
+        if (PyArray_BASE(self) != NULL
+              || (((PyArrayObject_fields *)self)->weakreflist != NULL)) {
+            PyErr_SetString(PyExc_ValueError,
+                    "cannot resize an array that "
+                    "references or is referenced\n"
+                    "by another array in this way. Use the np.resize function.");
+            return NULL;
+        }
         if (refcheck) {
 #ifdef PYPY_VERSION
             PyErr_SetString(PyExc_ValueError,
                     "cannot resize an array with refcheck=True on PyPy.\n"
-                    "Use the resize function or refcheck=False");
+                    "Use the np.resize function or refcheck=False");
             return NULL;
 #else
             refcnt = PyArray_REFCOUNT(self);
@@ -102,13 +110,12 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
         else {
             refcnt = 1;
         }
-        if ((refcnt > 2)
-                || (PyArray_BASE(self) != NULL)
-                || (((PyArrayObject_fields *)self)->weakreflist != NULL)) {
+        if (refcnt > 2) {
             PyErr_SetString(PyExc_ValueError,
                     "cannot resize an array that "
                     "references or is referenced\n"
-                    "by another array in this way.  Use the resize function");
+                    "by another array in this way.\n"
+                    "Use the np.resize function or refcheck=False");
             return NULL;
         }
 
diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src
index 8b5eef87a..d960838c8 100644
--- a/numpy/core/src/npymath/ieee754.c.src
+++ b/numpy/core/src/npymath/ieee754.c.src
@@ -568,13 +568,21 @@ int npy_get_floatstatus() {
 
 /*
  * Functions to set the floating point status word.
- * keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h
  */
 
 #if (defined(__unix__) || defined(unix)) && !defined(USG)
 #include <sys/param.h>
 #endif
 
+
+/*
+ * Define floating point status functions. We must define
+ * npy_get_floatstatus_barrier, npy_clear_floatstatus_barrier,
+ * npy_set_floatstatus_{divbyzero, overflow, underflow, invalid}
+ * for all supported platforms.
+ */
+
+
 /* Solaris --------------------------------------------------------*/
 /* --------ignoring SunOS ieee_flags approach, someone else can
 **         deal with that! */
@@ -626,117 +634,94 @@ void npy_set_floatstatus_invalid(void)
     fpsetsticky(FP_X_INV);
 }
 
+#elif defined(_AIX)
+#include <float.h>
+#include <fpxcp.h>
 
-#elif defined(__GLIBC__) || defined(__APPLE__) || \
-      defined(__CYGWIN__) || defined(__MINGW32__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version >= 502114))
-#  include <fenv.h>
-
-int npy_get_floatstatus_barrier(char* param)
+int npy_get_floatstatus_barrier(char *param)
 {
-    int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
-                                FE_UNDERFLOW | FE_INVALID);
+    int fpstatus = fp_read_flag();
     /*
      * By using a volatile, the compiler cannot reorder this call
      */
     if (param != NULL) {
         volatile char NPY_UNUSED(c) = *(char*)param;
     }
-
-    return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
-           ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
-           ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
-           ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
+    return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
+           ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
+           ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
+           ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
 int npy_clear_floatstatus_barrier(char * param)
 {
-    /* testing float status is 50-100 times faster than clearing on x86 */
     int fpstatus = npy_get_floatstatus_barrier(param);
-    if (fpstatus != 0) {
-        feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
-                      FE_UNDERFLOW | FE_INVALID);
-    }
+    fp_swap_flag(0);
 
     return fpstatus;
 }
 
-
 void npy_set_floatstatus_divbyzero(void)
 {
-    feraiseexcept(FE_DIVBYZERO);
+    fp_raise_xcp(FP_DIV_BY_ZERO);
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    feraiseexcept(FE_OVERFLOW);
+    fp_raise_xcp(FP_OVERFLOW);
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    feraiseexcept(FE_UNDERFLOW);
+    fp_raise_xcp(FP_UNDERFLOW);
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    feraiseexcept(FE_INVALID);
-}
-
-#elif defined(_AIX)
-#include <float.h>
-#include <fpxcp.h>
-
-int npy_get_floatstatus_barrier(char *param)
-{
-    int fpstatus = fp_read_flag();
-    /*
-     * By using a volatile, the compiler cannot reorder this call
-     */
-    if (param != NULL) {
-        volatile char NPY_UNUSED(c) = *(char*)param;
-    }
-    return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
-           ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
-           ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
-           ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
+    fp_raise_xcp(FP_INVALID);
 }
 
-int npy_clear_floatstatus_barrier(char * param)
-{
-    int fpstatus = npy_get_floatstatus_barrier(param);
-    fp_swap_flag(0);
+#elif defined(_MSC_VER) || (defined(__osf__) && defined(__alpha))
 
-    return fpstatus;
-}
+/*
+ * By using a volatile floating point value,
+ * the compiler is forced to actually do the requested
+ * operations because of potential concurrency.
+ *
+ * We shouldn't write multiple values to a single
+ * global here, because that would cause
+ * a race condition.
+ */
+static volatile double _npy_floatstatus_x,
+    _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
+    _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
 
 void npy_set_floatstatus_divbyzero(void)
 {
-    fp_raise_xcp(FP_DIV_BY_ZERO);
+    _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    fp_raise_xcp(FP_OVERFLOW);
+    _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    fp_raise_xcp(FP_UNDERFLOW);
+    _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    fp_raise_xcp(FP_INVALID);
+    _npy_floatstatus_inf = NPY_INFINITY;
+    _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
 }
 
-#else
-
 /* MS Windows -----------------------------------------------------*/
 #if defined(_MSC_VER)
 
 #include <float.h>
 
-
 int npy_get_floatstatus_barrier(char *param)
 {
     /*
@@ -796,53 +781,61 @@ int npy_clear_floatstatus_barrier(char *param)
     return fpstatus;
 }
 
+#endif
+/* End of defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) */
+
 #else
+/* General GCC code, should work on most platforms */
+#  include <fenv.h>
 
-int npy_get_floatstatus_barrier(char *NPY_UNUSED(param))
+int npy_get_floatstatus_barrier(char* param)
 {
-    return 0;
+    int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
+                                FE_UNDERFLOW | FE_INVALID);
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
+
+    return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
+           ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
+           ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
+           ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus_barrier(char *param)
+int npy_clear_floatstatus_barrier(char * param)
 {
+    /* testing float status is 50-100 times faster than clearing on x86 */
     int fpstatus = npy_get_floatstatus_barrier(param);
-    return 0;
-}
+    if (fpstatus != 0) {
+        feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
+                      FE_UNDERFLOW | FE_INVALID);
+    }
 
-#endif
+    return fpstatus;
+}
 
-/*
- * By using a volatile floating point value,
- * the compiler is forced to actually do the requested
- * operations because of potential concurrency.
- *
- * We shouldn't write multiple values to a single
- * global here, because that would cause
- * a race condition.
- */
-static volatile double _npy_floatstatus_x,
-    _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
-    _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
 
 void npy_set_floatstatus_divbyzero(void)
 {
-    _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
+    feraiseexcept(FE_DIVBYZERO);
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
+    feraiseexcept(FE_OVERFLOW);
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
+    feraiseexcept(FE_UNDERFLOW);
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    _npy_floatstatus_inf = NPY_INFINITY;
-    _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
+    feraiseexcept(FE_INVALID);
 }
 
 #endif
diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src
index fcbdbe330..8cb74f177 100644
--- a/numpy/core/src/umath/_umath_tests.c.src
+++ b/numpy/core/src/umath/_umath_tests.c.src
@@ -128,6 +128,8 @@ static void
 /**end repeat**/
 
 char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
+/* for use with matrix_multiply code, but different signature */
+char *matmul_signature = "(m?,n),(n,p?)->(m?,p?)";
 
 /**begin repeat
 
@@ -195,6 +197,45 @@ static void
 
 /**end repeat**/
 
+char *cross1d_signature = "(3),(3)->(3)";
+
+/**begin repeat
+
+   #TYPE=LONG,DOUBLE#
+   #typ=npy_long, npy_double#
+*/
+
+/*
+ *  This implements the cross product:
+ *        out[n, 0] = in1[n, 1]*in2[n, 2] - in1[n, 2]*in2[n, 1]
+ *        out[n, 1] = in1[n, 2]*in2[n, 0] - in1[n, 0]*in2[n, 2]
+ *        out[n, 2] = in1[n, 0]*in2[n, 1] - in1[n, 1]*in2[n, 0]
+ */
+static void
+@TYPE@_cross1d(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_3
+    npy_intp stride_a = steps[0], stride_b = steps[1], stride_out = steps[2];
+    BEGIN_OUTER_LOOP_3
+        /* read all six input components before the first store to the output */
+        char *a = args[0], *b = args[1], *out = args[2];
+
+        @typ@ ax = *(@typ@ *)a;
+        @typ@ ay = *(@typ@ *)(a + stride_a);
+        @typ@ az = *(@typ@ *)(a + 2*stride_a);
+        @typ@ bx = *(@typ@ *)b;
+        @typ@ by = *(@typ@ *)(b + stride_b);
+        @typ@ bz = *(@typ@ *)(b + 2*stride_b);
+
+        /* out = (ay*bz - az*by, az*bx - ax*bz, ax*by - ay*bx) */
+        *(@typ@ *)out = ay * bz - az * by;
+        *(@typ@ *)(out + stride_out) = az * bx - ax * bz;
+        *(@typ@ *)(out + 2*stride_out) = ax * by - ay * bx;
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
 char *euclidean_pdist_signature = "(n,d)->(p)";
 
 /**begin repeat
@@ -285,17 +326,39 @@ static void
 
 /**end repeat**/
 
+/*  The following lines were generated using a slightly modified
+    version of code_generators/generate_umath.py and adding these
+    lines to defdict:
+
+defdict = {
+'inner1d' :
+    Ufunc(2, 1, None_,
+        r'''inner on the last dimension and broadcast on the rest \n"
+        "     \"(i),(i)->()\" \n''',
+        TD('ld'),
+        ),
+'innerwt' :
+    Ufunc(3, 1, None_,
+        r'''inner1d with a weight argument \n"
+        "     \"(i),(i),(i)->()\" \n''',
+        TD('ld'),
+        ),
+}
+
+*/
 
 static PyUFuncGenericFunction inner1d_functions[] = { LONG_inner1d, DOUBLE_inner1d };
-static void * inner1d_data[] = { (void *)NULL, (void *)NULL };
+static void *inner1d_data[] = { (void *)NULL, (void *)NULL };
 static char inner1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction innerwt_functions[] = { LONG_innerwt, DOUBLE_innerwt };
-static void * innerwt_data[] = { (void *)NULL, (void *)NULL };
+static void *innerwt_data[] = { (void *)NULL, (void *)NULL };
 static char innerwt_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multiply, FLOAT_matrix_multiply, DOUBLE_matrix_multiply };
 static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL };
 static char matrix_multiply_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG,  NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,  NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
-
+static PyUFuncGenericFunction cross1d_functions[] = { LONG_cross1d, DOUBLE_cross1d };
+static void *cross1d_data[] = { (void *)NULL, (void *)NULL };
+static char cross1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction euclidean_pdist_functions[] =
                             { FLOAT_euclidean_pdist, DOUBLE_euclidean_pdist };
 static void *eucldiean_pdist_data[] = { (void *)NULL, (void *)NULL };
@@ -303,7 +366,7 @@ static char euclidean_pdist_signatures[] = { NPY_FLOAT, NPY_FLOAT,
                                              NPY_DOUBLE, NPY_DOUBLE };
 
 static PyUFuncGenericFunction cumsum_functions[] = { LONG_cumsum, DOUBLE_cumsum };
-static void * cumsum_data[] = { (void *)NULL, (void *)NULL };
+static void *cumsum_data[] = { (void *)NULL, (void *)NULL };
 static char cumsum_signatures[] = { NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE };
 
 
@@ -346,6 +409,17 @@ addUfuncs(PyObject *dictionary) {
     }
     PyDict_SetItemString(dictionary, "matrix_multiply", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(matrix_multiply_functions,
+                    matrix_multiply_data, matrix_multiply_signatures,
+                    3, 2, 1, PyUFunc_None, "matmul",
+                    "matmul on last two dimensions, with some being optional\n"
+                    "     \"(m?,n),(n,p?)->(m?,p?)\" \n",
+                    0, matmul_signature);
+    if (f == NULL) {
+        return -1;
+    }
+    PyDict_SetItemString(dictionary, "matmul", f);
+    Py_DECREF(f);
     f = PyUFunc_FromFuncAndDataAndSignature(euclidean_pdist_functions,
                     eucldiean_pdist_data, euclidean_pdist_signatures,
                     2, 1, 1, PyUFunc_None, "euclidean_pdist",
@@ -376,6 +450,16 @@ addUfuncs(PyObject *dictionary) {
     }
     PyDict_SetItemString(dictionary, "inner1d_no_doc", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(cross1d_functions, cross1d_data,
+                    cross1d_signatures, 2, 2, 1, PyUFunc_None, "cross1d",
+                    "cross product on the last dimension and broadcast on the rest \n"\
+                    "     \"(3),(3)->(3)\" \n",
+                    0, cross1d_signature);
+    if (f == NULL) {
+        return -1;
+    }
+    PyDict_SetItemString(dictionary, "cross1d", f);
+    Py_DECREF(f);
 
     return 0;
 }
@@ -385,9 +469,10 @@ static PyObject *
 UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
 {
     int nin, nout, i;
-    PyObject *signature, *sig_str;
-    PyUFuncObject *f = NULL;
-    PyObject *core_num_dims = NULL, *core_dim_ixs = NULL;
+    PyObject *signature=NULL, *sig_str=NULL;
+    PyUFuncObject *f=NULL;
+    PyObject *core_num_dims=NULL, *core_dim_ixs=NULL;
+    PyObject *core_dim_flags=NULL, *core_dim_sizes=NULL;
     int core_enabled;
     int core_num_ixs = 0;
 
@@ -442,7 +527,7 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
             goto fail;
         }
         for (i = 0; i < core_num_ixs; i++) {
-            PyObject * val = PyLong_FromLong(f->core_dim_ixs[i]);
+            PyObject *val = PyLong_FromLong(f->core_dim_ixs[i]);
             PyTuple_SET_ITEM(core_dim_ixs, i, val);
         }
     }
@@ -450,13 +535,44 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
         Py_INCREF(Py_None);
         core_dim_ixs = Py_None;
     }
+    if (f->core_dim_flags != NULL) {
+        core_dim_flags = PyTuple_New(f->core_num_dim_ix);
+        if (core_dim_flags == NULL) {
+            goto fail;
+        }
+        for (i = 0; i < f->core_num_dim_ix; i++) {
+            PyObject *val = PyLong_FromLong(f->core_dim_flags[i]);
+            PyTuple_SET_ITEM(core_dim_flags, i, val);
+        }
+    }
+    else {
+        Py_INCREF(Py_None);
+        core_dim_flags = Py_None;
+    }
+    if (f->core_dim_sizes != NULL) {
+        core_dim_sizes = PyTuple_New(f->core_num_dim_ix);
+        if (core_dim_sizes == NULL) {
+            goto fail;
+        }
+        for (i = 0; i < f->core_num_dim_ix; i++) {
+            PyObject *val = PyLong_FromLong(f->core_dim_sizes[i]);
+            PyTuple_SET_ITEM(core_dim_sizes, i, val);
+        }
+    }
+    else {
+        Py_INCREF(Py_None);
+        core_dim_sizes = Py_None;
+    }
     Py_DECREF(f);
-    return Py_BuildValue("iOO", core_enabled, core_num_dims, core_dim_ixs);
+    return Py_BuildValue("iOOOO", core_enabled, core_num_dims,
+                         core_dim_ixs, core_dim_flags, core_dim_sizes);
 
 fail:
     Py_XDECREF(f);
     Py_XDECREF(core_num_dims);
     Py_XDECREF(core_dim_ixs);
+    Py_XDECREF(core_dim_flags);
+    Py_XDECREF(core_dim_sizes);
     return NULL;
 }
 
@@ -464,8 +580,8 @@ static PyMethodDef UMath_TestsMethods[] = {
     {"test_signature",  UMath_Tests_test_signature, METH_VARARGS,
      "Test signature parsing of ufunc. \n"
      "Arguments: nin nout signature \n"
-     "If fails, it returns NULL. Otherwise it will returns 0 for scalar ufunc "
-     "and 1 for generalized ufunc. \n",
+     "If fails, it returns NULL. Otherwise it returns a tuple of ufunc "
+     "internals. \n",
      },
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };
@@ -504,6 +620,7 @@ PyMODINIT_FUNC init_umath_tests(void) {
     if (m == NULL) {
         return RETVAL(NULL);
     }
+
     import_array();
     import_ufunc();
 
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 66b69f555..e62942efd 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1327,27 +1327,12 @@ NPY_NO_EXPORT void
 NPY_NO_EXPORT void
 @TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
-    npy_bool give_future_warning = 0;
     BINARY_LOOP {
         const @type@ in1 = *(@type@ *)ip1;
         const @type@ in2 = *(@type@ *)ip2;
-        const npy_bool res = in1 @OP@ in2;
-        *((npy_bool *)op1) = res;
-
-        if ((in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) && res) {
-            give_future_warning = 1;
-        }
-    }
-    if (give_future_warning) {
-        NPY_ALLOW_C_API_DEF
-        NPY_ALLOW_C_API;
-        /* 2016-01-18, 1.11 */
-        if (DEPRECATE_FUTUREWARNING(
-                "In the future, 'NAT @OP@ x' and 'x @OP@ NAT' "
-                "will always be False.") < 0) {
-            /* nothing to do, we return anyway */
-        }
-        NPY_DISABLE_C_API;
+        *((npy_bool *)op1) = (in1 @OP@ in2 &&
+                              in1 != NPY_DATETIME_NAT &&
+                              in2 != NPY_DATETIME_NAT);  /* any NaT => False */
     }
 }
 /**end repeat1**/
@@ -1355,26 +1340,12 @@ NPY_NO_EXPORT void
 NPY_NO_EXPORT void
 @TYPE@_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
-    npy_bool give_future_warning = 0;
     BINARY_LOOP {
         const @type@ in1 = *(@type@ *)ip1;
         const @type@ in2 = *(@type@ *)ip2;
-        *((npy_bool *)op1) = in1 != in2;
-
-        if (in1 == NPY_DATETIME_NAT && in2 == NPY_DATETIME_NAT) {
-            give_future_warning = 1;
-        }
-    }
-    if (give_future_warning) {
-        NPY_ALLOW_C_API_DEF
-        NPY_ALLOW_C_API;
-        /* 2016-01-18, 1.11 */
-        if (DEPRECATE_FUTUREWARNING(
-                "In the future, NAT != NAT will be True "
-                "rather than False.") < 0) {
-            /* nothing to do, we return anyway */
-        }
-        NPY_DISABLE_C_API;
+        *((npy_bool *)op1) = (in1 != in2 ||
+                              in1 == NPY_DATETIME_NAT ||
+                              in2 == NPY_DATETIME_NAT);  /* any NaT => True */
     }
 }
 
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 5c0568c12..47f9168e5 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -17,8 +17,6 @@
 
 #include "lowlevel_strided_loops.h"
 #include "numpy/npy_common.h"
-/* for NO_FLOATING_POINT_SUPPORT */
-#include "numpy/ufuncobject.h"
 #include "numpy/npy_math.h"
 #ifdef NPY_HAVE_SSE2_INTRINSICS
 #include <emmintrin.h>
@@ -132,7 +130,6 @@ abs_ptrdiff(char *a, char *b)
  * #func = sqrt, absolute, negative, minimum, maximum#
  * #check = IS_BLOCKABLE_UNARY*3, IS_BLOCKABLE_REDUCE*2 #
  * #name = unary*3, unary_reduce*2#
- * #minmax = 0*3, 1*2#
  */
 
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
@@ -146,9 +143,6 @@ sse2_@func@_@TYPE@(@type@ *, @type@ *, const npy_intp n);
 static NPY_INLINE int
 run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
 {
-#if @minmax@ && (defined NO_FLOATING_POINT_SUPPORT)
-    return 0;
-#else
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
     if (@check@(sizeof(@type@), 16)) {
         sse2_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0]);
@@ -156,7 +150,6 @@ run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps
     }
 #endif
     return 0;
-#endif
 }
 
 /**end repeat1**/
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 459b0a594..b82c74109 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -46,6 +46,7 @@
 #include "npy_import.h"
 #include "extobj.h"
 #include "common.h"
+#include "numpyos.h"
 
 /********** PRINTF DEBUG TRACING **************/
 #define NPY_UF_DBG_TRACING 0
@@ -480,7 +481,27 @@ _is_alnum_underscore(char ch)
 }
 
 /*
- * Return the ending position of a variable name
+ * Convert the leading base-10 number in str to npy_intp; -1 on failure
+ */
+static npy_intp
+_get_size(const char* str)
+{
+    char *stop;
+    npy_longlong size = NumPyOS_strtoll(str, &stop, 10);
+
+    if (stop == str || _is_alpha_underscore(*stop)) {
+        /* not well formed: stop did not advance, or a name char follows */
+         return -1;
+    }
+    if (size >= NPY_MAX_INTP || size <= NPY_MIN_INTP) {
+        /* out of range for npy_intp (the limits themselves are rejected) */
+        return -1;
+    }
+    return size;  /* can still be <= 0; callers must reject that */
+ }
+
+/*
+ * Return the ending position of a variable name including optional modifier
  */
 static int
 _get_end_of_name(const char* str, int offset)
@@ -489,6 +510,9 @@ _get_end_of_name(const char* str, int offset)
     while (_is_alnum_underscore(str[ret])) {
         ret++;
     }
+    if (str[ret] == '?') {
+        ret ++;
+    }
     return ret;
 }
 
@@ -530,7 +554,6 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
                         "_parse_signature with NULL signature");
         return -1;
     }
-
     len = strlen(signature);
     ufunc->core_signature = PyArray_malloc(sizeof(char) * (len+1));
     if (ufunc->core_signature) {
@@ -546,13 +569,22 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
     ufunc->core_enabled = 1;
     ufunc->core_num_dim_ix = 0;
     ufunc->core_num_dims = PyArray_malloc(sizeof(int) * ufunc->nargs);
-    ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len); /* shrink this later */
     ufunc->core_offsets = PyArray_malloc(sizeof(int) * ufunc->nargs);
-    if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL
-        || ufunc->core_offsets == NULL) {
+    /* The next three items will be shrunk later */
+    ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len);
+    ufunc->core_dim_sizes = PyArray_malloc(sizeof(npy_intp) * len);
+    ufunc->core_dim_flags = PyArray_malloc(sizeof(npy_uint32) * len);
+
+    if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL ||
+        ufunc->core_offsets == NULL ||
+        ufunc->core_dim_sizes == NULL ||
+        ufunc->core_dim_flags == NULL) {
         PyErr_NoMemory();
         goto fail;
     }
+    for (i = 0; i < len; i++) {
+        ufunc->core_dim_flags[i] = 0;
+    }
 
     i = _next_non_white_space(signature, 0);
     while (signature[i] != '\0') {
@@ -577,26 +609,70 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
         i = _next_non_white_space(signature, i + 1);
         while (signature[i] != ')') {
             /* loop over core dimensions */
-            int j = 0;
-            if (!_is_alpha_underscore(signature[i])) {
-                parse_error = "expect dimension name";
+            int ix, i_end;
+            npy_intp frozen_size;
+            npy_bool can_ignore;
+
+            if (signature[i] == '\0') {
+                parse_error = "unexpected end of signature string";
                 goto fail;
             }
-            while (j < ufunc->core_num_dim_ix) {
-                if (_is_same_name(signature+i, var_names[j])) {
+            /*
+             * Is this a variable or a fixed size dimension?
+             */
+            if (_is_alpha_underscore(signature[i])) {
+                frozen_size = -1;
+            }
+            else {
+                frozen_size = (npy_intp)_get_size(signature + i);
+                if (frozen_size <= 0) {
+                    parse_error = "expect dimension name or non-zero frozen size";
+                    goto fail;
+                }
+            }
+            /* Is this dimension flexible? */
+            i_end = _get_end_of_name(signature, i);
+            can_ignore = (i_end > 0 && signature[i_end - 1] == '?');
+            /*
+             * Determine whether we already saw this dimension name,
+             * get its index, and set its properties
+             */
+            for(ix = 0; ix < ufunc->core_num_dim_ix; ix++) {
+                if (frozen_size > 0 ?
+                    frozen_size == ufunc->core_dim_sizes[ix] :
+                    _is_same_name(signature + i, var_names[ix])) {
                     break;
                 }
-                j++;
             }
-            if (j >= ufunc->core_num_dim_ix) {
-                var_names[j] = signature+i;
+            /*
+             * If a new dimension, store its properties; if old, check consistency.
+             */
+            if (ix == ufunc->core_num_dim_ix) {
                 ufunc->core_num_dim_ix++;
+                var_names[ix] = signature + i;
+                ufunc->core_dim_sizes[ix] = frozen_size;
+                if (frozen_size < 0) {
+                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_SIZE_INFERRED;
+                }
+                if (can_ignore) {
+                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_CAN_IGNORE;
+                }
+            } else {
+                if (can_ignore && !(ufunc->core_dim_flags[ix] &
+                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
+                    parse_error = "? cannot be used, name already seen without ?";
+                    goto fail;
+                }
+                if (!can_ignore && (ufunc->core_dim_flags[ix] &
+                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
+                    parse_error = "? must be used, name already seen with ?";
+                    goto fail;
+                }
             }
-            ufunc->core_dim_ixs[cur_core_dim] = j;
+            ufunc->core_dim_ixs[cur_core_dim] = ix;
             cur_core_dim++;
             nd++;
-            i = _get_end_of_name(signature, i);
-            i = _next_non_white_space(signature, i);
+            i = _next_non_white_space(signature, i_end);
             if (signature[i] != ',' && signature[i] != ')') {
                 parse_error = "expect ',' or ')'";
                 goto fail;
@@ -633,7 +709,14 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
         goto fail;
     }
     ufunc->core_dim_ixs = PyArray_realloc(ufunc->core_dim_ixs,
-            sizeof(int)*cur_core_dim);
+            sizeof(int) * cur_core_dim);
+    ufunc->core_dim_sizes = PyArray_realloc(
+            ufunc->core_dim_sizes,
+            sizeof(npy_intp) * ufunc->core_num_dim_ix);
+    ufunc->core_dim_flags = PyArray_realloc(
+            ufunc->core_dim_flags,
+            sizeof(npy_uint32) * ufunc->core_num_dim_ix);
+
     /* check for trivial core-signature, e.g. "(),()->()" */
     if (cur_core_dim == 0) {
         ufunc->core_enabled = 0;
@@ -1935,6 +2018,72 @@ fail:
 }
 
 /*
+ * Validate that operands have enough dimensions; flexible ('?') dimensions
+ * may be dropped, updating core_dim_flags and op_core_num_dims in place.
+ */
+static int
+_validate_num_dims(PyUFuncObject *ufunc, PyArrayObject **op,
+                   npy_uint32 *core_dim_flags,
+                   int *op_core_num_dims) {  /* both arrays may be modified */
+    int i, j;
+    int nin = ufunc->nin;
+    int nop = ufunc->nargs;
+
+    for (i = 0; i < nop; i++) {
+        if (op[i] != NULL) {  /* NULL operands are not checked */
+            int op_ndim = PyArray_NDIM(op[i]);
+
+            if (op_ndim < op_core_num_dims[i]) {
+                int core_offset = ufunc->core_offsets[i];
+                /* Too few dimensions, but some core dims may be ignorable */
+                for (j = core_offset;
+                     j < core_offset + ufunc->core_num_dims[i]; j++) {
+                    int core_dim_index = ufunc->core_dim_ixs[j];
+                    if ((core_dim_flags[core_dim_index] &
+                         UFUNC_CORE_DIM_CAN_IGNORE)) {
+                        int i1, j1, k;
+                        /*
+                         * Found a dimension that can be ignored. Flag it as
+                         * missing and clear CAN_IGNORE (the XOR clears it, as
+                         * the bit is known to be set), so it is dropped once.
+                         */
+                        core_dim_flags[core_dim_index] |= UFUNC_CORE_DIM_MISSING;
+                        core_dim_flags[core_dim_index] ^= UFUNC_CORE_DIM_CAN_IGNORE;
+                        /*
+                         * Reduce the number of core dimensions for all
+                         * operands that use this one (including ours); k
+                         * walks the flat core_dim_ixs. Then see if we're OK.
+                         */
+                        for (i1 = 0, k=0; i1 < nop; i1++) {
+                            for (j1 = 0; j1 < ufunc->core_num_dims[i1]; j1++) {
+                                if (ufunc->core_dim_ixs[k++] == core_dim_index) {
+                                    op_core_num_dims[i1]--;
+                                }
+                            }
+                        }
+                        if (op_ndim == op_core_num_dims[i]) {
+                            break;
+                        }
+                    }
+                }
+                if (op_ndim < op_core_num_dims[i]) {  /* still too few */
+                    PyErr_Format(PyExc_ValueError,
+                         "%s: %s operand %d does not have enough "
+                         "dimensions (has %d, gufunc core with "
+                         "signature %s requires %d)",
+                         ufunc_get_name_cstr(ufunc),
+                         i < nin ? "Input" : "Output",
+                         i < nin ? i : i - nin, PyArray_NDIM(op[i]),
+                         ufunc->core_signature, op_core_num_dims[i]);
+                    return -1;  /* ValueError has been set */
+                }
+            }
+        }
+    }
+    return 0;  /* every non-NULL operand has enough dimensions */
+}
+
+/*
  * Check whether any of the outputs of a gufunc has core dimensions.
  */
 static int
@@ -2007,7 +2156,7 @@ _check_keepdims_support(PyUFuncObject *ufunc) {
  * Returns 0 on success, and -1 on failure
  */
 static int
-_parse_axes_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axes,
+_parse_axes_arg(PyUFuncObject *ufunc, int op_core_num_dims[], PyObject *axes,
                 PyArrayObject **op, int broadcast_ndim, int **remap_axis) {
     int nin = ufunc->nin;
     int nop = ufunc->nargs;
@@ -2037,7 +2186,7 @@ _parse_axes_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axes,
         PyObject *op_axes_tuple, *axis_item;
         int axis, op_axis;
 
-        op_ncore = core_num_dims[iop];
+        op_ncore = op_core_num_dims[iop];
         if (op[iop] != NULL) {
             op_ndim = PyArray_NDIM(op[iop]);
             op_nbroadcast = op_ndim - op_ncore;
@@ -2191,57 +2340,72 @@ _parse_axis_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axis,
  *
  * Returns 0 on success, and -1 on failure
  *
- * The behavior has been changed in NumPy 1.10.0, and the following
+ * The behavior has been changed in NumPy 1.16.0, and the following
  * requirements must be fulfilled or an error will be raised:
  *  * Arguments, both input and output, must have at least as many
  *    dimensions as the corresponding number of core dimensions. In
- *    previous versions, 1's were prepended to the shape as needed.
+ *    versions before 1.10, 1's were prepended to the shape as needed.
  *  * Core dimensions with same labels must have exactly matching sizes.
- *    In previous versions, core dimensions of size 1 would broadcast
+ *    In versions before 1.10, core dimensions of size 1 would broadcast
  *    against other core dimensions with the same label.
  *  * All core dimensions must have their size specified by a passed in
- *    input or output argument. In previous versions, core dimensions in
+ *    input or output argument. In versions before 1.10, core dimensions in
  *    an output argument that were not specified in an input argument,
  *    and whose size could not be inferred from a passed in output
  *    argument, would have their size set to 1.
+ *  * Core dimensions may be fixed, new in NumPy 1.16
  */
 static int
 _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
-                   npy_intp* core_dim_sizes, int **remap_axis) {
+                   int *op_core_num_dims, npy_uint32 *core_dim_flags,
+                   npy_intp *core_dim_sizes, int **remap_axis) {
     int i;
     int nin = ufunc->nin;
     int nout = ufunc->nout;
     int nop = nin + nout;
 
-    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
-        core_dim_sizes[i] = -1;
-    }
     for (i = 0; i < nop; ++i) {
         if (op[i] != NULL) {
             int idim;
             int dim_offset = ufunc->core_offsets[i];
-            int num_dims = ufunc->core_num_dims[i];
-            int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
+            int core_start_dim = PyArray_NDIM(op[i]) - op_core_num_dims[i];
+            int dim_delta = 0;
+
+            /* checked before this routine gets called */
+            assert(core_start_dim >= 0);
+
             /*
              * Make sure every core dimension exactly matches all other core
-             * dimensions with the same label.
+             * dimensions with the same label. Note that flexible dimensions
+             * may have been removed at this point, if so, they are marked
+             * with UFUNC_CORE_DIM_MISSING.
              */
-            for (idim = 0; idim < num_dims; ++idim) {
-                int core_dim_index = ufunc->core_dim_ixs[dim_offset+idim];
-                npy_intp op_dim_size = PyArray_DIM(
-                    op[i], REMAP_AXIS(i, core_start_dim+idim));
-
-                if (core_dim_sizes[core_dim_index] == -1) {
+            for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+                int core_index = dim_offset + idim;
+                int core_dim_index = ufunc->core_dim_ixs[core_index];
+                npy_intp core_dim_size = core_dim_sizes[core_dim_index];
+                npy_intp op_dim_size;
+
+                /* can only happen if flexible; dimension missing altogether */
+                if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) {
+                    op_dim_size = 1;
+                    dim_delta++; /* for indexing in dimensions */
+                }
+                else {
+                    op_dim_size = PyArray_DIM(op[i],
+                             REMAP_AXIS(i, core_start_dim + idim - dim_delta));
+                }
+                if (core_dim_sizes[core_dim_index] < 0) {
                     core_dim_sizes[core_dim_index] = op_dim_size;
                 }
-                else if (op_dim_size != core_dim_sizes[core_dim_index]) {
+                else if (op_dim_size != core_dim_size) {
                     PyErr_Format(PyExc_ValueError,
                             "%s: %s operand %d has a mismatch in its "
                             "core dimension %d, with gufunc "
                             "signature %s (size %zd is different "
                             "from %zd)",
                             ufunc_get_name_cstr(ufunc), i < nin ? "Input" : "Output",
-                            i < nin ? i : i - nin, idim,
+                            i < nin ? i : i - nin, idim - dim_delta,
                             ufunc->core_signature, op_dim_size,
                             core_dim_sizes[core_dim_index]);
                     return -1;
@@ -2253,39 +2417,29 @@ _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
     /*
      * Make sure no core dimension is unspecified.
      */
-    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
-        if (core_dim_sizes[i] == -1) {
-            break;
-        }
-    }
-    if (i != ufunc->core_num_dim_ix) {
-        /*
-         * There is at least one core dimension missing, find in which
-         * operand it comes up first (it has to be an output operand).
-         */
-        const int missing_core_dim = i;
-        int out_op;
-        for (out_op = nin; out_op < nop; ++out_op) {
-            int first_idx = ufunc->core_offsets[out_op];
-            int last_idx = first_idx + ufunc->core_num_dims[out_op];
-            for (i = first_idx; i < last_idx; ++i) {
-                if (ufunc->core_dim_ixs[i] == missing_core_dim) {
-                    break;
-                }
-            }
-            if (i < last_idx) {
-                /* Change index offsets for error message */
-                out_op -= nin;
-                i -= first_idx;
-                break;
+    for (i = nin; i < nop; ++i) {
+        int idim;
+        int dim_offset = ufunc->core_offsets[i];
+
+        for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+            int core_dim_index = ufunc->core_dim_ixs[dim_offset + idim];
+
+            /* check all cases where the size has not yet been set */
+            if (core_dim_sizes[core_dim_index] < 0) {
+                /*
+                 * Oops, this dimension was never specified
+                 * (can only happen if output op not given)
+                 */
+                PyErr_Format(PyExc_ValueError,
+                        "%s: Output operand %d has core dimension %d "
+                        "unspecified, with gufunc signature %s",
+                        ufunc_get_name_cstr(ufunc), i - nin, idim,
+                        ufunc->core_signature);
+                return -1;
             }
         }
-        PyErr_Format(PyExc_ValueError,
-                     "%s: Output operand %d has core dimension %d "
-                     "unspecified, with gufunc signature %s",
-                     ufunc_get_name_cstr(ufunc), out_op, i, ufunc->core_signature);
-        return -1;
     }
+
     return 0;
 }
 
@@ -2324,6 +2478,26 @@ _get_identity(PyUFuncObject *ufunc, npy_bool *reorderable) {
     }
 }
 
+/*
+ * Copy the parts of the ufunc structure that may need to be changed during
+ * execution into caller-provided arrays.  Currently always returns 0.
+ */
+static int
+_initialize_variable_parts(PyUFuncObject *ufunc,
+                           int op_core_num_dims[],    /* out: nargs entries */
+                           npy_intp core_dim_sizes[], /* out: core_num_dim_ix */
+                           npy_uint32 core_dim_flags[]) { /* out: same length */
+    int i;
+
+    for (i = 0; i < ufunc->nargs; i++) {  /* one entry per operand */
+        op_core_num_dims[i] = ufunc->core_num_dims[i];
+    }
+    for (i = 0; i < ufunc->core_num_dim_ix; i++) {  /* one per core dim index */
+        core_dim_sizes[i] = ufunc->core_dim_sizes[i];
+        core_dim_flags[i] = ufunc->core_dim_flags[i];
+    }
+    return 0;  /* no failure path at present */
+}
 
 static int
 PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
@@ -2340,10 +2514,10 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
 
     /* Use remapped axes for generalized ufunc */
     int broadcast_ndim, iter_ndim;
-    int core_num_dims_array[NPY_MAXARGS];
-    int *core_num_dims;
+    int op_core_num_dims[NPY_MAXARGS];
     int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
     int *op_axes[NPY_MAXARGS];
+    npy_uint32 core_dim_flags[NPY_MAXARGS];
 
     npy_uint32 op_flags[NPY_MAXARGS];
     npy_intp iter_shape[NPY_MAXARGS];
@@ -2398,6 +2572,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         dtypes[i] = NULL;
         arr_prep[i] = NULL;
     }
+    /* Initialize possibly variable parts to the values from the ufunc */
+    retval = _initialize_variable_parts(ufunc, op_core_num_dims,
+                                        core_dim_sizes, core_dim_flags);
+    if (retval < 0) {
+        goto fail;
+    }
 
     NPY_UF_DBG_PRINT("Getting arguments\n");
 
@@ -2429,41 +2609,28 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         }
     }
     /*
-     * If keepdims is set and true, signal all dimensions will be the same.
+     * If keepdims is set and true, which means all input dimensions are
+     * the same, signal that all output dimensions will be the same too.
      */
     if (keepdims == 1) {
-        int num_dims = ufunc->core_num_dims[0];
-        for (i = 0; i < nop; ++i) {
-            core_num_dims_array[i] = num_dims;
+        int num_dims = op_core_num_dims[0];
+        for (i = nin; i < nop; ++i) {
+            op_core_num_dims[i] = num_dims;
         }
-        core_num_dims = core_num_dims_array;
     }
     else {
         /* keepdims was not set or was false; no adjustment necessary */
-        core_num_dims = ufunc->core_num_dims;
         keepdims = 0;
     }
     /*
      * Check that operands have the minimum dimensions required.
      * (Just checks core; broadcast dimensions are tested by the iterator.)
      */
-    for (i = 0; i < nop; i++) {
-        if (op[i] != NULL && PyArray_NDIM(op[i]) < core_num_dims[i]) {
-            PyErr_Format(PyExc_ValueError,
-                         "%s: %s operand %d does not have enough "
-                         "dimensions (has %d, gufunc core with "
-                         "signature %s requires %d)",
-                         ufunc_name,
-                         i < nin ? "Input" : "Output",
-                         i < nin ? i : i - nin,
-                         PyArray_NDIM(op[i]),
-                         ufunc->core_signature,
-                         core_num_dims[i]);
-            retval = -1;
-            goto fail;
-        }
+    retval = _validate_num_dims(ufunc, op, core_dim_flags,
+                                op_core_num_dims);
+    if (retval < 0) {
+        goto fail;
     }
-
     /*
      * Figure out the number of iteration dimensions, which
      * is the broadcast result of all the input non-core
@@ -2471,30 +2638,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
      */
     broadcast_ndim = 0;
     for (i = 0; i < nin; ++i) {
-        int n = PyArray_NDIM(op[i]) - core_num_dims[i];
+        int n = PyArray_NDIM(op[i]) - op_core_num_dims[i];
         if (n > broadcast_ndim) {
             broadcast_ndim = n;
         }
     }
 
-    /*
-     * Figure out the number of iterator creation dimensions,
-     * which is the broadcast dimensions + all the core dimensions of
-     * the outputs, so that the iterator can allocate those output
-     * dimensions following the rules of order='F', for example.
-     */
-    iter_ndim = broadcast_ndim;
-    for (i = nin; i < nop; ++i) {
-        iter_ndim += core_num_dims[i];
-    }
-    if (iter_ndim > NPY_MAXDIMS) {
-        PyErr_Format(PyExc_ValueError,
-                    "too many dimensions for generalized ufunc %s",
-                    ufunc_name);
-        retval = -1;
-        goto fail;
-    }
-
     /* Possibly remap axes. */
     if (axes != NULL || axis != NULL) {
         remap_axis = PyArray_malloc(sizeof(remap_axis[0]) * nop);
@@ -2508,11 +2657,11 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
             remap_axis[i] = remap_axis_memory + i * NPY_MAXDIMS;
         }
         if (axis) {
-            retval = _parse_axis_arg(ufunc, core_num_dims, axis, op,
+            retval = _parse_axis_arg(ufunc, op_core_num_dims, axis, op,
                                      broadcast_ndim, remap_axis);
         }
         else {
-            retval = _parse_axes_arg(ufunc, core_num_dims, axes, op,
+            retval = _parse_axes_arg(ufunc, op_core_num_dims, axes, op,
                                      broadcast_ndim, remap_axis);
         }
         if(retval < 0) {
@@ -2521,10 +2670,28 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     }
 
     /* Collect the lengths of the labelled core dimensions */
-    retval = _get_coredim_sizes(ufunc, op, core_dim_sizes, remap_axis);
+    retval = _get_coredim_sizes(ufunc, op, op_core_num_dims, core_dim_flags,
+                                core_dim_sizes, remap_axis);
     if(retval < 0) {
         goto fail;
     }
+    /*
+     * Figure out the number of iterator creation dimensions,
+     * which is the broadcast dimensions + all the core dimensions of
+     * the outputs, so that the iterator can allocate those output
+     * dimensions following the rules of order='F', for example.
+     */
+    iter_ndim = broadcast_ndim;
+    for (i = nin; i < nop; ++i) {
+        iter_ndim += op_core_num_dims[i];
+    }
+    if (iter_ndim > NPY_MAXDIMS) {
+        PyErr_Format(PyExc_ValueError,
+                    "too many dimensions for generalized ufunc %s",
+                    ufunc_name);
+        retval = -1;
+        goto fail;
+    }
 
     /* Fill in the initial part of 'iter_shape' */
     for (idim = 0; idim < broadcast_ndim; ++idim) {
@@ -2537,11 +2704,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         int n;
 
         if (op[i]) {
-            /*
-             * Note that n may be negative if broadcasting
-             * extends into the core dimensions.
-             */
-            n = PyArray_NDIM(op[i]) - core_num_dims[i];
+            n = PyArray_NDIM(op[i]) - op_core_num_dims[i];
         }
         else {
             n = broadcast_ndim;
@@ -2565,24 +2728,49 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         /* Except for when it belongs to this output */
         if (i >= nin) {
             int dim_offset = ufunc->core_offsets[i];
-            int num_dims = core_num_dims[i];
+            int num_removed = 0;
             /*
              * Fill in 'iter_shape' and 'op_axes' for the core dimensions
              * of this output. Here, we have to be careful: if keepdims
-             * was used, then this axis is not a real core dimension,
-             * but is being added back for broadcasting, so its size is 1.
+             * was used, then the axes are not real core dimensions, but
+             * are being added back for broadcasting, so their size is 1.
+             * If the axis was removed, we should skip altogether.
              */
-            for (idim = 0; idim < num_dims; ++idim) {
-                iter_shape[j] = keepdims ? 1 : core_dim_sizes[
-                                        ufunc->core_dim_ixs[dim_offset + idim]];
-                op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim);
-                ++j;
+            if (keepdims) {
+                for (idim = 0; idim < op_core_num_dims[i]; ++idim) {
+                    iter_shape[j] = 1;
+                    op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim);
+                    ++j;
+                }
+            }
+            else {
+                for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+                    int core_index = dim_offset + idim;
+                    int core_dim_index = ufunc->core_dim_ixs[core_index];
+                    if ((core_dim_flags[core_dim_index] &
+                         UFUNC_CORE_DIM_MISSING)) {
+                        /* skip it */
+                        num_removed++;
+                        continue;
+                    }
+                    iter_shape[j] = core_dim_sizes[ufunc->core_dim_ixs[core_index]];
+                    op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim - num_removed);
+                    ++j;
+                }
             }
         }
 
         op_axes[i] = op_axes_arrays[i];
     }
 
+#if NPY_UF_DBG_TRACING
+    printf("iter shapes:");
+    for (j=0; j < iter_ndim; j++) {
+        printf(" %ld", iter_shape[j]);
+    }
+    printf("\n");
+#endif
+
     /* Get the buffersize and errormask */
     if (_get_bufsize_errmask(extobj, ufunc_name, &buffersize, &errormask) < 0) {
         retval = -1;
@@ -2705,8 +2893,6 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     /* Copy the strides after the first nop */
     idim = nop;
     for (i = 0; i < nop; ++i) {
-        int num_dims = ufunc->core_num_dims[i];
-        int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
         /*
          * Need to use the arrays in the iterator, not op, because
          * a copy with a different-sized type may have been made.
@@ -2714,20 +2900,31 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         PyArrayObject *arr = NpyIter_GetOperandArray(iter)[i];
         npy_intp *shape = PyArray_SHAPE(arr);
         npy_intp *strides = PyArray_STRIDES(arr);
-        for (j = 0; j < num_dims; ++j) {
-            if (core_start_dim + j >= 0) {
-                /*
-                 * Force the stride to zero when the shape is 1, so
-                 * that the broadcasting works right.
-                 */
-                int remapped_axis = REMAP_AXIS(i, core_start_dim + j);
+        /*
+         * Could be negative if flexible dims are used, but not for
+         * keepdims, since those dimensions are allocated in arr.
+         */
+        int core_start_dim = PyArray_NDIM(arr) - op_core_num_dims[i];
+        int num_removed = 0;
+        int dim_offset = ufunc->core_offsets[i];
+
+        for (j = 0; j < ufunc->core_num_dims[i]; ++j) {
+            int core_dim_index = ufunc->core_dim_ixs[dim_offset + j];
+            /*
+             * Force zero stride when the shape is 1 (always the case for
+             * missing dimensions), so that broadcasting works right.
+             */
+            if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) {
+                num_removed++;
+                inner_strides[idim++] = 0;
+            }
+            else {
+                int remapped_axis = REMAP_AXIS(i, core_start_dim + j - num_removed);
                 if (shape[remapped_axis] != 1) {
                     inner_strides[idim++] = strides[remapped_axis];
                 } else {
                     inner_strides[idim++] = 0;
                 }
-            } else {
-                inner_strides[idim++] = 0;
             }
         }
     }
@@ -4644,7 +4841,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
                                      int unused, const char *signature)
 {
     PyUFuncObject *ufunc;
-
     if (nin + nout > NPY_MAXARGS) {
         PyErr_Format(PyExc_ValueError,
                      "Cannot construct a ufunc with more than %d operands "
@@ -4657,11 +4853,9 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     if (ufunc == NULL) {
         return NULL;
     }
+    memset(ufunc, 0, sizeof(PyUFuncObject));
     PyObject_Init((PyObject *)ufunc, &PyUFunc_Type);
 
-    ufunc->reserved1 = 0;
-    ufunc->reserved2 = NULL;
-
     ufunc->nin = nin;
     ufunc->nout = nout;
     ufunc->nargs = nin+nout;
@@ -4671,9 +4865,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     ufunc->data = data;
     ufunc->types = types;
     ufunc->ntypes = ntypes;
-    ufunc->ptr = NULL;
-    ufunc->obj = NULL;
-    ufunc->userloops=NULL;
 
     /* Type resolution and inner loop selection functions */
     ufunc->type_resolver = &PyUFunc_DefaultTypeResolver;
@@ -4694,15 +4885,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     }
     memset(ufunc->op_flags, 0, sizeof(npy_uint32)*ufunc->nargs);
 
-    ufunc->iter_flags = 0;
-
-    /* generalized ufunc */
-    ufunc->core_enabled = 0;
-    ufunc->core_num_dim_ix = 0;
-    ufunc->core_num_dims = NULL;
-    ufunc->core_dim_ixs = NULL;
-    ufunc->core_offsets = NULL;
-    ufunc->core_signature = NULL;
     if (signature != NULL) {
         if (_parse_signature(ufunc, signature) != 0) {
             Py_DECREF(ufunc);
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 807b03512..5ddfe29ef 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -40,6 +40,52 @@ npy_casting_to_string(NPY_CASTING casting)
             return "<unknown>";
     }
 }
+
+/*
+ * Raise a TypeError saying that `ufunc` cannot use the dtypes of its first
+ * two operands (operands[0] and operands[1]).
+ *
+ * Always returns -1, so a type resolver can `return` this call directly.
+ * If assembling the message itself fails, -1 is still returned and the
+ * error set by that failure (if any) is left in place of the TypeError.
+ */
+static int
+raise_binary_type_reso_error(PyUFuncObject *ufunc, PyArrayObject **operands) {
+    PyObject *errmsg, *piece;
+    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
+    int i;
+
+    errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
+                        "with types ", ufunc_name);
+    /*
+     * Append repr(dtype of operand 0), " and ", repr(dtype of operand 1).
+     * Every piece is checked before it is used. NOTE(review): this assumes
+     * PyUString_ConcatAndDel may not accept NULL arguments on all supported
+     * Python versions -- confirm against npy_3kcompat.h.
+     */
+    for (i = 0; i < 3 && errmsg != NULL; ++i) {
+        if (i == 1) {
+            piece = PyUString_FromString(" and ");
+        }
+        else {
+            piece = PyObject_Repr(
+                    (PyObject *)PyArray_DESCR(operands[i ? 1 : 0]));
+        }
+        if (piece == NULL) {
+            Py_DECREF(errmsg);
+            return -1;
+        }
+        PyUString_ConcatAndDel(&errmsg, piece);
+    }
+    if (errmsg == NULL) {
+        return -1;
+    }
+    PyErr_SetObject(PyExc_TypeError, errmsg);
+    Py_DECREF(errmsg);
+    return -1;
+}
+
+
 /*UFUNC_API
  *
  * Validates that the input operands can be cast to
@@ -605,7 +624,6 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
 {
     int type_num1, type_num2;
     int i;
-    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
 
     type_num1 = PyArray_DESCR(operands[0])->type_num;
     type_num2 = PyArray_DESCR(operands[1])->type_num;
@@ -661,7 +679,7 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (type_num1 == NPY_DATETIME) {
@@ -703,7 +721,7 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (PyTypeNum_ISINTEGER(type_num1) || PyTypeNum_ISBOOL(type_num1)) {
@@ -739,11 +757,11 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
             type_num1 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else {
-        goto type_reso_error;
+        return raise_binary_type_reso_error(ufunc, operands);
     }
 
     /* Check against the casting rules */
@@ -756,21 +774,6 @@ PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
     }
 
     return 0;
-
-type_reso_error: {
-        PyObject *errmsg;
-        errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
-                            "with types ", ufunc_name);
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[0])));
-        PyUString_ConcatAndDel(&errmsg,
-                PyUString_FromString(" and "));
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[1])));
-        PyErr_SetObject(PyExc_TypeError, errmsg);
-        Py_DECREF(errmsg);
-        return -1;
-    }
 }
 
 /*
@@ -793,7 +796,6 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
 {
     int type_num1, type_num2;
     int i;
-    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
 
     type_num1 = PyArray_DESCR(operands[0])->type_num;
     type_num2 = PyArray_DESCR(operands[1])->type_num;
@@ -846,7 +848,7 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (type_num1 == NPY_DATETIME) {
@@ -904,7 +906,7 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
             Py_INCREF(out_dtypes[1]);
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (PyTypeNum_ISINTEGER(type_num1) || PyTypeNum_ISBOOL(type_num1)) {
@@ -922,11 +924,11 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
             type_num1 = NPY_TIMEDELTA;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else {
-        goto type_reso_error;
+        return raise_binary_type_reso_error(ufunc, operands);
     }
 
     /* Check against the casting rules */
@@ -939,21 +941,6 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
     }
 
     return 0;
-
-type_reso_error: {
-        PyObject *errmsg;
-        errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
-                            "with types ", ufunc_name);
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[0])));
-        PyUString_ConcatAndDel(&errmsg,
-                PyUString_FromString(" and "));
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[1])));
-        PyErr_SetObject(PyExc_TypeError, errmsg);
-        Py_DECREF(errmsg);
-        return -1;
-    }
 }
 
 /*
@@ -973,7 +960,6 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
 {
     int type_num1, type_num2;
     int i;
-    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
 
     type_num1 = PyArray_DESCR(operands[0])->type_num;
     type_num2 = PyArray_DESCR(operands[1])->type_num;
@@ -1020,7 +1006,7 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_DOUBLE;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (PyTypeNum_ISINTEGER(type_num1) || PyTypeNum_ISBOOL(type_num1)) {
@@ -1042,7 +1028,7 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
             type_num1 = NPY_LONGLONG;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else if (PyTypeNum_ISFLOAT(type_num1)) {
@@ -1064,11 +1050,11 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
             type_num1 = NPY_DOUBLE;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else {
-        goto type_reso_error;
+        return raise_binary_type_reso_error(ufunc, operands);
     }
 
     /* Check against the casting rules */
@@ -1081,21 +1067,6 @@ PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
     }
 
     return 0;
-
-type_reso_error: {
-        PyObject *errmsg;
-        errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
-                            "with types ", ufunc_name);
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[0])));
-        PyUString_ConcatAndDel(&errmsg,
-                PyUString_FromString(" and "));
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[1])));
-        PyErr_SetObject(PyExc_TypeError, errmsg);
-        Py_DECREF(errmsg);
-        return -1;
-    }
 }
 
 
@@ -1115,7 +1086,6 @@ PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
 {
     int type_num1, type_num2;
     int i;
-    const char *ufunc_name = ufunc_get_name_cstr(ufunc);
 
     type_num1 = PyArray_DESCR(operands[0])->type_num;
     type_num2 = PyArray_DESCR(operands[1])->type_num;
@@ -1183,11 +1153,11 @@ PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
             type_num2 = NPY_DOUBLE;
         }
         else {
-            goto type_reso_error;
+            return raise_binary_type_reso_error(ufunc, operands);
         }
     }
     else {
-        goto type_reso_error;
+        return raise_binary_type_reso_error(ufunc, operands);
     }
 
     /* Check against the casting rules */
@@ -1200,21 +1170,6 @@ PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
     }
 
     return 0;
-
-type_reso_error: {
-        PyObject *errmsg;
-        errmsg = PyUString_FromFormat("ufunc %s cannot use operands "
-                            "with types ", ufunc_name);
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[0])));
-        PyUString_ConcatAndDel(&errmsg,
-                PyUString_FromString(" and "));
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(operands[1])));
-        PyErr_SetObject(PyExc_TypeError, errmsg);
-        Py_DECREF(errmsg);
-        return -1;
-    }
 }
 
 
@@ -1275,7 +1230,7 @@ PyUFunc_MixedDivisionTypeResolver(PyUFuncObject *ufunc,
                                   PyObject *type_tup,
                                   PyArray_Descr **out_dtypes)
 {
- /* Depreciation checks needed only on python 2 */
+ /* Deprecation checks needed only on python 2 */
 #if !defined(NPY_PY3K)
     int type_num1, type_num2;
 
diff --git a/numpy/core/tests/_locales.py b/numpy/core/tests/_locales.py
index 28eebb14d..52e4ff36d 100644
--- a/numpy/core/tests/_locales.py
+++ b/numpy/core/tests/_locales.py
@@ -6,7 +6,7 @@ from __future__ import division, absolute_import, print_function
 import sys
 import locale
 
-from numpy.testing import SkipTest
+import pytest
 
 __ALL__ = ['CommaDecimalPointLocale']
 
@@ -52,7 +52,7 @@ class CommaDecimalPointLocale(object):
     tests with locale.LC_NUMERIC set to a locale where commas (',') are used as
     the decimal point instead of periods ('.'). On exit the locale is restored
     to the initial locale. It also serves as context manager with the same
-    effect. If no such locale is available, it raises SkipTest in both cases.
+    effect. If no such locale is available, the test is skipped.
 
     .. versionadded:: 1.15.0
 
@@ -61,7 +61,7 @@ class CommaDecimalPointLocale(object):
 
     def setup(self):
         if self.tst_locale is None:
-            raise SkipTest("No French locale available")
+            pytest.skip("No French locale available")
         locale.setlocale(locale.LC_NUMERIC, locale=self.tst_locale)
 
     def teardown(self):
@@ -69,7 +69,7 @@ class CommaDecimalPointLocale(object):
 
     def __enter__(self):
         if self.tst_locale is None:
-            raise SkipTest("No French locale available")
+            pytest.skip("No French locale available")
         locale.setlocale(locale.LC_NUMERIC, locale=self.tst_locale)
 
     def __exit__(self, type, value, traceback):
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index 8356615c1..e4446e07f 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -1,14 +1,14 @@
 from __future__ import division, absolute_import, print_function
 
-import pickle
 
 import numpy
 import numpy as np
 import datetime
 import pytest
 from numpy.testing import (
-    assert_, assert_equal, assert_raises, assert_warns, suppress_warnings
+    assert_, assert_equal, assert_raises, assert_warns, suppress_warnings,
     )
+from numpy.core.numeric import pickle
 
 # Use pytz to test out various time zones if available
 try:
@@ -130,13 +130,10 @@ class TestDateTime(object):
 
     def test_compare_generic_nat(self):
         # regression tests for gh-6452
-        assert_equal(np.datetime64('NaT'),
-                     np.datetime64('2000') + np.timedelta64('NaT'))
-        # nb. we may want to make NaT != NaT true in the future
-        with suppress_warnings() as sup:
-            sup.filter(FutureWarning, ".*NAT ==")
-            assert_(np.datetime64('NaT') == np.datetime64('NaT', 'us'))
-            assert_(np.datetime64('NaT', 'us') == np.datetime64('NaT'))
+        assert_(np.datetime64('NaT') !=
+                np.datetime64('2000') + np.timedelta64('NaT'))
+        assert_(np.datetime64('NaT') != np.datetime64('NaT', 'us'))
+        assert_(np.datetime64('NaT', 'us') != np.datetime64('NaT'))
 
     def test_datetime_scalar_construction(self):
         # Construct with different units
@@ -641,14 +638,17 @@ class TestDateTime(object):
 
     def test_pickle(self):
         # Check that pickle roundtripping works
-        dt = np.dtype('M8[7D]')
-        assert_equal(pickle.loads(pickle.dumps(dt)), dt)
-        dt = np.dtype('M8[W]')
-        assert_equal(pickle.loads(pickle.dumps(dt)), dt)
-        scalar = np.datetime64('2016-01-01T00:00:00.000000000')
-        assert_equal(pickle.loads(pickle.dumps(scalar)), scalar)
-        delta = scalar - np.datetime64('2015-01-01T00:00:00.000000000')
-        assert_equal(pickle.loads(pickle.dumps(delta)), delta)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            dt = np.dtype('M8[7D]')
+            assert_equal(pickle.loads(pickle.dumps(dt, protocol=proto)), dt)
+            dt = np.dtype('M8[W]')
+            assert_equal(pickle.loads(pickle.dumps(dt, protocol=proto)), dt)
+            scalar = np.datetime64('2016-01-01T00:00:00.000000000')
+            assert_equal(pickle.loads(pickle.dumps(scalar, protocol=proto)),
+                         scalar)
+            delta = scalar - np.datetime64('2015-01-01T00:00:00.000000000')
+            assert_equal(pickle.loads(pickle.dumps(delta, protocol=proto)),
+                         delta)
 
         # Check that loading pickles from 1.6 works
         pkl = b"cnumpy\ndtype\np0\n(S'M8'\np1\nI0\nI1\ntp2\nRp3\n" + \
@@ -1169,47 +1169,23 @@ class TestDateTime(object):
         td_nat = np.timedelta64('NaT', 'h')
         td_other = np.timedelta64(1, 'h')
 
-        with suppress_warnings() as sup:
-            # The assert warns contexts will again see the warning:
-            sup.filter(FutureWarning, ".*NAT")
-
-            for op in [np.equal, np.less, np.less_equal,
-                       np.greater, np.greater_equal]:
-                if op(dt_nat, dt_nat):
-                    assert_warns(FutureWarning, op, dt_nat, dt_nat)
-                if op(dt_nat, dt_other):
-                    assert_warns(FutureWarning, op, dt_nat, dt_other)
-                if op(dt_other, dt_nat):
-                    assert_warns(FutureWarning, op, dt_other, dt_nat)
-                if op(td_nat, td_nat):
-                    assert_warns(FutureWarning, op, td_nat, td_nat)
-                if op(td_nat, td_other):
-                    assert_warns(FutureWarning, op, td_nat, td_other)
-                if op(td_other, td_nat):
-                    assert_warns(FutureWarning, op, td_other, td_nat)
-
-            assert_warns(FutureWarning, np.not_equal, dt_nat, dt_nat)
-            assert_warns(FutureWarning, np.not_equal, td_nat, td_nat)
-
-        with suppress_warnings() as sup:
-            sup.record(FutureWarning)
-            assert_(np.not_equal(dt_nat, dt_other))
-            assert_(np.not_equal(dt_other, dt_nat))
-            assert_(np.not_equal(td_nat, td_other))
-            assert_(np.not_equal(td_other, td_nat))
-            assert_equal(len(sup.log), 0)
-
-    def test_datetime_futurewarning_once_nat(self):
-        # Test that the futurewarning is only given once per inner loop
-        arr1 = np.array(['NaT', 'NaT', '2000-01-01'] * 2, dtype='M8[s]')
-        arr2 = np.array(['NaT', '2000-01-01', 'NaT'] * 2, dtype='M8[s]')
-        # All except less, because for less it can't be wrong (NaT is min)
         for op in [np.equal, np.less, np.less_equal,
                    np.greater, np.greater_equal]:
-            with suppress_warnings() as sup:
-                rec = sup.record(FutureWarning, ".*NAT")
-                op(arr1, arr2)
-                assert_(len(rec) == 1, "failed for {}".format(op))
+            assert_(not op(dt_nat, dt_nat))
+            assert_(not op(dt_nat, dt_other))
+            assert_(not op(dt_other, dt_nat))
+
+            assert_(not op(td_nat, td_nat))
+            assert_(not op(td_nat, td_other))
+            assert_(not op(td_other, td_nat))
+
+        assert_(np.not_equal(dt_nat, dt_nat))
+        assert_(np.not_equal(dt_nat, dt_other))
+        assert_(np.not_equal(dt_other, dt_nat))
+
+        assert_(np.not_equal(td_nat, td_nat))
+        assert_(np.not_equal(td_nat, td_other))
+        assert_(np.not_equal(td_other, td_nat))
 
     def test_datetime_minmax(self):
         # The metadata of the result should become the GCD
diff --git a/numpy/core/tests/test_defchararray.py b/numpy/core/tests/test_defchararray.py
index 43f1b71c7..7b0e6f8a4 100644
--- a/numpy/core/tests/test_defchararray.py
+++ b/numpy/core/tests/test_defchararray.py
@@ -6,7 +6,7 @@ import numpy as np
 from numpy.core.multiarray import _vec_string
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_raises,
-    suppress_warnings,
+    assert_raises_regex, suppress_warnings,
     )
 
 kw_unicode_true = {'unicode': True}  # make 2to3 work properly
@@ -626,12 +626,9 @@ class TestOperations(object):
             assert_array_equal(Ar, (self.A * r))
 
         for ob in [object(), 'qrs']:
-            try:
-                A * ob
-            except ValueError:
-                pass
-            else:
-                self.fail("chararray can only be multiplied by integers")
+            with assert_raises_regex(ValueError,
+                                     'Can only multiply by integers'):
+                A*ob
 
     def test_rmul(self):
         A = self.A
@@ -641,12 +638,9 @@ class TestOperations(object):
             assert_array_equal(Ar, (r * self.A))
 
         for ob in [object(), 'qrs']:
-            try:
+            with assert_raises_regex(ValueError,
+                                     'Can only multiply by integers'):
                 ob * A
-            except ValueError:
-                pass
-            else:
-                self.fail("chararray can only be multiplied by integers")
 
     def test_mod(self):
         """Ticket #856"""
@@ -668,13 +662,9 @@ class TestOperations(object):
         assert_(("%r" % self.A) == repr(self.A))
 
         for ob in [42, object()]:
-            try:
+            with assert_raises_regex(
+                    TypeError, "unsupported operand type.* and 'chararray'"):
                 ob % self.A
-            except TypeError:
-                pass
-            else:
-                self.fail("chararray __rmod__ should fail with "
-                          "non-string objects")
 
     def test_slice(self):
         """Regression test for https://github.com/numpy/numpy/issues/5982"""
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index 31ef9d609..1bce86a5a 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -1,6 +1,5 @@
 from __future__ import division, absolute_import, print_function
 
-import pickle
 import sys
 import operator
 import pytest
@@ -9,6 +8,7 @@ import ctypes
 import numpy as np
 from numpy.core._rational_tests import rational
 from numpy.testing import assert_, assert_equal, assert_raises
+from numpy.core.numeric import pickle
 
 def assert_dtype_equal(a, b):
     assert_equal(a, b)
@@ -21,26 +21,26 @@ def assert_dtype_not_equal(a, b):
             "two different types hash to the same value !")
 
 class TestBuiltin(object):
-    def test_run(self):
+    @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object,
+                                   np.unicode])
+    def test_run(self, t):
         """Only test hash runs at all."""
-        for t in [int, float, complex, np.int32, str, object,
-                np.unicode]:
-            dt = np.dtype(t)
-            hash(dt)
+        dt = np.dtype(t)
+        hash(dt)
 
-    def test_dtype(self):
+    @pytest.mark.parametrize('t', [int, float])
+    def test_dtype(self, t):
         # Make sure equivalent byte order char hash the same (e.g. < and = on
         # little endian)
-        for t in [int, float]:
-            dt = np.dtype(t)
-            dt2 = dt.newbyteorder("<")
-            dt3 = dt.newbyteorder(">")
-            if dt == dt2:
-                assert_(dt.byteorder != dt2.byteorder, "bogus test")
-                assert_dtype_equal(dt, dt2)
-            else:
-                assert_(dt.byteorder != dt3.byteorder, "bogus test")
-                assert_dtype_equal(dt, dt3)
+        dt = np.dtype(t)
+        dt2 = dt.newbyteorder("<")
+        dt3 = dt.newbyteorder(">")
+        if dt == dt2:
+            assert_(dt.byteorder != dt2.byteorder, "bogus test")
+            assert_dtype_equal(dt, dt2)
+        else:
+            assert_(dt.byteorder != dt3.byteorder, "bogus test")
+            assert_dtype_equal(dt, dt3)
 
     def test_equivalent_dtype_hashing(self):
         # Make sure equivalent dtypes with different type num hash equal
@@ -552,7 +552,7 @@ class TestString(object):
         assert_equal(str(dt),
                     "[('a', '<m8[D]'), ('b', '<M8[us]')]")
 
-    def test_complex_dtype_repr(self):
+    def test_repr_structured(self):
         dt = np.dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)),
                                 ('rtile', '>f4', (64, 36))], (3,)),
                        ('bottom', [('bleft', ('>f4', (8, 64)), (1,)),
@@ -572,6 +572,7 @@ class TestString(object):
                     "(('Green pixel', 'g'), 'u1'), "
                     "(('Blue pixel', 'b'), 'u1')], align=True)")
 
+    def test_repr_structured_not_packed(self):
         dt = np.dtype({'names': ['rgba', 'r', 'g', 'b'],
                        'formats': ['<u4', 'u1', 'u1', 'u1'],
                        'offsets': [0, 0, 1, 2],
@@ -596,10 +597,16 @@ class TestString(object):
                     "'titles':['Red pixel','Blue pixel'], "
                     "'itemsize':4})")
 
+    def test_repr_structured_datetime(self):
         dt = np.dtype([('a', '<M8[D]'), ('b', '<m8[us]')])
         assert_equal(repr(dt),
                     "dtype([('a', '<M8[D]'), ('b', '<m8[us]')])")
 
+    def test_repr_str_subarray(self):
+        dt = np.dtype(('<i2', (1,)))
+        assert_equal(repr(dt), "dtype(('<i2', (1,)))")
+        assert_equal(str(dt), "('<i2', (1,))")
+
     @pytest.mark.skipif(sys.version_info[0] >= 3, reason="Python 2 only")
     def test_dtype_str_with_long_in_shape(self):
         # Pull request #376, should not error
@@ -642,12 +649,12 @@ class TestDtypeAttributes(object):
         new_dtype = np.dtype(dtype.descr)
         assert_equal(new_dtype.itemsize, 16)
 
-    def test_name_builtin(self):
-        for t in np.typeDict.values():
-            name = t.__name__
-            if name.endswith('_'):
-                name = name[:-1]
-            assert_equal(np.dtype(t).name, name)
+    @pytest.mark.parametrize('t', np.typeDict.values())
+    def test_name_builtin(self, t):
+        name = t.__name__
+        if name.endswith('_'):
+            name = name[:-1]
+        assert_equal(np.dtype(t).name, name)
 
     def test_name_dtype_subclass(self):
         # Ticket #4357
@@ -671,38 +678,46 @@ class TestPickling(object):
             assert_equal(x, y)
             assert_equal(x[0], y[0])
 
-    def test_builtin(self):
-        for t in [int, float, complex, np.int32, str, object,
-                  np.unicode, bool]:
-            self.check_pickling(np.dtype(t))
+    @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object,
+                                   np.unicode, bool])
+    def test_builtin(self, t):
+        self.check_pickling(np.dtype(t))
 
     def test_structured(self):
         dt = np.dtype(([('a', '>f4', (2, 1)), ('b', '<f8', (1, 3))], (2, 2)))
         self.check_pickling(dt)
+
+    def test_structured_aligned(self):
         dt = np.dtype('i4, i1', align=True)
         self.check_pickling(dt)
+
+    def test_structured_unaligned(self):
         dt = np.dtype('i4, i1', align=False)
         self.check_pickling(dt)
+
+    def test_structured_padded(self):
         dt = np.dtype({
             'names': ['A', 'B'],
             'formats': ['f4', 'f4'],
             'offsets': [0, 8],
             'itemsize': 16})
         self.check_pickling(dt)
+
+    def test_structured_titles(self):
         dt = np.dtype({'names': ['r', 'b'],
                        'formats': ['u1', 'u1'],
                        'titles': ['Red pixel', 'Blue pixel']})
         self.check_pickling(dt)
 
-    def test_datetime(self):
-        for base in ['m8', 'M8']:
-            for unit in ['', 'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
-                         'us', 'ns', 'ps', 'fs', 'as']:
-                dt = np.dtype('%s[%s]' % (base, unit) if unit else base)
-                self.check_pickling(dt)
-                if unit:
-                    dt = np.dtype('%s[7%s]' % (base, unit))
-                    self.check_pickling(dt)
+    @pytest.mark.parametrize('base', ['m8', 'M8'])
+    @pytest.mark.parametrize('unit', ['', 'Y', 'M', 'W', 'D', 'h', 'm', 's',
+                                      'ms', 'us', 'ns', 'ps', 'fs', 'as'])
+    def test_datetime(self, base, unit):
+        dt = np.dtype('%s[%s]' % (base, unit) if unit else base)
+        self.check_pickling(dt)
+        if unit:
+            dt = np.dtype('%s[7%s]' % (base, unit))
+            self.check_pickling(dt)
 
     def test_metadata(self):
         dt = np.dtype(int, metadata={'datum': 1})
@@ -729,6 +744,7 @@ def test_dtypes_are_true():
 def test_invalid_dtype_string():
     # test for gh-10440
     assert_raises(TypeError, np.dtype, 'f8,i8,[f8,i8]')
+    assert_raises(TypeError, np.dtype, u'Fl\xfcgel')
 
 
 class TestFromCTypes(object):
diff --git a/numpy/core/tests/test_errstate.py b/numpy/core/tests/test_errstate.py
index 4f6111921..670d485c1 100644
--- a/numpy/core/tests/test_errstate.py
+++ b/numpy/core/tests/test_errstate.py
@@ -4,7 +4,7 @@ import platform
 import pytest
 
 import numpy as np
-from numpy.testing import assert_
+from numpy.testing import assert_, assert_raises
 
 
 class TestErrstate(object):
@@ -16,12 +16,8 @@ class TestErrstate(object):
             with np.errstate(invalid='ignore'):
                 np.sqrt(a)
             # While this should fail!
-            try:
+            with assert_raises(FloatingPointError):
                 np.sqrt(a)
-            except FloatingPointError:
-                pass
-            else:
-                self.fail("Did not raise an invalid error")
 
     def test_divide(self):
         with np.errstate(all='raise', under='ignore'):
@@ -30,12 +26,8 @@ class TestErrstate(object):
             with np.errstate(divide='ignore'):
                 a // 0
             # While this should fail!
-            try:
+            with assert_raises(FloatingPointError):
                 a // 0
-            except FloatingPointError:
-                pass
-            else:
-                self.fail("Did not raise divide by zero error")
 
     def test_errcall(self):
         def foo(*args):
diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py
index 1934d542a..99792cee7 100644
--- a/numpy/core/tests/test_indexing.py
+++ b/numpy/core/tests/test_indexing.py
@@ -580,15 +580,33 @@ class TestBroadcastedAssignments(object):
 
 class TestSubclasses(object):
     def test_basic(self):
+        # Test that indexing in various ways produces SubClass instances,
+        # and that the base is set up correctly: the original subclass
+        # instance for views, and a new ndarray for advanced/boolean indexing
+        # where a copy was made (latter a regression test for gh-11983).
         class SubClass(np.ndarray):
             pass
 
-        s = np.arange(5).view(SubClass)
-        assert_(isinstance(s[:3], SubClass))
-        assert_(s[:3].base is s)
-
-        assert_(isinstance(s[[0, 1, 2]], SubClass))
-        assert_(isinstance(s[s > 0], SubClass))
+        a = np.arange(5)
+        s = a.view(SubClass)
+        s_slice = s[:3]
+        assert_(type(s_slice) is SubClass)
+        assert_(s_slice.base is s)
+        assert_array_equal(s_slice, a[:3])
+
+        s_fancy = s[[0, 1, 2]]
+        assert_(type(s_fancy) is SubClass)
+        assert_(s_fancy.base is not s)
+        assert_(type(s_fancy.base) is np.ndarray)
+        assert_array_equal(s_fancy, a[[0, 1, 2]])
+        assert_array_equal(s_fancy.base, a[[0, 1, 2]])
+
+        s_bool = s[s > 0]
+        assert_(type(s_bool) is SubClass)
+        assert_(s_bool.base is not s)
+        assert_(type(s_bool.base) is np.ndarray)
+        assert_array_equal(s_bool, a[a > 0])
+        assert_array_equal(s_bool.base, a[a > 0])
 
     def test_finalize_gets_full_info(self):
         # Array finalize should be called on the filled array.
diff --git a/numpy/core/tests/test_memmap.py b/numpy/core/tests/test_memmap.py
index 59ca28324..990d0ae26 100644
--- a/numpy/core/tests/test_memmap.py
+++ b/numpy/core/tests/test_memmap.py
@@ -81,7 +81,10 @@ class TestMemmap(object):
         tmpname = mktemp('', 'mmap', dir=self.tempdir)
         fp = memmap(Path(tmpname), dtype=self.dtype, mode='w+',
                        shape=self.shape)
-        abspath = os.path.realpath(os.path.abspath(tmpname))
+        # os.path.realpath does not resolve symlinks on Windows
+        # see: https://bugs.python.org/issue9949
+        # use Path.resolve, just as memmap class does internally
+        abspath = str(Path(tmpname).resolve())
         fp[:] = self.data[:]
         assert_equal(abspath, str(fp.filename.resolve()))
         b = fp[:1]
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 1c59abaa7..4b2a38990 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -20,6 +20,9 @@ import gc
 import weakref
 import pytest
 from contextlib import contextmanager
+
+from numpy.core.numeric import pickle
+
 if sys.version_info[0] >= 3:
     import builtins
 else:
@@ -33,7 +36,7 @@ from numpy.testing import (
     assert_, assert_raises, assert_warns, assert_equal, assert_almost_equal,
     assert_array_equal, assert_raises_regex, assert_array_almost_equal,
     assert_allclose, IS_PYPY, HAS_REFCOUNT, assert_array_less, runstring,
-    SkipTest, temppath, suppress_warnings
+    temppath, suppress_warnings
     )
 from numpy.core.tests._locales import CommaDecimalPointLocale
 
@@ -1371,13 +1374,28 @@ class TestZeroSizeFlexible(object):
             assert_equal(zs.view((dt, 1)).shape, (0,))
 
     def test_pickle(self):
-        import pickle
-        for dt in [bytes, np.void, unicode]:
-            zs = self._zeros(10, dt)
-            p = pickle.dumps(zs)
-            zs2 = pickle.loads(p)
-
-            assert_equal(zs.dtype, zs2.dtype)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            for dt in [bytes, np.void, unicode]:
+                zs = self._zeros(10, dt)
+                p = pickle.dumps(zs, protocol=proto)
+                zs2 = pickle.loads(p)
+
+                assert_equal(zs.dtype, zs2.dtype)
+
+    @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5,
+                        reason="requires pickle protocol 5")
+    def test_pickle_with_buffercallback(self):
+        array = np.arange(10)
+        buffers = []
+        bytes_string = pickle.dumps(array, buffer_callback=buffers.append,
+                                    protocol=5)
+        array_from_buffer = pickle.loads(bytes_string, buffers=buffers)
+        # when using pickle protocol 5 with buffer callbacks,
+        # array_from_buffer is reconstructed from a buffer holding a view
+        # to the initial array's data, so modifying an element in array
+        # should modify it in array_from_buffer too.
+        array[0] = -1
+        assert array_from_buffer[0] == -1, array_from_buffer[0]
 
 
 class TestMethods(object):
@@ -1416,6 +1434,10 @@ class TestMethods(object):
         A = ind.choose((x, y2))
         assert_equal(A, [[2, 2, 3], [2, 2, 3]])
 
+        oned = np.ones(1)
+        # gh-12031, caused SEGFAULT
+        assert_raises(TypeError, oned.choose,np.void(0), [oned])
+
     def test_prod(self):
         ba = [1, 2, 10, 11, 6, 5, 4]
         ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]]
@@ -3544,21 +3566,106 @@ class TestSubscripting(object):
 
 
 class TestPickling(object):
+    def test_highest_available_pickle_protocol(self):
+        try:
+            import pickle5
+        except ImportError:
+            pickle5 = None
+
+        if sys.version_info[:2] >= (3, 8) or pickle5 is not None:
+            assert pickle.HIGHEST_PROTOCOL >= 5
+        else:
+            assert pickle.HIGHEST_PROTOCOL < 5
+
+    @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL >= 5,
+                        reason=('this tests the error messages when trying to'
+                                'protocol 5 although it is not available'))
+    def test_correct_protocol5_error_message(self):
+        array = np.arange(10)
+        f = io.BytesIO()
+
+        if sys.version_info[:2] in ((3, 6), (3, 7)):
+            # For the specific case of python3.6 and 3.7, raise a clear import
+            # error about the pickle5 backport when trying to use protocol=5
+            # without the pickle5 package
+            with pytest.raises(ImportError):
+                array.__reduce_ex__(5)
+
+        elif sys.version_info[:2] < (3, 6):
+            # when calling __reduce_ex__ explicitly with protocol=5 on python
+            # versions older than 3.6, a ValueError is raised saying that
+            # protocol 5 is not available for this python version
+            with pytest.raises(ValueError):
+                array.__reduce_ex__(5)
+
+    def test_record_array_with_object_dtype(self):
+        my_object = object()
+
+        arr_with_object = np.array(
+                [(my_object, 1, 2.0)],
+                dtype=[('a', object), ('b', int), ('c', float)])
+        arr_without_object = np.array(
+                [('xxx', 1, 2.0)],
+                dtype=[('a', str), ('b', int), ('c', float)])
+
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            depickled_arr_with_object = pickle.loads(
+                    pickle.dumps(arr_with_object, protocol=proto))
+            depickled_arr_without_object = pickle.loads(
+                    pickle.dumps(arr_without_object, protocol=proto))
+
+            assert_equal(arr_with_object.dtype,
+                         depickled_arr_with_object.dtype)
+            assert_equal(arr_without_object.dtype,
+                         depickled_arr_without_object.dtype)
+
+    @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5,
+                        reason="requires pickle protocol 5")
+    def test_f_contiguous_array(self):
+        f_contiguous_array = np.array([[1, 2, 3], [4, 5, 6]], order='F')
+        buffers = []
+
+        # When using pickle protocol 5, Fortran-contiguous arrays can be
+        # serialized using out-of-band buffers
+        bytes_string = pickle.dumps(f_contiguous_array, protocol=5,
+                                    buffer_callback=buffers.append)
+
+        assert len(buffers) > 0
+
+        depickled_f_contiguous_array = pickle.loads(bytes_string,
+                                                    buffers=buffers)
+
+        assert_equal(f_contiguous_array, depickled_f_contiguous_array)
+
+    def test_non_contiguous_array(self):
+        non_contiguous_array = np.arange(12).reshape(3, 4)[:, :2]
+        assert not non_contiguous_array.flags.c_contiguous
+        assert not non_contiguous_array.flags.f_contiguous
+
+        # make sure non-contiguous arrays can be pickled-depickled
+        # using any protocol
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            depickled_non_contiguous_array = pickle.loads(
+                    pickle.dumps(non_contiguous_array, protocol=proto))
+
+            assert_equal(non_contiguous_array, depickled_non_contiguous_array)
+
     def test_roundtrip(self):
-        import pickle
-        carray = np.array([[2, 9], [7, 0], [3, 8]])
-        DATA = [
-            carray,
-            np.transpose(carray),
-            np.array([('xxx', 1, 2.0)], dtype=[('a', (str, 3)), ('b', int),
-                                               ('c', float)])
-        ]
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            carray = np.array([[2, 9], [7, 0], [3, 8]])
+            DATA = [
+                carray,
+                np.transpose(carray),
+                np.array([('xxx', 1, 2.0)], dtype=[('a', (str, 3)), ('b', int),
+                                                   ('c', float)])
+            ]
 
-        for a in DATA:
-            assert_equal(a, pickle.loads(a.dumps()), err_msg="%r" % a)
+            for a in DATA:
+                assert_equal(
+                        a, pickle.loads(pickle.dumps(a, protocol=proto)),
+                        err_msg="%r" % a)
 
     def _loads(self, obj):
-        import pickle
         if sys.version_info[0] >= 3:
             return pickle.loads(obj, encoding='latin1')
         else:
@@ -4133,15 +4240,12 @@ class TestPutmask(object):
     def test_mask_size(self):
         assert_raises(ValueError, np.putmask, np.array([1, 2, 3]), [True], 5)
 
-    def tst_byteorder(self, dtype):
+    @pytest.mark.parametrize('dtype', ('>i4', '<i4'))
+    def test_byteorder(self, dtype):
         x = np.array([1, 2, 3], dtype)
         np.putmask(x, [True, False, True], -1)
         assert_array_equal(x, [-1, 2, -1])
 
-    def test_ip_byteorder(self):
-        for dtype in ('>i4', '<i4'):
-            self.tst_byteorder(dtype)
-
     def test_record_array(self):
         # Note mixed byteorder.
         rec = np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)],
@@ -4191,14 +4295,11 @@ class TestTake(object):
         assert_array_equal(x.take([2], axis=0, mode='wrap')[0], x[0])
         assert_array_equal(x.take([3], axis=0, mode='wrap')[0], x[1])
 
-    def tst_byteorder(self, dtype):
+    @pytest.mark.parametrize('dtype', ('>i4', '<i4'))
+    def test_byteorder(self, dtype):
         x = np.array([1, 2, 3], dtype)
         assert_array_equal(x.take([0, 2, 1]), [1, 3, 2])
 
-    def test_ip_byteorder(self):
-        for dtype in ('>i4', '<i4'):
-            self.tst_byteorder(dtype)
-
     def test_record_array(self):
         # Note mixed byteorder.
         rec = np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)],
@@ -4574,19 +4675,16 @@ class TestIO(object):
 
 
 class TestFromBuffer(object):
-    def tst_basic(self, buffer, expected, kwargs):
-        assert_array_equal(np.frombuffer(buffer,**kwargs), expected)
-
-    def test_ip_basic(self):
-        for byteorder in ['<', '>']:
-            for dtype in [float, int, complex]:
-                dt = np.dtype(dtype).newbyteorder(byteorder)
-                x = (np.random.random((4, 7))*5).astype(dt)
-                buf = x.tobytes()
-                self.tst_basic(buf, x.flat, {'dtype':dt})
+    @pytest.mark.parametrize('byteorder', ['<', '>'])
+    @pytest.mark.parametrize('dtype', [float, int, complex])
+    def test_basic(self, byteorder, dtype):
+        dt = np.dtype(dtype).newbyteorder(byteorder)
+        x = (np.random.random((4, 7)) * 5).astype(dt)
+        buf = x.tobytes()
+        assert_array_equal(np.frombuffer(buf, dtype=dt), x.flat)
 
     def test_empty(self):
-        self.tst_basic(b'', np.array([]), {})
+        assert_array_equal(np.frombuffer(b''), np.array([]))
 
 
 class TestFlat(object):
@@ -4731,6 +4829,12 @@ class TestResize(object):
         x_view.resize((0, 10))
         x_view.resize((0, 100))
 
+    def test_check_weakref(self):
+        x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
+        xref = weakref.ref(x)
+        assert_raises(ValueError, x.resize, (5, 1))
+        del xref  # avoid pyflakes unused variable warning.
+
 
 class TestRecord(object):
     def test_field_rename(self):
@@ -4873,8 +4977,8 @@ class TestRecord(object):
 
         # non-ascii unicode field indexing is well behaved
         if not is_py3:
-            raise SkipTest('non ascii unicode field indexing skipped; '
-                           'raises segfault on python 2.x')
+            pytest.skip('non ascii unicode field indexing skipped; '
+                        'raises segfault on python 2.x')
         else:
             assert_raises(ValueError, a.__setitem__, u'\u03e0', 1)
             assert_raises(ValueError, a.__getitem__, u'\u03e0')
@@ -5940,9 +6044,10 @@ class TestRepeat(object):
 NEIGH_MODE = {'zero': 0, 'one': 1, 'constant': 2, 'circular': 3, 'mirror': 4}
 
 
+@pytest.mark.parametrize('dt', [float, Decimal], ids=['float', 'object'])
 class TestNeighborhoodIter(object):
     # Simple, 2d tests
-    def _test_simple2d(self, dt):
+    def test_simple2d(self, dt):
         # Test zero and one padding for simple data type
         x = np.array([[0, 1], [2, 3]], dtype=dt)
         r = [np.array([[0, 0, 0], [0, 0, 1]], dtype=dt),
@@ -5969,13 +6074,7 @@ class TestNeighborhoodIter(object):
                 x, [-1, 0, -1, 1], 4, NEIGH_MODE['constant'])
         assert_array_equal(l, r)
 
-    def test_simple2d(self):
-        self._test_simple2d(float)
-
-    def test_simple2d_object(self):
-        self._test_simple2d(Decimal)
-
-    def _test_mirror2d(self, dt):
+    def test_mirror2d(self, dt):
         x = np.array([[0, 1], [2, 3]], dtype=dt)
         r = [np.array([[0, 0, 1], [0, 0, 1]], dtype=dt),
              np.array([[0, 1, 1], [0, 1, 1]], dtype=dt),
@@ -5985,14 +6084,8 @@ class TestNeighborhoodIter(object):
                 x, [-1, 0, -1, 1], x[0], NEIGH_MODE['mirror'])
         assert_array_equal(l, r)
 
-    def test_mirror2d(self):
-        self._test_mirror2d(float)
-
-    def test_mirror2d_object(self):
-        self._test_mirror2d(Decimal)
-
     # Simple, 1d tests
-    def _test_simple(self, dt):
+    def test_simple(self, dt):
         # Test padding with constant values
         x = np.linspace(1, 5, 5).astype(dt)
         r = [[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 0]]
@@ -6010,14 +6103,8 @@ class TestNeighborhoodIter(object):
                 x, [-1, 1], x[4], NEIGH_MODE['constant'])
         assert_array_equal(l, r)
 
-    def test_simple_float(self):
-        self._test_simple(float)
-
-    def test_simple_object(self):
-        self._test_simple(Decimal)
-
     # Test mirror modes
-    def _test_mirror(self, dt):
+    def test_mirror(self, dt):
         x = np.linspace(1, 5, 5).astype(dt)
         r = np.array([[2, 1, 1, 2, 3], [1, 1, 2, 3, 4], [1, 2, 3, 4, 5],
                 [2, 3, 4, 5, 5], [3, 4, 5, 5, 4]], dtype=dt)
@@ -6026,14 +6113,8 @@ class TestNeighborhoodIter(object):
         assert_([i.dtype == dt for i in l])
         assert_array_equal(l, r)
 
-    def test_mirror(self):
-        self._test_mirror(float)
-
-    def test_mirror_object(self):
-        self._test_mirror(Decimal)
-
     # Circular mode
-    def _test_circular(self, dt):
+    def test_circular(self, dt):
         x = np.linspace(1, 5, 5).astype(dt)
         r = np.array([[4, 5, 1, 2, 3], [5, 1, 2, 3, 4], [1, 2, 3, 4, 5],
                 [2, 3, 4, 5, 1], [3, 4, 5, 1, 2]], dtype=dt)
@@ -6041,11 +6122,6 @@ class TestNeighborhoodIter(object):
                 x, [-2, 2], x[0], NEIGH_MODE['circular'])
         assert_array_equal(l, r)
 
-    def test_circular(self):
-        self._test_circular(float)
-
-    def test_circular_object(self):
-        self._test_circular(Decimal)
 
 # Test stacking neighborhood iterators
 class TestStackedNeighborhoodIter(object):
@@ -6699,6 +6775,18 @@ class TestNewBufferProtocol(object):
             ValueError, "format string",
             np.array, m)
 
+    def test_error_message(self):
+        # wchar has no corresponding numpy type - if this changes in future, we
+        # need a better way to construct an invalid memoryview format.
+        t = ctypes.c_wchar * 4
+        with assert_raises(ValueError) as cm:
+            np.array(t())
+
+        exc = cm.exception
+        if sys.version_info.major > 2:
+            with assert_raises_regex(ValueError, "Unknown .* specifier 'u'"):
+                raise exc.__cause__
+
     def test_ctypes_integer_via_memoryview(self):
         # gh-11150, due to bpo-10746
         for c_integer in {ctypes.c_int, ctypes.c_long, ctypes.c_longlong}:
@@ -7364,7 +7452,6 @@ class TestFormat(object):
                 dst = object.__format__(a, '30')
                 assert_equal(res, dst)
 
-
 class TestCTypes(object):
 
     def test_ctypes_is_available(self):
@@ -7644,3 +7731,55 @@ def test_npymath_real():
                 got = fun(z)
                 expected = npfun(z)
                 assert_allclose(got, expected)
+
+def test_uintalignment_and_alignment():
+    # alignment code needs to satisfy these requirements:
+    #  1. numpy structs match C struct layout
+    #  2. ufuncs/casting is safe wrt aligned access
+    #  3. copy code is safe wrt "uint aligned" access
+    #
+    # Complex types are the main problem, whose alignment may not be the same
+    # as their "uint alignment".
+    #
+    # This test might only fail on certain platforms, where uint64 alignment is
+    # not equal to complex64 alignment. The last two checks (ufunc and copy)
+    # will only fail for DEBUG=1.
+
+    d1 = np.dtype('u1,c8', align=True)
+    d2 = np.dtype('u4,c8', align=True)
+    d3 = np.dtype({'names': ['a', 'b'], 'formats': ['u1', d1]}, align=True)
+
+    assert_equal(np.zeros(1, dtype=d1)['f1'].flags['ALIGNED'], True)
+    assert_equal(np.zeros(1, dtype=d2)['f1'].flags['ALIGNED'], True)
+    assert_equal(np.zeros(1, dtype='u1,c8')['f1'].flags['ALIGNED'], False)
+
+    # check that C struct matches numpy struct size
+    s = _multiarray_tests.get_struct_alignments()
+    for d, (alignment, size) in zip([d1,d2,d3], s):
+        assert_equal(d.alignment, alignment)
+        assert_equal(d.itemsize, size)
+
+    # check that ufuncs don't complain in debug mode
+    # (this is probably OK if the aligned flag is true above)
+    src = np.zeros((2,2), dtype=d1)['f1']  # 4-byte aligned, often
+    np.exp(src)  # assert fails?
+
+    # check that copy code doesn't complain in debug mode
+    dst = np.zeros((2,2), dtype='c8')
+    dst[:,1] = src[:,1]  # assert in lowlevel_strided_loops fails?
+
+def test_getfield():
+    a = np.arange(32, dtype='uint16')
+    if sys.byteorder == 'little':
+        i = 0
+        j = 1
+    else:
+        i = 1
+        j = 0
+    b = a.getfield('int8', i)
+    assert_equal(b, a)
+    b = a.getfield('int8', j)
+    assert_equal(b, 0)
+    pytest.raises(ValueError, a.getfield, 'uint8', -1)
+    pytest.raises(ValueError, a.getfield, 'uint8', 16)
+    pytest.raises(ValueError, a.getfield, 'uint64', 0)
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index e7181736f..f264c4ab0 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -471,12 +471,9 @@ class TestSeterr(object):
     @pytest.mark.skipif(platform.machine() == "armv5tel", reason="See gh-413.")
     def test_divide_err(self):
         with np.errstate(divide='raise'):
-            try:
+            with assert_raises(FloatingPointError):
                 np.array([1.]) / np.array([0.])
-            except FloatingPointError:
-                pass
-            else:
-                self.fail()
+
             np.seterr(divide='ignore')
             np.array([1.]) / np.array([0.])
 
diff --git a/numpy/core/tests/test_numerictypes.py b/numpy/core/tests/test_numerictypes.py
index 4c3cc6c9e..27e4fdeec 100644
--- a/numpy/core/tests/test_numerictypes.py
+++ b/numpy/core/tests/test_numerictypes.py
@@ -3,6 +3,7 @@ from __future__ import division, absolute_import, print_function
 import sys
 import itertools
 
+import pytest
 import numpy as np
 from numpy.testing import assert_, assert_equal, assert_raises
 
@@ -408,7 +409,93 @@ class TestIsSubDType(object):
             assert_(not np.issubdtype(w1(np.float64), w2(np.float32)))
 
 
-def TestSctypeDict(object):
+class TestSctypeDict(object):
     def test_longdouble(self):
         assert_(np.sctypeDict['f8'] is not np.longdouble)
         assert_(np.sctypeDict['c16'] is not np.clongdouble)
+
+
+class TestBitName(object):
+    def test_abstract(self):
+        assert_raises(ValueError, np.core.numerictypes.bitname, np.floating)
+
+
+class TestMaximumSctype(object):
+
+    # note that parametrizing with sctype['int'] and similar would skip types
+    # with the same size (gh-11923)
+
+    @pytest.mark.parametrize('t', [np.byte, np.short, np.intc, np.int_, np.longlong])
+    def test_int(self, t):
+        assert_equal(np.maximum_sctype(t), np.sctypes['int'][-1])
+
+    @pytest.mark.parametrize('t', [np.ubyte, np.ushort, np.uintc, np.uint, np.ulonglong])
+    def test_uint(self, t):
+        assert_equal(np.maximum_sctype(t), np.sctypes['uint'][-1])
+
+    @pytest.mark.parametrize('t', [np.half, np.single, np.double, np.longdouble])
+    def test_float(self, t):
+        assert_equal(np.maximum_sctype(t), np.sctypes['float'][-1])
+
+    @pytest.mark.parametrize('t', [np.csingle, np.cdouble, np.clongdouble])
+    def test_complex(self, t):
+        assert_equal(np.maximum_sctype(t), np.sctypes['complex'][-1])
+
+    @pytest.mark.parametrize('t', [np.bool_, np.object_, np.unicode_, np.bytes_, np.void])
+    def test_other(self, t):
+        assert_equal(np.maximum_sctype(t), t)
+
+
+class Test_sctype2char(object):
+    # This function is old enough that we're really just documenting the quirks
+    # at this point.
+
+    def test_scalar_type(self):
+        assert_equal(np.sctype2char(np.double), 'd')
+        assert_equal(np.sctype2char(np.int_), 'l')
+        assert_equal(np.sctype2char(np.unicode_), 'U')
+        assert_equal(np.sctype2char(np.bytes_), 'S')
+
+    def test_other_type(self):
+        assert_equal(np.sctype2char(float), 'd')
+        assert_equal(np.sctype2char(list), 'O')
+        assert_equal(np.sctype2char(np.ndarray), 'O')
+
+    def test_third_party_scalar_type(self):
+        from numpy.core._rational_tests import rational
+        assert_raises(KeyError, np.sctype2char, rational)
+        assert_raises(KeyError, np.sctype2char, rational(1))
+
+    def test_array_instance(self):
+        assert_equal(np.sctype2char(np.array([1.0, 2.0])), 'd')
+
+    def test_abstract_type(self):
+        assert_raises(KeyError, np.sctype2char, np.floating)
+
+    def test_non_type(self):
+        assert_raises(ValueError, np.sctype2char, 1)
+
+@pytest.mark.parametrize("rep, expected", [
+    (np.int32, True),
+    (list, False),
+    (1.1, False),
+    (str, True),
+    (np.dtype(np.float64), True),
+    (np.dtype((np.int16, (3, 4))), True),
+    (np.dtype([('a', np.int8)]), True),
+    ])
+def test_issctype(rep, expected):
+    # ensure proper identification of scalar
+    # data-types by issctype()
+    actual = np.issctype(rep)
+    assert_equal(actual, expected)
+
+
+@pytest.mark.skipif(sys.flags.optimize > 1,
+                    reason="no docstrings present to inspect when PYTHONOPTIMIZE/Py_OptimizeFlag > 1")
+class TestDocStrings(object):
+    def test_platform_dependent_aliases(self):
+        if np.int64 is np.int_:
+            assert_('int64' in np.int_.__doc__)
+        elif np.int64 is np.longlong:
+            assert_('int64' in np.longlong.__doc__)
diff --git a/numpy/core/tests/test_overrides.py b/numpy/core/tests/test_overrides.py
new file mode 100644
index 000000000..3f87a6afe
--- /dev/null
+++ b/numpy/core/tests/test_overrides.py
@@ -0,0 +1,306 @@
+from __future__ import division, absolute_import, print_function
+
+import sys
+
+import numpy as np
+from numpy.testing import (
+    assert_, assert_equal, assert_raises, assert_raises_regex)
+from numpy.core.overrides import (
+    get_overloaded_types_and_args, array_function_dispatch,
+    verify_matching_signatures)
+from numpy.core.numeric import pickle
+
+
+def _get_overloaded_args(relevant_args):
+    types, args = get_overloaded_types_and_args(relevant_args)
+    return args
+
+
+def _return_self(self, *args, **kwargs):
+    return self
+
+
+class TestGetOverloadedTypesAndArgs(object):
+
+    def test_ndarray(self):
+        array = np.array(1)
+
+        types, args = get_overloaded_types_and_args([array])
+        assert_equal(set(types), {np.ndarray})
+        assert_equal(list(args), [])
+
+        types, args = get_overloaded_types_and_args([array, array])
+        assert_equal(len(types), 1)
+        assert_equal(set(types), {np.ndarray})
+        assert_equal(list(args), [])
+
+        types, args = get_overloaded_types_and_args([array, 1])
+        assert_equal(set(types), {np.ndarray})
+        assert_equal(list(args), [])
+
+        types, args = get_overloaded_types_and_args([1, array])
+        assert_equal(set(types), {np.ndarray})
+        assert_equal(list(args), [])
+
+    def test_ndarray_subclasses(self):
+
+        class OverrideSub(np.ndarray):
+            __array_function__ = _return_self
+
+        class NoOverrideSub(np.ndarray):
+            pass
+
+        array = np.array(1).view(np.ndarray)
+        override_sub = np.array(1).view(OverrideSub)
+        no_override_sub = np.array(1).view(NoOverrideSub)
+
+        types, args = get_overloaded_types_and_args([array, override_sub])
+        assert_equal(set(types), {np.ndarray, OverrideSub})
+        assert_equal(list(args), [override_sub])
+
+        types, args = get_overloaded_types_and_args([array, no_override_sub])
+        assert_equal(set(types), {np.ndarray, NoOverrideSub})
+        assert_equal(list(args), [])
+
+        types, args = get_overloaded_types_and_args(
+            [override_sub, no_override_sub])
+        assert_equal(set(types), {OverrideSub, NoOverrideSub})
+        assert_equal(list(args), [override_sub])
+
+    def test_ndarray_and_duck_array(self):
+
+        class Other(object):
+            __array_function__ = _return_self
+
+        array = np.array(1)
+        other = Other()
+
+        types, args = get_overloaded_types_and_args([other, array])
+        assert_equal(set(types), {np.ndarray, Other})
+        assert_equal(list(args), [other])
+
+        types, args = get_overloaded_types_and_args([array, other])
+        assert_equal(set(types), {np.ndarray, Other})
+        assert_equal(list(args), [other])
+
+    def test_ndarray_subclass_and_duck_array(self):
+
+        class OverrideSub(np.ndarray):
+            __array_function__ = _return_self
+
+        class Other(object):
+            __array_function__ = _return_self
+
+        array = np.array(1)
+        subarray = np.array(1).view(OverrideSub)
+        other = Other()
+
+        assert_equal(_get_overloaded_args([array, subarray, other]),
+                     [subarray, other])
+        assert_equal(_get_overloaded_args([array, other, subarray]),
+                     [subarray, other])
+
+    def test_many_duck_arrays(self):
+
+        class A(object):
+            __array_function__ = _return_self
+
+        class B(A):
+            __array_function__ = _return_self
+
+        class C(A):
+            __array_function__ = _return_self
+
+        class D(object):
+            __array_function__ = _return_self
+
+        a = A()
+        b = B()
+        c = C()
+        d = D()
+
+        assert_equal(_get_overloaded_args([1]), [])
+        assert_equal(_get_overloaded_args([a]), [a])
+        assert_equal(_get_overloaded_args([a, 1]), [a])
+        assert_equal(_get_overloaded_args([a, a, a]), [a])
+        assert_equal(_get_overloaded_args([a, d, a]), [a, d])
+        assert_equal(_get_overloaded_args([a, b]), [b, a])
+        assert_equal(_get_overloaded_args([b, a]), [b, a])
+        assert_equal(_get_overloaded_args([a, b, c]), [b, c, a])
+        assert_equal(_get_overloaded_args([a, c, b]), [c, b, a])
+
+
+class TestNDArrayArrayFunction(object):
+
+    def test_method(self):
+
+        class SubOverride(np.ndarray):
+            __array_function__ = _return_self
+
+        class NoOverrideSub(np.ndarray):
+            pass
+
+        array = np.array(1)
+
+        def func():
+            return 'original'
+
+        result = array.__array_function__(
+            func=func, types=(np.ndarray,), args=(), kwargs={})
+        assert_equal(result, 'original')
+
+        result = array.__array_function__(
+            func=func, types=(np.ndarray, SubOverride), args=(), kwargs={})
+        assert_(result is NotImplemented)
+
+        result = array.__array_function__(
+            func=func, types=(np.ndarray, NoOverrideSub), args=(), kwargs={})
+        assert_equal(result, 'original')
+
+
+# need to define this at the top level to test pickling
+@array_function_dispatch(lambda array: (array,))
+def dispatched_one_arg(array):
+    """Docstring."""
+    return 'original'
+
+
+class TestArrayFunctionDispatch(object):
+
+    def test_pickle(self):
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            roundtripped = pickle.loads(
+                    pickle.dumps(dispatched_one_arg, protocol=proto))
+            assert_(roundtripped is dispatched_one_arg)
+
+    def test_name_and_docstring(self):
+        assert_equal(dispatched_one_arg.__name__, 'dispatched_one_arg')
+        if sys.flags.optimize < 2:
+            assert_equal(dispatched_one_arg.__doc__, 'Docstring.')
+
+    def test_interface(self):
+
+        class MyArray(object):
+            def __array_function__(self, func, types, args, kwargs):
+                return (self, func, types, args, kwargs)
+
+        original = MyArray()
+        (obj, func, types, args, kwargs) = dispatched_one_arg(original)
+        assert_(obj is original)
+        assert_(func is dispatched_one_arg)
+        assert_equal(set(types), {MyArray})
+        assert_equal(args, (original,))
+        assert_equal(kwargs, {})
+
+    def test_not_implemented(self):
+
+        class MyArray(object):
+            def __array_function__(self, func, types, args, kwargs):
+                return NotImplemented
+
+        array = MyArray()
+        with assert_raises_regex(TypeError, 'no implementation found'):
+            dispatched_one_arg(array)
+
+
+class TestVerifyMatchingSignatures(object):
+
+    def test_verify_matching_signatures(self):
+
+        verify_matching_signatures(lambda x: 0, lambda x: 0)
+        verify_matching_signatures(lambda x=None: 0, lambda x=None: 0)
+        verify_matching_signatures(lambda x=1: 0, lambda x=None: 0)
+
+        with assert_raises(RuntimeError):
+            verify_matching_signatures(lambda a: 0, lambda b: 0)
+        with assert_raises(RuntimeError):
+            verify_matching_signatures(lambda x: 0, lambda x=None: 0)
+        with assert_raises(RuntimeError):
+            verify_matching_signatures(lambda x=None: 0, lambda y=None: 0)
+        with assert_raises(RuntimeError):
+            verify_matching_signatures(lambda x=1: 0, lambda y=1: 0)
+
+    def test_array_function_dispatch(self):
+
+        with assert_raises(RuntimeError):
+            @array_function_dispatch(lambda x: (x,))
+            def f(y):
+                pass
+
+        # should not raise
+        @array_function_dispatch(lambda x: (x,), verify=False)
+        def f(y):
+            pass
+
+
+def _new_duck_type_and_implements():
+    """Create a duck array type and its `implements` decorator."""
+    HANDLED_FUNCTIONS = {}
+
+    class MyArray(object):
+        def __array_function__(self, func, types, args, kwargs):
+            if func not in HANDLED_FUNCTIONS:
+                return NotImplemented
+            if not all(issubclass(t, MyArray) for t in types):
+                return NotImplemented
+            return HANDLED_FUNCTIONS[func](*args, **kwargs)
+
+    def implements(numpy_function):
+        """Register an __array_function__ implementation."""
+        def decorator(func):
+            HANDLED_FUNCTIONS[numpy_function] = func
+            return func
+        return decorator
+
+    return (MyArray, implements)
+
+
+class TestArrayFunctionImplementation(object):
+
+    def test_one_arg(self):
+        MyArray, implements = _new_duck_type_and_implements()
+
+        @implements(dispatched_one_arg)
+        def _(array):
+            return 'myarray'
+
+        assert_equal(dispatched_one_arg(1), 'original')
+        assert_equal(dispatched_one_arg(MyArray()), 'myarray')
+
+    def test_optional_args(self):
+        MyArray, implements = _new_duck_type_and_implements()
+
+        @array_function_dispatch(lambda array, option=None: (array,))
+        def func_with_option(array, option='default'):
+            return option
+
+        @implements(func_with_option)
+        def my_array_func_with_option(array, new_option='myarray'):
+            return new_option
+
+        # we don't need to implement every option on __array_function__
+        # implementations
+        assert_equal(func_with_option(1), 'default')
+        assert_equal(func_with_option(1, option='extra'), 'extra')
+        assert_equal(func_with_option(MyArray()), 'myarray')
+        with assert_raises(TypeError):
+            func_with_option(MyArray(), option='extra')
+
+        # but new options on implementations can't be used
+        result = my_array_func_with_option(MyArray(), new_option='yes')
+        assert_equal(result, 'yes')
+        with assert_raises(TypeError):
+            func_with_option(MyArray(), new_option='no')
+
+    def test_not_implemented(self):
+        MyArray, implements = _new_duck_type_and_implements()
+
+        @array_function_dispatch(lambda array: (array,))
+        def func(array):
+            return array
+
+        array = np.array(1)
+        assert_(func(array) is array)
+
+        with assert_raises_regex(TypeError, 'no implementation found'):
+            func(MyArray())
diff --git a/numpy/core/tests/test_print.py b/numpy/core/tests/test_print.py
index 433208748..c5c091e13 100644
--- a/numpy/core/tests/test_print.py
+++ b/numpy/core/tests/test_print.py
@@ -2,8 +2,10 @@ from __future__ import division, absolute_import, print_function
 
 import sys
 
+import pytest
+
 import numpy as np
-from numpy.testing import assert_, assert_equal, SkipTest
+from numpy.testing import assert_, assert_equal
 from numpy.core.tests._locales import CommaDecimalPointLocale
 
 
@@ -15,7 +17,15 @@ else:
 _REF = {np.inf: 'inf', -np.inf: '-inf', np.nan: 'nan'}
 
 
-def check_float_type(tp):
+@pytest.mark.parametrize('tp', [np.float32, np.double, np.longdouble])
+def test_float_types(tp):
+    """ Check formatting.
+
+        This is only for the str function, and only for simple types.
+        The precision of np.float32 and np.longdouble aren't the same as the
+        python float precision.
+
+    """
     for x in [0, 1, -1, 1e20]:
         assert_equal(str(tp(x)), str(float(x)),
                      err_msg='Failed str formatting for type %s' % tp)
@@ -28,34 +38,30 @@ def check_float_type(tp):
         assert_equal(str(tp(1e16)), ref,
                      err_msg='Failed str formatting for type %s' % tp)
 
-def test_float_types():
-    """ Check formatting.
+
+@pytest.mark.parametrize('tp', [np.float32, np.double, np.longdouble])
+def test_nan_inf_float(tp):
+    """ Check formatting of nan & inf.
 
         This is only for the str function, and only for simple types.
         The precision of np.float32 and np.longdouble aren't the same as the
         python float precision.
 
     """
-    for t in [np.float32, np.double, np.longdouble]:
-        check_float_type(t)
-
-def check_nan_inf_float(tp):
     for x in [np.inf, -np.inf, np.nan]:
         assert_equal(str(tp(x)), _REF[x],
                      err_msg='Failed str formatting for type %s' % tp)
 
-def test_nan_inf_float():
-    """ Check formatting of nan & inf.
+
+@pytest.mark.parametrize('tp', [np.complex64, np.cdouble, np.clongdouble])
+def test_complex_types(tp):
+    """Check formatting of complex types.
 
         This is only for the str function, and only for simple types.
         The precision of np.float32 and np.longdouble aren't the same as the
         python float precision.
 
     """
-    for t in [np.float32, np.double, np.longdouble]:
-        check_nan_inf_float(t)
-
-def check_complex_type(tp):
     for x in [0, 1, -1, 1e20]:
         assert_equal(str(tp(x)), str(complex(x)),
                      err_msg='Failed str formatting for type %s' % tp)
@@ -72,18 +78,9 @@ def check_complex_type(tp):
         assert_equal(str(tp(1e16)), ref,
                      err_msg='Failed str formatting for type %s' % tp)
 
-def test_complex_types():
-    """Check formatting of complex types.
-
-        This is only for the str function, and only for simple types.
-        The precision of np.float32 and np.longdouble aren't the same as the
-        python float precision.
-
-    """
-    for t in [np.complex64, np.cdouble, np.clongdouble]:
-        check_complex_type(t)
 
-def test_complex_inf_nan():
+@pytest.mark.parametrize('dtype', [np.complex64, np.cdouble, np.clongdouble])
+def test_complex_inf_nan(dtype):
     """Check inf/nan formatting of complex types."""
     TESTS = {
         complex(np.inf, 0): "(inf+0j)",
@@ -103,12 +100,9 @@ def test_complex_inf_nan():
         complex(-np.nan, 1): "(nan+1j)",
         complex(1, -np.nan): "(1+nanj)",
     }
-    for tp in [np.complex64, np.cdouble, np.clongdouble]:
-        for c, s in TESTS.items():
-            _check_complex_inf_nan(c, s, tp)
+    for c, s in TESTS.items():
+        assert_equal(str(dtype(c)), s)
 
-def _check_complex_inf_nan(c, s, dtype):
-    assert_equal(str(dtype(c)), s)
 
 # print tests
 def _test_redirected_print(x, tp, ref=None):
@@ -129,7 +123,10 @@ def _test_redirected_print(x, tp, ref=None):
     assert_equal(file.getvalue(), file_tp.getvalue(),
                  err_msg='print failed for type%s' % tp)
 
-def check_float_type_print(tp):
+
+@pytest.mark.parametrize('tp', [np.float32, np.double, np.longdouble])
+def test_float_type_print(tp):
+    """Check formatting when using print """
     for x in [0, 1, -1, 1e20]:
         _test_redirected_print(float(x), tp)
 
@@ -142,7 +139,10 @@ def check_float_type_print(tp):
         ref = '1e+16'
         _test_redirected_print(float(1e16), tp, ref)
 
-def check_complex_type_print(tp):
+
+@pytest.mark.parametrize('tp', [np.complex64, np.cdouble, np.clongdouble])
+def test_complex_type_print(tp):
+    """Check formatting when using print """
     # We do not create complex with inf/nan directly because the feature is
     # missing in python < 2.6
     for x in [0, 1, -1, 1e20]:
@@ -158,15 +158,6 @@ def check_complex_type_print(tp):
     _test_redirected_print(complex(-np.inf, 1), tp, '(-inf+1j)')
     _test_redirected_print(complex(-np.nan, 1), tp, '(nan+1j)')
 
-def test_float_type_print():
-    """Check formatting when using print """
-    for t in [np.float32, np.double, np.longdouble]:
-        check_float_type_print(t)
-
-def test_complex_type_print():
-    """Check formatting when using print """
-    for t in [np.complex64, np.cdouble, np.clongdouble]:
-        check_complex_type_print(t)
 
 def test_scalar_format():
     """Test the str.format method with NumPy scalar types"""
diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py
index d7c7d16e3..a77eef404 100644
--- a/numpy/core/tests/test_records.py
+++ b/numpy/core/tests/test_records.py
@@ -7,7 +7,6 @@ try:
     import collections.abc as collections_abc
 except ImportError:
     import collections as collections_abc
-import pickle
 import warnings
 import textwrap
 from os import path
@@ -18,6 +17,7 @@ from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_array_almost_equal,
     assert_raises, assert_warns
     )
+from numpy.core.numeric import pickle
 
 
 class TestFromrecords(object):
@@ -378,22 +378,27 @@ class TestRecord(object):
     def test_pickle_1(self):
         # Issue #1529
         a = np.array([(1, [])], dtype=[('a', np.int32), ('b', np.int32, 0)])
-        assert_equal(a, pickle.loads(pickle.dumps(a)))
-        assert_equal(a[0], pickle.loads(pickle.dumps(a[0])))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            assert_equal(a, pickle.loads(pickle.dumps(a, protocol=proto)))
+            assert_equal(a[0], pickle.loads(pickle.dumps(a[0],
+                                                         protocol=proto)))
 
     def test_pickle_2(self):
         a = self.data
-        assert_equal(a, pickle.loads(pickle.dumps(a)))
-        assert_equal(a[0], pickle.loads(pickle.dumps(a[0])))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            assert_equal(a, pickle.loads(pickle.dumps(a, protocol=proto)))
+            assert_equal(a[0], pickle.loads(pickle.dumps(a[0],
+                                                         protocol=proto)))
 
     def test_pickle_3(self):
         # Issue #7140
         a = self.data
-        pa = pickle.loads(pickle.dumps(a[0]))
-        assert_(pa.flags.c_contiguous)
-        assert_(pa.flags.f_contiguous)
-        assert_(pa.flags.writeable)
-        assert_(pa.flags.aligned)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            pa = pickle.loads(pickle.dumps(a[0], protocol=proto))
+            assert_(pa.flags.c_contiguous)
+            assert_(pa.flags.f_contiguous)
+            assert_(pa.flags.writeable)
+            assert_(pa.flags.aligned)
 
     def test_objview_record(self):
         # https://github.com/numpy/numpy/issues/2599
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index c38625dac..d53f6da84 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -1,7 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
 import copy
-import pickle
 import sys
 import platform
 import gc
@@ -16,9 +15,11 @@ import numpy as np
 from numpy.testing import (
         assert_, assert_equal, IS_PYPY, assert_almost_equal,
         assert_array_equal, assert_array_almost_equal, assert_raises,
-        assert_warns, suppress_warnings, _assert_valid_refcount, HAS_REFCOUNT,
+        assert_raises_regex, assert_warns, suppress_warnings,
+        _assert_valid_refcount, HAS_REFCOUNT,
         )
 from numpy.compat import asbytes, asunicode, long
+from numpy.core.numeric import pickle
 
 try:
     RecursionError
@@ -38,12 +39,13 @@ class TestRegression(object):
     def test_pickle_transposed(self):
         # Ticket #16
         a = np.transpose(np.array([[2, 9], [7, 0], [3, 8]]))
-        f = BytesIO()
-        pickle.dump(a, f)
-        f.seek(0)
-        b = pickle.load(f)
-        f.close()
-        assert_array_equal(a, b)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            f = BytesIO()
+            pickle.dump(a, f, protocol=proto)
+            f.seek(0)
+            b = pickle.load(f)
+            f.close()
+            assert_array_equal(a, b)
 
     def test_typeNA(self):
         # Issue gh-515 
@@ -94,12 +96,13 @@ class TestRegression(object):
 
     def test_char_dump(self):
         # Ticket #50
-        f = BytesIO()
         ca = np.char.array(np.arange(1000, 1010), itemsize=4)
-        ca.dump(f)
-        f.seek(0)
-        ca = np.load(f)
-        f.close()
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            f = BytesIO()
+            pickle.dump(ca, f, protocol=proto)
+            f.seek(0)
+            ca = np.load(f)
+            f.close()
 
     def test_noncontiguous_fill(self):
         # Ticket #58.
@@ -358,12 +361,13 @@ class TestRegression(object):
     def test_unpickle_dtype_with_object(self):
         # Implemented in r2840
         dt = np.dtype([('x', int), ('y', np.object_), ('z', 'O')])
-        f = BytesIO()
-        pickle.dump(dt, f)
-        f.seek(0)
-        dt_ = pickle.load(f)
-        f.close()
-        assert_equal(dt, dt_)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            f = BytesIO()
+            pickle.dump(dt, f, protocol=proto)
+            f.seek(0)
+            dt_ = pickle.load(f)
+            f.close()
+            assert_equal(dt, dt_)
 
     def test_mem_array_creation_invalid_specification(self):
         # Ticket #196
@@ -473,7 +477,8 @@ class TestRegression(object):
 
     def test_pickle_dtype(self):
         # Ticket #251
-        pickle.dumps(float)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            pickle.dumps(float, protocol=proto)
 
     def test_swap_real(self):
         # Ticket #265
@@ -817,8 +822,9 @@ class TestRegression(object):
         # Ticket #600
         x = np.array(["DROND", "DROND1"], dtype="U6")
         el = x[1]
-        new = pickle.loads(pickle.dumps(el))
-        assert_equal(new, el)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            new = pickle.loads(pickle.dumps(el, protocol=proto))
+            assert_equal(new, el)
 
     def test_arange_non_native_dtype(self):
         # Ticket #616
@@ -1065,11 +1071,12 @@ class TestRegression(object):
     def test_dot_alignment_sse2(self):
         # Test for ticket #551, changeset r5140
         x = np.zeros((30, 40))
-        y = pickle.loads(pickle.dumps(x))
-        # y is now typically not aligned on a 8-byte boundary
-        z = np.ones((1, y.shape[0]))
-        # This shouldn't cause a segmentation fault:
-        np.dot(z, y)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            y = pickle.loads(pickle.dumps(x, protocol=proto))
+            # y is now typically not aligned on an 8-byte boundary
+            z = np.ones((1, y.shape[0]))
+            # This shouldn't cause a segmentation fault:
+            np.dot(z, y)
 
     def test_astype_copy(self):
         # Ticket #788, changeset r5155
@@ -1279,9 +1286,12 @@ class TestRegression(object):
 
         assert_(test_record_void_scalar == test_record)
 
-        #Test pickle and unpickle of void and record scalars
-        assert_(pickle.loads(pickle.dumps(test_string)) == test_string)
-        assert_(pickle.loads(pickle.dumps(test_record)) == test_record)
+        # Test pickle and unpickle of void and record scalars
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            assert_(pickle.loads(
+                pickle.dumps(test_string, protocol=proto)) == test_string)
+            assert_(pickle.loads(
+                pickle.dumps(test_record, protocol=proto)) == test_record)
 
     def test_blasdot_uninitialized_memory(self):
         # Ticket #950
@@ -1309,28 +1319,18 @@ class TestRegression(object):
         # Regression test for #1061.
         # Set a size which cannot fit into a 64 bits signed integer
         sz = 2 ** 64
-        good = 'Maximum allowed dimension exceeded'
-        try:
+        with assert_raises_regex(ValueError,
+                                 'Maximum allowed dimension exceeded'):
             np.empty(sz)
-        except ValueError as e:
-            if not str(e) == good:
-                self.fail("Got msg '%s', expected '%s'" % (e, good))
-        except Exception as e:
-            self.fail("Got exception of type %s instead of ValueError" % type(e))
 
     def test_huge_arange(self):
         # Regression test for #1062.
         # Set a size which cannot fit into a 64 bits signed integer
         sz = 2 ** 64
-        good = 'Maximum allowed size exceeded'
-        try:
+        with assert_raises_regex(ValueError,
+                                 'Maximum allowed size exceeded'):
             np.arange(sz)
             assert_(np.size == sz)
-        except ValueError as e:
-            if not str(e) == good:
-                self.fail("Got msg '%s', expected '%s'" % (e, good))
-        except Exception as e:
-            self.fail("Got exception of type %s instead of ValueError" % type(e))
 
     def test_fromiter_bytes(self):
         # Ticket #1058
@@ -1934,11 +1934,12 @@ class TestRegression(object):
 
     def test_pickle_bytes_overwrite(self):
         if sys.version_info[0] >= 3:
-            data = np.array([1], dtype='b')
-            data = pickle.loads(pickle.dumps(data))
-            data[0] = 0xdd
-            bytestring = "\x01  ".encode('ascii')
-            assert_equal(bytestring[0:1], '\x01'.encode('ascii'))
+            for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+                data = np.array([1], dtype='b')
+                data = pickle.loads(pickle.dumps(data, protocol=proto))
+                data[0] = 0xdd
+                bytestring = "\x01  ".encode('ascii')
+                assert_equal(bytestring[0:1], '\x01'.encode('ascii'))
 
     def test_pickle_py2_array_latin1_hack(self):
         # Check that unpickling hacks in Py3 that support
@@ -2240,10 +2241,10 @@ class TestRegression(object):
 
     def test_pickle_empty_string(self):
         # gh-3926
-
-        import pickle
-        test_string = np.string_('')
-        assert_equal(pickle.loads(pickle.dumps(test_string)), test_string)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            test_string = np.string_('')
+            assert_equal(pickle.loads(
+                pickle.dumps(test_string, protocol=proto)), test_string)
 
     def test_frompyfunc_many_args(self):
         # gh-5672
@@ -2414,3 +2415,8 @@ class TestRegression(object):
             v = str(data[['f1']])
             if HAS_REFCOUNT:
                 assert_(base <= sys.getrefcount(s))
+
+    def test_object_casting_errors(self):
+        # gh-11993
+        arr = np.array(['AAAAA', 18465886.0, 18465886.0], dtype=object)
+        assert_raises(TypeError, arr.astype, 'c8')
diff --git a/numpy/core/tests/test_scalarbuffer.py b/numpy/core/tests/test_scalarbuffer.py
index cb6c521e1..cd520d99b 100644
--- a/numpy/core/tests/test_scalarbuffer.py
+++ b/numpy/core/tests/test_scalarbuffer.py
@@ -28,35 +28,36 @@ scalars_and_codes = [
     (np.cdouble, 'Zd'),
     (np.clongdouble, 'Zg'),
 ]
+scalars_only, codes_only = zip(*scalars_and_codes)
 
 
 @pytest.mark.skipif(sys.version_info.major < 3,
                     reason="Python 2 scalars lack a buffer interface")
 class TestScalarPEP3118(object):
 
-    def test_scalar_match_array(self):
-        for scalar, _ in scalars_and_codes:
-            x = scalar()
-            a = np.array([], dtype=np.dtype(scalar))
-            mv_x = memoryview(x)
-            mv_a = memoryview(a)
-            assert_equal(mv_x.format, mv_a.format)
+    @pytest.mark.parametrize('scalar', scalars_only, ids=codes_only)
+    def test_scalar_match_array(self, scalar):
+        x = scalar()
+        a = np.array([], dtype=np.dtype(scalar))
+        mv_x = memoryview(x)
+        mv_a = memoryview(a)
+        assert_equal(mv_x.format, mv_a.format)
 
-    def test_scalar_dim(self):
-        for scalar, _ in scalars_and_codes:
-            x = scalar()
-            mv_x = memoryview(x)
-            assert_equal(mv_x.itemsize, np.dtype(scalar).itemsize)
-            assert_equal(mv_x.ndim, 0)
-            assert_equal(mv_x.shape, ())
-            assert_equal(mv_x.strides, ())
-            assert_equal(mv_x.suboffsets, ())
+    @pytest.mark.parametrize('scalar', scalars_only, ids=codes_only)
+    def test_scalar_dim(self, scalar):
+        x = scalar()
+        mv_x = memoryview(x)
+        assert_equal(mv_x.itemsize, np.dtype(scalar).itemsize)
+        assert_equal(mv_x.ndim, 0)
+        assert_equal(mv_x.shape, ())
+        assert_equal(mv_x.strides, ())
+        assert_equal(mv_x.suboffsets, ())
 
-    def test_scalar_known_code(self):
-        for scalar, code in scalars_and_codes:
-            x = scalar()
-            mv_x = memoryview(x)
-            assert_equal(mv_x.format, code)
+    @pytest.mark.parametrize('scalar, code', scalars_and_codes, ids=codes_only)
+    def test_scalar_known_code(self, scalar, code):
+        x = scalar()
+        mv_x = memoryview(x)
+        assert_equal(mv_x.format, code)
 
     def test_void_scalar_structured_data(self):
         dt = np.dtype([('name', np.unicode_, 16), ('grades', np.float64, (2,))])
diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index 72b3451a4..df819b73f 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -191,6 +191,12 @@ class TestVstack(object):
 
 
 class TestConcatenate(object):
+    def test_returns_copy(self):
+        a = np.eye(3)
+        b = np.concatenate([a])
+        b[0, 0] = 2
+        assert b[0, 0] != a[0, 0]
+
     def test_exceptions(self):
         # test axis must be in bounds
         for ndim in [1, 2, 3]:
@@ -367,6 +373,12 @@ def test_stack():
 
 
 class TestBlock(object):
+    def test_returns_copy(self):
+        a = np.eye(3)
+        b = np.block(a)
+        b[0, 0] = 2
+        assert b[0, 0] != a[0, 0]
+
     def test_block_simple_row_wise(self):
         a_2d = np.ones((2, 2))
         b_2d = 2 * a_2d
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 0e564e305..b83b8ccff 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -13,6 +13,7 @@ from numpy.testing import (
     assert_almost_equal, assert_array_almost_equal, assert_no_warnings,
     assert_allclose,
     )
+from numpy.core.numeric import pickle
 
 
 class TestUfuncKwargs(object):
@@ -43,16 +44,17 @@ class TestUfuncKwargs(object):
 
 class TestUfunc(object):
     def test_pickle(self):
-        import pickle
-        assert_(pickle.loads(pickle.dumps(np.sin)) is np.sin)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            assert_(pickle.loads(pickle.dumps(np.sin,
+                                              protocol=proto)) is np.sin)
 
-        # Check that ufunc not defined in the top level numpy namespace such as
-        # numpy.core._rational_tests.test_add can also be pickled
-        res = pickle.loads(pickle.dumps(_rational_tests.test_add))
-        assert_(res is _rational_tests.test_add)
+            # Check that ufuncs not defined in the top-level numpy namespace,
+            # such as numpy.core._rational_tests.test_add, can also be pickled
+            res = pickle.loads(pickle.dumps(_rational_tests.test_add,
+                                            protocol=proto))
+            assert_(res is _rational_tests.test_add)
 
     def test_pickle_withstring(self):
-        import pickle
         astring = (b"cnumpy.core\n_ufunc_reconstruct\np0\n"
                    b"(S'numpy.core.umath'\np1\nS'cos'\np2\ntp3\nRp4\n.")
         assert_(pickle.loads(astring) is np.cos)
@@ -286,20 +288,98 @@ class TestUfunc(object):
         """
         pass
 
-    def test_signature(self):
+    # from include/numpy/ufuncobject.h
+    size_inferred = 2
+    can_ignore = 4
+    def test_signature0(self):
         # the arguments to test_signature are: nin, nout, core_signature
-        # pass
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(i),(i)->()")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(i),(i)->()")
         assert_equal(enabled, 1)
         assert_equal(num_dims, (1,  1,  0))
         assert_equal(ixs, (0, 0))
+        assert_equal(flags, (self.size_inferred,))
+        assert_equal(sizes, (-1,))
 
+    def test_signature1(self):
         # empty core signature; treat as plain ufunc (with trivial core)
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(),()->()")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(),()->()")
         assert_equal(enabled, 0)
         assert_equal(num_dims, (0,  0,  0))
         assert_equal(ixs, ())
+        assert_equal(flags, ())
+        assert_equal(sizes, ())
 
+    def test_signature2(self):
+        # more complicated names for variables
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(i1,i2),(J_1)->(_kAB)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 1, 1))
+        assert_equal(ixs, (0, 1, 2, 3))
+        assert_equal(flags, (self.size_inferred,)*4)
+        assert_equal(sizes, (-1, -1, -1, -1))
+
+    def test_signature3(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, u"(i1, i12),   (J_1)->(i12, i2)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 1, 2))
+        assert_equal(ixs, (0, 1, 2, 1, 3))
+        assert_equal(flags, (self.size_inferred,)*4)
+        assert_equal(sizes, (-1, -1, -1, -1))
+
+    def test_signature4(self):
+        # matrix_multiply signature from _umath_tests
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(n,k),(k,m)->(n,m)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 2, 2))
+        assert_equal(ixs, (0, 1, 1, 2, 0, 2))
+        assert_equal(flags, (self.size_inferred,)*3)
+        assert_equal(sizes, (-1, -1, -1))
+
+    def test_signature5(self):
+        # matmul signature from _umath_tests
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(n?,k),(k,m?)->(n?,m?)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 2, 2))
+        assert_equal(ixs, (0, 1, 1, 2, 0, 2))
+        assert_equal(flags, (self.size_inferred | self.can_ignore,
+                             self.size_inferred,
+                             self.size_inferred | self.can_ignore))
+        assert_equal(sizes, (-1, -1, -1))
+
+    def test_signature6(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            1, 1, "(3)->()")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 0))
+        assert_equal(ixs, (0,))
+        assert_equal(flags, (0,))
+        assert_equal(sizes, (3,))
+
+    def test_signature7(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            3, 1, "(3),(03,3),(n)->(9)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 2, 1, 1))
+        assert_equal(ixs, (0, 0, 0, 1, 2))
+        assert_equal(flags, (0, self.size_inferred, 0))
+        assert_equal(sizes, (3, -1, 9))
+
+    def test_signature8(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            3, 1, "(3?),(3?,3?),(n)->(9)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 2, 1, 1))
+        assert_equal(ixs, (0, 0, 0, 1, 2))
+        assert_equal(flags, (self.can_ignore, self.size_inferred, 0))
+        assert_equal(sizes, (3, -1, 9))
+
+    def test_signature_failure0(self):
         # in the following calls, a ValueError should be raised because
         # of error in core signature
         # FIXME These should be using assert_raises
@@ -312,6 +392,7 @@ class TestUfunc(object):
         except ValueError:
             pass
 
+    def test_signature_failure1(self):
         # error: parenthesis matching
         msg = "core_sig: parenthesis matching"
         try:
@@ -320,6 +401,7 @@ class TestUfunc(object):
         except ValueError:
             pass
 
+    def test_signature_failure2(self):
         # error: incomplete signature. letters outside of parenthesis are ignored
         msg = "core_sig: incomplete signature"
         try:
@@ -328,6 +410,7 @@ class TestUfunc(object):
         except ValueError:
             pass
 
+    def test_signature_failure3(self):
         # error: incomplete signature. 2 output arguments are specified
         msg = "core_sig: incomplete signature"
         try:
@@ -336,12 +419,6 @@ class TestUfunc(object):
         except ValueError:
             pass
 
-        # more complicated names for variables
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(i1,i2),(J_1)->(_kAB)")
-        assert_equal(enabled, 1)
-        assert_equal(num_dims, (2, 1, 1))
-        assert_equal(ixs, (0, 1, 2, 3))
-
     def test_get_signature(self):
         assert_equal(umt.inner1d.signature, "(i),(i)->()")
 
@@ -866,6 +943,89 @@ class TestUfunc(object):
         w = np.array([], dtype='f8')
         assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1))
 
+    def test_cross1d(self):
+        """Test with fixed-sized signature."""
+        a = np.eye(3)
+        assert_array_equal(umt.cross1d(a, a), np.zeros((3, 3)))
+        out = np.zeros((3, 3))
+        result = umt.cross1d(a[0], a, out)
+        assert_(result is out)
+        assert_array_equal(result, np.vstack((np.zeros(3), a[2], -a[1])))
+        assert_raises(ValueError, umt.cross1d, np.eye(4), np.eye(4))
+        assert_raises(ValueError, umt.cross1d, a, np.arange(4.))
+        assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros((3, 4)))
+
+    def test_can_ignore_signature(self):
+        # Comparing the effects of ? in signature:
+        # matrix_multiply: (m,n),(n,p)->(m,p)    # all must be there.
+        # matmul:        (m?,n),(n,p?)->(m?,p?)  # allow missing m, p.
+        mat = np.arange(12).reshape((2, 3, 2))
+        single_vec = np.arange(2)
+        col_vec = single_vec[:, np.newaxis]
+        col_vec_array = np.arange(8).reshape((2, 2, 2, 1)) + 1
+        # matrix @ single column vector with proper dimension
+        mm_col_vec = umt.matrix_multiply(mat, col_vec)
+        # matmul does the same thing
+        matmul_col_vec = umt.matmul(mat, col_vec)
+        assert_array_equal(matmul_col_vec, mm_col_vec)
+        # matrix @ vector without dimension making it a column vector.
+        # matrix multiply fails -> missing core dim.
+        assert_raises(ValueError, umt.matrix_multiply, mat, single_vec)
+        # matmul mimicker passes, and returns a vector.
+        matmul_col = umt.matmul(mat, single_vec)
+        assert_array_equal(matmul_col, mm_col_vec.squeeze())
+        # Now with a column array: same as for column vector,
+        # broadcasting sensibly.
+        mm_col_vec = umt.matrix_multiply(mat, col_vec_array)
+        matmul_col_vec = umt.matmul(mat, col_vec_array)
+        assert_array_equal(matmul_col_vec, mm_col_vec)
+        # As above, but for row vector
+        single_vec = np.arange(3)
+        row_vec = single_vec[np.newaxis, :]
+        row_vec_array = np.arange(24).reshape((4, 2, 1, 1, 3)) + 1
+        # row vector @ matrix
+        mm_row_vec = umt.matrix_multiply(row_vec, mat)
+        matmul_row_vec = umt.matmul(row_vec, mat)
+        assert_array_equal(matmul_row_vec, mm_row_vec)
+        # single row vector @ matrix
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, mat)
+        matmul_row = umt.matmul(single_vec, mat)
+        assert_array_equal(matmul_row, mm_row_vec.squeeze())
+        # row vector array @ matrix
+        mm_row_vec = umt.matrix_multiply(row_vec_array, mat)
+        matmul_row_vec = umt.matmul(row_vec_array, mat)
+        assert_array_equal(matmul_row_vec, mm_row_vec)
+        # Now for vector combinations
+        # row vector @ column vector
+        col_vec = row_vec.T
+        col_vec_array = row_vec_array.swapaxes(-2, -1)
+        mm_row_col_vec = umt.matrix_multiply(row_vec, col_vec)
+        matmul_row_col_vec = umt.matmul(row_vec, col_vec)
+        assert_array_equal(matmul_row_col_vec, mm_row_col_vec)
+        # single row vector @ single col vector
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec)
+        matmul_row_col = umt.matmul(single_vec, single_vec)
+        assert_array_equal(matmul_row_col, mm_row_col_vec.squeeze())
+        # row vector array @ column vector array
+        mm_row_col_array = umt.matrix_multiply(row_vec_array, col_vec_array)
+        matmul_row_col_array = umt.matmul(row_vec_array, col_vec_array)
+        assert_array_equal(matmul_row_col_array, mm_row_col_array)
+        # Finally, check that things are *not* squeezed if one gives an
+        # output.
+        out = np.zeros_like(mm_row_col_array)
+        out = umt.matrix_multiply(row_vec_array, col_vec_array, out=out)
+        assert_array_equal(out, mm_row_col_array)
+        out[:] = 0
+        out = umt.matmul(row_vec_array, col_vec_array, out=out)
+        assert_array_equal(out, mm_row_col_array)
+        # And check one cannot put missing dimensions back.
+        out = np.zeros_like(mm_row_col_vec)
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec,
+                      out)
+        # But fine for matmul, since it is just a broadcast.
+        out = umt.matmul(single_vec, single_vec, out)
+        assert_array_equal(out, mm_row_col_vec.squeeze())
+
     def test_matrix_multiply(self):
         self.compare_matrix_multiply_results(np.long)
         self.compare_matrix_multiply_results(np.double)
diff --git a/numpy/core/umath.py b/numpy/core/umath.py
index 90f5ed37c..a0e8ad427 100644
--- a/numpy/core/umath.py
+++ b/numpy/core/umath.py
@@ -3,29 +3,33 @@ Create the numpy.core.umath namespace for backward compatibility. In v1.16
 the multiarray and umath c-extension modules were merged into a single
 _multiarray_umath extension module. So we replicate the old namespace
 by importing from the extension module.
+
 """
 
 from . import _multiarray_umath
 from numpy.core._multiarray_umath import *
-from numpy.core._multiarray_umath import _add_newdoc_ufunc, _arg, _ones_like
+from numpy.core._multiarray_umath import (
+    _UFUNC_API, _add_newdoc_ufunc, _arg, _ones_like
+    )
 
-__all__ = ['ERR_CALL', 'ERR_DEFAULT', 'ERR_IGNORE', 'ERR_LOG', 'ERR_PRINT',
-    'ERR_RAISE', 'ERR_WARN', 'FLOATING_POINT_SUPPORT', 'FPE_DIVIDEBYZERO',
-    'FPE_INVALID', 'FPE_OVERFLOW', 'FPE_UNDERFLOW', 'NAN', 'NINF', 'NZERO',
-    'PINF', 'PZERO', 'SHIFT_DIVIDEBYZERO', 'SHIFT_INVALID', 'SHIFT_OVERFLOW',
-    'SHIFT_UNDERFLOW', 'UFUNC_BUFSIZE_DEFAULT', 'UFUNC_PYVALS_NAME',
-    '_add_newdoc_ufunc', '_arg',
-    'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan',
-    'arctan2', 'arctanh', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'cbrt',
-    'ceil', 'conj', 'conjugate', 'copysign', 'cos', 'cosh', 'deg2rad',
-    'degrees', 'divide', 'divmod', 'e', 'equal', 'euler_gamma', 'exp', 'exp2',
-    'expm1', 'fabs', 'floor', 'floor_divide', 'float_power', 'fmax', 'fmin',
-    'fmod', 'frexp', 'frompyfunc', 'gcd', 'geterrobj', 'greater',
-    'greater_equal', 'heaviside', 'hypot', 'invert', 'isfinite', 'isinf',
-    'isnan', 'isnat', 'lcm', 'ldexp', 'left_shift', 'less', 'less_equal',
-    'log', 'log10', 'log1p', 'log2', 'logaddexp', 'logaddexp2', 'logical_and',
-    'logical_not', 'logical_or', 'logical_xor', 'maximum', 'minimum', 'mod',
-    'modf', 'multiply', 'negative', 'nextafter', 'not_equal', 'pi', 'positive',
-    'power', 'rad2deg', 'radians', 'reciprocal', 'remainder', 'right_shift',
-    'rint', 'seterrobj', 'sign', 'signbit', 'sin', 'sinh', 'spacing', 'sqrt',
-    'square', 'subtract', 'tan', 'tanh', 'true_divide', 'trunc']
+__all__ = [
+    '_UFUNC_API', 'ERR_CALL', 'ERR_DEFAULT', 'ERR_IGNORE', 'ERR_LOG',
+    'ERR_PRINT', 'ERR_RAISE', 'ERR_WARN', 'FLOATING_POINT_SUPPORT',
+    'FPE_DIVIDEBYZERO', 'FPE_INVALID', 'FPE_OVERFLOW', 'FPE_UNDERFLOW', 'NAN',
+    'NINF', 'NZERO', 'PINF', 'PZERO', 'SHIFT_DIVIDEBYZERO', 'SHIFT_INVALID',
+    'SHIFT_OVERFLOW', 'SHIFT_UNDERFLOW', 'UFUNC_BUFSIZE_DEFAULT',
+    'UFUNC_PYVALS_NAME', '_add_newdoc_ufunc', '_arg', 'absolute', 'add',
+    'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh',
+    'bitwise_and', 'bitwise_or', 'bitwise_xor', 'cbrt', 'ceil', 'conj',
+    'conjugate', 'copysign', 'cos', 'cosh', 'deg2rad', 'degrees', 'divide',
+    'divmod', 'e', 'equal', 'euler_gamma', 'exp', 'exp2', 'expm1', 'fabs',
+    'floor', 'floor_divide', 'float_power', 'fmax', 'fmin', 'fmod', 'frexp',
+    'frompyfunc', 'gcd', 'geterrobj', 'greater', 'greater_equal', 'heaviside',
+    'hypot', 'invert', 'isfinite', 'isinf', 'isnan', 'isnat', 'lcm', 'ldexp',
+    'left_shift', 'less', 'less_equal', 'log', 'log10', 'log1p', 'log2',
+    'logaddexp', 'logaddexp2', 'logical_and', 'logical_not', 'logical_or',
+    'logical_xor', 'maximum', 'minimum', 'mod', 'modf', 'multiply', 'negative',
+    'nextafter', 'not_equal', 'pi', 'positive', 'power', 'rad2deg', 'radians',
+    'reciprocal', 'remainder', 'right_shift', 'rint', 'seterrobj', 'sign',
+    'signbit', 'sin', 'sinh', 'spacing', 'sqrt', 'square', 'subtract', 'tan',
+    'tanh', 'true_divide', 'trunc']
diff --git a/numpy/ctypeslib.py b/numpy/ctypeslib.py
index 329c7a280..24cfc6762 100644
--- a/numpy/ctypeslib.py
+++ b/numpy/ctypeslib.py
@@ -54,7 +54,7 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['load_library', 'ndpointer', 'test', 'ctypes_load_library',
            'c_intp', 'as_ctypes', 'as_array']
 
-import sys, os
+import os
 from numpy import integer, ndarray, dtype as _dtype, deprecate, array
 from numpy.core.multiarray import _flagdict, flagsobj
 
diff --git a/numpy/distutils/command/build_ext.py b/numpy/distutils/command/build_ext.py
index f6bd81b6c..18d36480a 100644
--- a/numpy/distutils/command/build_ext.py
+++ b/numpy/distutils/command/build_ext.py
@@ -4,8 +4,7 @@
 from __future__ import division, absolute_import, print_function
 
 import os
-import sys
-import shutil
+import subprocess
 from glob import glob
 
 from distutils.dep_util import newer_group
@@ -15,7 +14,7 @@ from distutils.errors import DistutilsFileError, DistutilsSetupError,\
 from distutils.file_util import copy_file
 
 from numpy.distutils import log
-from numpy.distutils.exec_command import exec_command
+from numpy.distutils.exec_command import filepath_from_subprocess_output
 from numpy.distutils.system_info import combine_paths, system_info
 from numpy.distutils.misc_util import filter_sources, has_f_sources, \
     has_cxx_sources, get_ext_source_files, \
@@ -558,9 +557,12 @@ class build_ext (old_build_ext):
             # correct path when compiling in Cygwin but with normal Win
             # Python
             if dir.startswith('/usr/lib'):
-                s, o = exec_command(['cygpath', '-w', dir], use_tee=False)
-                if not s:
-                    dir = o
+                try:
+                    dir = subprocess.check_output(['cygpath', '-w', dir])
+                except (OSError, subprocess.CalledProcessError):
+                    pass
+                else:
+                    dir = filepath_from_subprocess_output(dir)
             f_lib_dirs.append(dir)
         c_library_dirs.extend(f_lib_dirs)
 
diff --git a/numpy/distutils/command/config.py b/numpy/distutils/command/config.py
index 47bc496cf..6b904d6ef 100644
--- a/numpy/distutils/command/config.py
+++ b/numpy/distutils/command/config.py
@@ -7,6 +7,7 @@ from __future__ import division, absolute_import, print_function
 import os, signal
 import warnings
 import sys
+import subprocess
 
 from distutils.command.config import config as old_config
 from distutils.command.config import LANG_EXT
@@ -14,7 +15,7 @@ from distutils import log
 from distutils.file_util import copy_file
 from distutils.ccompiler import CompileError, LinkError
 import distutils
-from numpy.distutils.exec_command import exec_command
+from numpy.distutils.exec_command import filepath_from_subprocess_output
 from numpy.distutils.mingw32ccompiler import generate_manifest
 from numpy.distutils.command.autodist import (check_gcc_function_attribute,
                                               check_gcc_variable_attribute,
@@ -121,9 +122,13 @@ Original exception was: %s, and the Compiler class was %s
                         # correct path when compiling in Cygwin but with
                         # normal Win Python
                         if d.startswith('/usr/lib'):
-                            s, o = exec_command(['cygpath', '-w', d],
-                                               use_tee=False)
-                            if not s: d = o
+                            try:
+                                d = subprocess.check_output(['cygpath',
+                                                             '-w', d])
+                            except (OSError, subprocess.CalledProcessError):
+                                pass
+                            else:
+                                d = filepath_from_subprocess_output(d)
                         library_dirs.append(d)
                     for libname in self.fcompiler.libraries or []:
                         if libname not in libraries:
@@ -436,7 +441,6 @@ int main (void)
                       "involving running executable on the target machine.\n" \
                       "+++++++++++++++++++++++++++++++++++++++++++++++++\n",
                       DeprecationWarning, stacklevel=2)
-        from distutils.ccompiler import CompileError, LinkError
         self._check_compiler()
         exitcode, output = 255, ''
         try:
@@ -450,8 +454,24 @@ int main (void)
                 grabber.restore()
                 raise
             exe = os.path.join('.', exe)
-            exitstatus, output = exec_command(exe, execute_in='.',
-                                              use_tee=use_tee)
+            try:
+                # specify cwd arg for consistency with
+                # historic usage pattern of exec_command()
+                # also, note that exe appears to be a string,
+                # which exec_command() handled, but we now
+                # use a list for check_output() -- this assumes
+                # that exe is always a single command
+                output = subprocess.check_output([exe], cwd='.')
+            except subprocess.CalledProcessError as exc:
+                exitstatus = exc.returncode
+                output = ''
+            except OSError:
+                # preserve the EnvironmentError exit status
+                # used historically in exec_command()
+                exitstatus = 127
+                output = ''
+            else:
+                output = filepath_from_subprocess_output(output)
             if hasattr(os, 'WEXITSTATUS'):
                 exitcode = os.WEXITSTATUS(exitstatus)
                 if os.WIFSIGNALED(exitstatus):
diff --git a/numpy/distutils/command/config_compiler.py b/numpy/distutils/command/config_compiler.py
index 5e638fecc..bf170063e 100644
--- a/numpy/distutils/command/config_compiler.py
+++ b/numpy/distutils/command/config_compiler.py
@@ -5,9 +5,12 @@ from numpy.distutils import log
 
 #XXX: Linker flags
 
-def show_fortran_compilers(_cache=[]):
-    # Using cache to prevent infinite recursion
-    if _cache: return
+def show_fortran_compilers(_cache=None):
+    # Using cache to prevent infinite recursion.
+    if _cache:
+        return
+    elif _cache is None:
+        _cache = []
     _cache.append(1)
     from numpy.distutils.fcompiler import show_fcompilers
     import distutils.core
diff --git a/numpy/distutils/core.py b/numpy/distutils/core.py
index d9e125368..70cc37caa 100644
--- a/numpy/distutils/core.py
+++ b/numpy/distutils/core.py
@@ -71,12 +71,14 @@ def _dict_append(d, **kws):
         else:
             raise TypeError(repr(type(dv)))
 
-def _command_line_ok(_cache=[]):
+def _command_line_ok(_cache=None):
     """ Return True if command line does not contain any
     help or display requests.
     """
     if _cache:
         return _cache[0]
+    elif _cache is None:
+        _cache = []
     ok = True
     display_opts = ['--'+n for n in Distribution.display_option_names]
     for o in Distribution.display_options:
diff --git a/numpy/distutils/exec_command.py b/numpy/distutils/exec_command.py
index 8118e2fc3..af7810d75 100644
--- a/numpy/distutils/exec_command.py
+++ b/numpy/distutils/exec_command.py
@@ -61,6 +61,24 @@ import locale
 from numpy.distutils.misc_util import is_sequence, make_temp_file
 from numpy.distutils import log
 
+def filepath_from_subprocess_output(output):
+    """
+    Convert `bytes` in the encoding used by a subprocess into a filesystem-appropriate `str`.
+
+    Inherited from `exec_command`, and possibly incorrect.
+    """
+    output = output.decode(locale.getpreferredencoding(False),
+                           errors='replace')
+    output = output.replace('\r\n', '\n')
+    # Another historical oddity
+    if output[-1:] == '\n':
+        output = output[:-1]
+    # stdio uses bytes in python 2, so to avoid issues, we simply
+    # replace all non-ascii characters with '?'
+    if sys.version_info < (3, 0):
+        output = output.encode('ascii', errors='replace')
+    return output
+
 def temp_file_name():
     fo, name = make_temp_file()
     fo.close()
diff --git a/numpy/distutils/fcompiler/gnu.py b/numpy/distutils/fcompiler/gnu.py
index 0ebbe79dc..f151809c7 100644
--- a/numpy/distutils/fcompiler/gnu.py
+++ b/numpy/distutils/fcompiler/gnu.py
@@ -8,10 +8,10 @@ import platform
 import tempfile
 import hashlib
 import base64
+import subprocess
 from subprocess import Popen, PIPE, STDOUT
-from copy import copy
+from numpy.distutils.exec_command import filepath_from_subprocess_output
 from numpy.distutils.fcompiler import FCompiler
-from numpy.distutils.exec_command import exec_command
 from numpy.distutils.compat import get_exception
 from numpy.distutils.system_info import system_info
 
@@ -160,9 +160,13 @@ class GnuFCompiler(FCompiler):
         return opt
 
     def get_libgcc_dir(self):
-        status, output = exec_command(
-            self.compiler_f77 + ['-print-libgcc-file-name'], use_tee=0)
-        if not status:
+        try:
+            output = subprocess.check_output(self.compiler_f77 +
+                                            ['-print-libgcc-file-name'])
+        except (OSError, subprocess.CalledProcessError):
+            pass
+        else:
+            output = filepath_from_subprocess_output(output)
             return os.path.dirname(output)
         return None
 
@@ -177,9 +181,13 @@ class GnuFCompiler(FCompiler):
         libgfortran_dir = None
         if libgfortran_name:
             find_lib_arg = ['-print-file-name={0}'.format(libgfortran_name)]
-            status, output = exec_command(
-                self.compiler_f77 + find_lib_arg, use_tee=0)
-            if not status:
+            try:
+                output = subprocess.check_output(
+                                       self.compiler_f77 + find_lib_arg)
+            except (OSError, subprocess.CalledProcessError):
+                pass
+            else:
+                output = filepath_from_subprocess_output(output)
                 libgfortran_dir = os.path.dirname(output)
         return libgfortran_dir
 
@@ -373,8 +381,12 @@ class Gnu95FCompiler(GnuFCompiler):
         return opt
 
     def get_target(self):
-        status, output = exec_command(self.compiler_f77 + ['-v'], use_tee=0)
-        if not status:
+        try:
+            output = subprocess.check_output(self.compiler_f77 + ['-v'])
+        except (OSError, subprocess.CalledProcessError):
+            pass
+        else:
+            output = filepath_from_subprocess_output(output)
             m = TARGET_R.search(output)
             if m:
                 return m.group(1)
diff --git a/numpy/distutils/fcompiler/ibm.py b/numpy/distutils/fcompiler/ibm.py
index d0c2202d4..c4cb2fca7 100644
--- a/numpy/distutils/fcompiler/ibm.py
+++ b/numpy/distutils/fcompiler/ibm.py
@@ -3,9 +3,10 @@ from __future__ import division, absolute_import, print_function
 import os
 import re
 import sys
+import subprocess
 
 from numpy.distutils.fcompiler import FCompiler
-from numpy.distutils.exec_command import exec_command, find_executable
+from numpy.distutils.exec_command import find_executable
 from numpy.distutils.misc_util import make_temp_file
 from distutils import log
 
@@ -35,9 +36,13 @@ class IBMFCompiler(FCompiler):
             lslpp = find_executable('lslpp')
             xlf = find_executable('xlf')
             if os.path.exists(xlf) and os.path.exists(lslpp):
-                s, o = exec_command(lslpp + ' -Lc xlfcmp')
-                m = re.search(r'xlfcmp:(?P<version>\d+([.]\d+)+)', o)
-                if m: version = m.group('version')
+                try:
+                    o = subprocess.check_output([lslpp, '-Lc', 'xlfcmp'])
+                except (OSError, subprocess.CalledProcessError):
+                    pass
+                else:
+                    m = re.search(r'xlfcmp:(?P<version>\d+([.]\d+)+)', o)
+                    if m: version = m.group('version')
 
         xlf_dir = '/etc/opt/ibmcmp/xlf'
         if version is None and os.path.isdir(xlf_dir):
diff --git a/numpy/distutils/fcompiler/pg.py b/numpy/distutils/fcompiler/pg.py
index e6c816baa..99071800a 100644
--- a/numpy/distutils/fcompiler/pg.py
+++ b/numpy/distutils/fcompiler/pg.py
@@ -2,7 +2,6 @@
 from __future__ import division, absolute_import, print_function
 
 import sys
-import os
 
 from numpy.distutils.fcompiler import FCompiler, dummy_fortran_file
 from sys import platform
@@ -62,8 +61,6 @@ class PGroupFCompiler(FCompiler):
 
 
 if sys.version_info >= (3, 5):
-    import subprocess
-    import shlex
     import functools
 
     class PGroupFlangCompiler(FCompiler):
diff --git a/numpy/distutils/lib2def.py b/numpy/distutils/lib2def.py
index 0a5364566..2d013a1e3 100644
--- a/numpy/distutils/lib2def.py
+++ b/numpy/distutils/lib2def.py
@@ -2,7 +2,6 @@ from __future__ import division, absolute_import, print_function
 
 import re
 import sys
-import os
 import subprocess
 
 __doc__ = """This module generates a DEF file from the symbols in
diff --git a/numpy/distutils/mingw32ccompiler.py b/numpy/distutils/mingw32ccompiler.py
index e7fa7bc0d..e6bbe1996 100644
--- a/numpy/distutils/mingw32ccompiler.py
+++ b/numpy/distutils/mingw32ccompiler.py
@@ -71,7 +71,6 @@ class Mingw32CCompiler(distutils.cygwinccompiler.CygwinCCompiler):
         # we need to support 3.2 which doesn't match the standard
         # get_versions methods regex
         if self.gcc_version is None:
-            import re
             p = subprocess.Popen(['gcc', '-dumpversion'], shell=True,
                                  stdout=subprocess.PIPE)
             out_string = p.stdout.read()
diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py
index 8305aeae5..073e841e8 100644
--- a/numpy/distutils/misc_util.py
+++ b/numpy/distutils/misc_util.py
@@ -84,7 +84,9 @@ def get_num_build_jobs():
     Get number of parallel build jobs set by the --parallel command line
     argument of setup.py
     If the command did not receive a setting the environment variable
-    NPY_NUM_BUILD_JOBS checked and if that is unset it returns 1.
+    NPY_NUM_BUILD_JOBS is checked. If that is unset, return the number of
+    processors on the system, with a maximum of 8 (to prevent
+    overloading the system if there are a lot of CPUs).
 
     Returns
     -------
@@ -97,6 +99,7 @@ def get_num_build_jobs():
         cpu_count = len(os.sched_getaffinity(0))
     except AttributeError:
         cpu_count = multiprocessing.cpu_count()
+    cpu_count = min(cpu_count, 8)
     envjobs = int(os.environ.get("NPY_NUM_BUILD_JOBS", cpu_count))
     dist = get_distribution()
     # may be None during configuration
@@ -1564,7 +1567,6 @@ class Configuration(object):
         """Common implementation for add_library and add_installed_library. Do
         not use directly"""
         build_info = copy.copy(build_info)
-        name = name #+ '__OF__' + self.name
         build_info['sources'] = sources
 
         # Sometimes, depends is not set up to an empty list by default, and if
@@ -2009,7 +2011,6 @@ class Configuration(object):
                     f.write('version = %r\n' % (version))
                     f.close()
 
-                import atexit
                 def rm_file(f=target,p=self.info):
                     if delete:
                         try: os.remove(f); p('removed '+f)
@@ -2051,7 +2052,6 @@ class Configuration(object):
                     f.write('version = %r\n' % (version))
                     f.close()
 
-                import atexit
                 def rm_file(f=target,p=self.info):
                     if delete:
                         try: os.remove(f); p('removed '+f)
diff --git a/numpy/distutils/npy_pkg_config.py b/numpy/distutils/npy_pkg_config.py
index 6fe517659..ea16e772d 100644
--- a/numpy/distutils/npy_pkg_config.py
+++ b/numpy/distutils/npy_pkg_config.py
@@ -5,9 +5,9 @@ import re
 import os
 
 if sys.version_info[0] < 3:
-    from ConfigParser import RawConfigParser, NoOptionError
+    from ConfigParser import RawConfigParser
 else:
-    from configparser import RawConfigParser, NoOptionError
+    from configparser import RawConfigParser
 
 __all__ = ['FormatError', 'PkgNotFound', 'LibraryInfo', 'VariableSet',
         'read_config', 'parse_flags']
@@ -414,7 +414,6 @@ if __name__ == '__main__':
             print("%s\t%s - %s" % (info.name, info.name, info.description))
 
     pkg_name = args[1]
-    import os
     d = os.environ.get('NPY_PKG_CONFIG_PATH')
     if d:
         info = read_config(pkg_name, ['numpy/core/lib/npy-pkg-config', '.', d])
diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py
index a5693bdd5..79adcc334 100644
--- a/numpy/distutils/system_info.py
+++ b/numpy/distutils/system_info.py
@@ -126,7 +126,6 @@ import os
 import re
 import copy
 import warnings
-import atexit
 from glob import glob
 from functools import reduce
 if sys.version_info[0] < 3:
@@ -147,7 +146,8 @@ from distutils import log
 from distutils.util import get_platform
 
 from numpy.distutils.exec_command import (
-    find_executable, exec_command, get_pythonexe)
+    find_executable, filepath_from_subprocess_output,
+    get_pythonexe)
 from numpy.distutils.misc_util import (is_sequence, is_string,
                                        get_shared_lib_extension)
 from numpy.distutils.command.config import config as cmd_config
@@ -2243,8 +2243,12 @@ class _pkg_config_info(system_info):
 
     def get_config_output(self, config_exe, option):
         cmd = config_exe + ' ' + self.append_config_exe + ' ' + option
-        s, o = exec_command(cmd, use_tee=0)
-        if not s:
+        try:
+            o = subprocess.check_output(cmd)
+        except (OSError, subprocess.CalledProcessError):
+            pass
+        else:
+            o = filepath_from_subprocess_output(o)
             return o
 
     def calc_info(self):
diff --git a/numpy/doc/basics.py b/numpy/doc/basics.py
index 4d3ab046e..c87a40ccd 100644
--- a/numpy/doc/basics.py
+++ b/numpy/doc/basics.py
@@ -9,39 +9,157 @@ Array types and conversions between types
 NumPy supports a much greater variety of numerical types than Python does.
 This section shows which are available, and how to modify an array's data-type.
 
-============ ==========================================================
-Data type    Description
-============ ==========================================================
-``bool_``    Boolean (True or False) stored as a byte
-``int_``     Default integer type (same as C ``long``; normally either
-             ``int64`` or ``int32``)
-intc         Identical to C ``int`` (normally ``int32`` or ``int64``)
-intp         Integer used for indexing (same as C ``ssize_t``; normally
-             either ``int32`` or ``int64``)
-int8         Byte (-128 to 127)
-int16        Integer (-32768 to 32767)
-int32        Integer (-2147483648 to 2147483647)
-int64        Integer (-9223372036854775808 to 9223372036854775807)
-uint8        Unsigned integer (0 to 255)
-uint16       Unsigned integer (0 to 65535)
-uint32       Unsigned integer (0 to 4294967295)
-uint64       Unsigned integer (0 to 18446744073709551615)
-``float_``   Shorthand for ``float64``.
-float16      Half precision float: sign bit, 5 bits exponent,
-             10 bits mantissa
-float32      Single precision float: sign bit, 8 bits exponent,
-             23 bits mantissa
-float64      Double precision float: sign bit, 11 bits exponent,
-             52 bits mantissa
-``complex_`` Shorthand for ``complex128``.
-complex64    Complex number, represented by two 32-bit floats (real
-             and imaginary components)
-complex128   Complex number, represented by two 64-bit floats (real
-             and imaginary components)
-============ ==========================================================
-
-Additionally to ``intc`` the platform dependent C integer types ``short``,
-``long``, ``longlong`` and their unsigned versions are defined.
+The primitive types supported are tied closely to those in C:
+
+.. list-table::
+    :header-rows: 1
+
+    * - NumPy type
+      - C type
+      - Description
+
+    * - `np.bool_`
+      - ``bool``
+      - Boolean (True or False) stored as a byte
+
+    * - `np.byte`
+      - ``signed char``
+      - Platform-defined
+
+    * - `np.ubyte`
+      - ``unsigned char``
+      - Platform-defined
+
+    * - `np.short`
+      - ``short``
+      - Platform-defined
+
+    * - `np.ushort`
+      - ``unsigned short``
+      - Platform-defined
+
+    * - `np.intc`
+      - ``int``
+      - Platform-defined
+
+    * - `np.uintc`
+      - ``unsigned int``
+      - Platform-defined
+
+    * - `np.int_`
+      - ``long``
+      - Platform-defined
+
+    * - `np.uint`
+      - ``unsigned long``
+      - Platform-defined
+
+    * - `np.longlong`
+      - ``long long``
+      - Platform-defined
+
+    * - `np.ulonglong`
+      - ``unsigned long long``
+      - Platform-defined
+
+    * - `np.half` / `np.float16`
+      -
+      - Half precision float:
+        sign bit, 5 bits exponent, 10 bits mantissa
+
+    * - `np.single`
+      - ``float``
+      - Platform-defined single precision float:
+        typically sign bit, 8 bits exponent, 23 bits mantissa
+
+    * - `np.double`
+      - ``double``
+      - Platform-defined double precision float:
+        typically sign bit, 11 bits exponent, 52 bits mantissa.
+
+    * - `np.longdouble`
+      - ``long double``
+      - Platform-defined extended-precision float
+
+    * - `np.csingle`
+      - ``float complex``
+      - Complex number, represented by two single-precision floats (real and imaginary components)
+
+    * - `np.cdouble`
+      - ``double complex``
+      - Complex number, represented by two double-precision floats (real and imaginary components).
+
+    * - `np.clongdouble`
+      - ``long double complex``
+      - Complex number, represented by two extended-precision floats (real and imaginary components).
+
+
+Since many of these have platform-dependent definitions, a set of fixed-size
+aliases is provided:
+
+.. list-table::
+    :header-rows: 1
+
+    * - NumPy type
+      - C type
+      - Description
+
+    * - `np.int8`
+      - ``int8_t``
+      - Byte (-128 to 127)
+
+    * - `np.int16`
+      - ``int16_t``
+      - Integer (-32768 to 32767)
+
+    * - `np.int32`
+      - ``int32_t``
+      - Integer (-2147483648 to 2147483647)
+
+    * - `np.int64`
+      - ``int64_t``
+      - Integer (-9223372036854775808 to 9223372036854775807)
+
+    * - `np.uint8`
+      - ``uint8_t``
+      - Unsigned integer (0 to 255)
+
+    * - `np.uint16`
+      - ``uint16_t``
+      - Unsigned integer (0 to 65535)
+
+    * - `np.uint32`
+      - ``uint32_t``
+      - Unsigned integer (0 to 4294967295)
+
+    * - `np.uint64`
+      - ``uint64_t``
+      - Unsigned integer (0 to 18446744073709551615)
+
+    * - `np.intp`
+      - ``intptr_t``
+      - Integer used for indexing, typically the same as ``ssize_t``
+
+    * - `np.uintp`
+      - ``uintptr_t``
+      - Integer large enough to hold a pointer
+
+    * - `np.float32`
+      - ``float``
+      -
+
+    * - `np.float64` / `np.float_`
+      - ``double``
+      - Note that this matches the precision of the built-in Python `float`.
+
+    * - `np.complex64`
+      - ``float complex``
+      - Complex number, represented by two 32-bit floats (real and imaginary components)
+
+    * - `np.complex128` / `np.complex_`
+      - ``double complex``
+      - Note that this matches the precision of the built-in Python `complex`.
+
 
 NumPy numerical types are instances of ``dtype`` (data-type) objects, each
 having unique characteristics.  Once you have imported NumPy using
diff --git a/numpy/f2py/__init__.py b/numpy/f2py/__init__.py
index fbb64f762..23a4b7c41 100644
--- a/numpy/f2py/__init__.py
+++ b/numpy/f2py/__init__.py
@@ -7,6 +7,10 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['run_main', 'compile', 'f2py_testing']
 
 import sys
+import subprocess
+import os
+
+import numpy as np
 
 from . import f2py2e
 from . import f2py_testing
@@ -32,8 +36,12 @@ def compile(source,
         Fortran source of module / subroutine to compile
     modulename : str, optional
         The name of the compiled python module
-    extra_args : str, optional
+    extra_args : str or list, optional
         Additional parameters passed to f2py
+
+        .. versionchanged:: 1.16.0
+            A list of args may also be provided.
+
     verbose : bool, optional
         Print f2py output to screen
     source_fn : str, optional
@@ -48,25 +56,48 @@ def compile(source,
         .. versionadded:: 1.11.0
 
     """
-    from numpy.distutils.exec_command import exec_command
     import tempfile
+    import shlex
+
     if source_fn is None:
-        f = tempfile.NamedTemporaryFile(suffix=extension)
+        f, fname = tempfile.mkstemp(suffix=extension)
+        # f is a file descriptor so need to close it
+        # carefully -- not with .close() directly
+        os.close(f)
     else:
-        f = open(source_fn, 'w')
+        fname = source_fn
 
     try:
-        f.write(source)
-        f.flush()
+        with open(fname, 'w') as f:
+            f.write(str(source))
+
+        args = ['-c', '-m', modulename, f.name]
+
+        if isinstance(extra_args, np.compat.basestring):
+            is_posix = (os.name == 'posix')
+            extra_args = shlex.split(extra_args, posix=is_posix)
+
+        args.extend(extra_args)
 
-        args = ' -c -m {} {} {}'.format(modulename, f.name, extra_args)
-        c = '{} -c "import numpy.f2py as f2py2e;f2py2e.main()" {}'
-        c = c.format(sys.executable, args)
-        status, output = exec_command(c)
+        c = [sys.executable,
+             '-c',
+             'import numpy.f2py as f2py2e;f2py2e.main()'] + args
+        try:
+            output = subprocess.check_output(c)
+        except subprocess.CalledProcessError as exc:
+            status = exc.returncode
+            output = ''
+        except OSError:
+            # preserve historic status code used by exec_command()
+            status = 127
+            output = ''
+        else:
+            status = 0
         if verbose:
             print(output)
     finally:
-        f.close()
+        if source_fn is None:
+            os.remove(fname)
     return status
 
 from numpy._pytesttester import PytestTester
diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py
index 99ff030e3..361203a57 100755
--- a/numpy/f2py/crackfortran.py
+++ b/numpy/f2py/crackfortran.py
@@ -33,7 +33,7 @@ Note: f2py directive: <commentchar>f2py<line> is read as <line>
 Note: pythonmodule is introduced to represent Python module
 
 Usage:
-  `postlist=crackfortran(files,funcs)`
+  `postlist=crackfortran(files)`
   `postlist` contains declaration information read from the list of files `files`.
   `crack2fortran(postlist)` returns a fortran code to be saved to pyf-file
 
@@ -3341,7 +3341,7 @@ if __name__ == "__main__":
   and also be sure that the files do not contain programs without program statement).
 """, 0)
 
-    postlist = crackfortran(files, funcs)
+    postlist = crackfortran(files)
     if pyffilename:
         outmess('Writing fortran code to file %s\n' % repr(pyffilename), 0)
         pyf = crack2fortran(postlist)
diff --git a/numpy/f2py/setup.py b/numpy/f2py/setup.py
index e95b9584f..c0c50ce54 100644
--- a/numpy/f2py/setup.py
+++ b/numpy/f2py/setup.py
@@ -18,10 +18,6 @@ Pearu Peterson
 """
 from __future__ import division, print_function
 
-import os
-import sys
-from distutils.dep_util import newer
-from numpy.distutils import log
 from numpy.distutils.core import setup
 from numpy.distutils.misc_util import Configuration
 
diff --git a/numpy/f2py/tests/test_array_from_pyobj.py b/numpy/f2py/tests/test_array_from_pyobj.py
index 8b021491f..a80090185 100644
--- a/numpy/f2py/tests/test_array_from_pyobj.py
+++ b/numpy/f2py/tests/test_array_from_pyobj.py
@@ -1,14 +1,14 @@
 from __future__ import division, absolute_import, print_function
 
-import unittest
 import os
 import sys
 import copy
+import pytest
 
 from numpy import (
     array, alltrue, ndarray, zeros, dtype, intp, clongdouble
     )
-from numpy.testing import assert_, assert_equal, SkipTest
+from numpy.testing import assert_, assert_equal
 from numpy.core.multiarray import typeinfo
 from . import util
 
@@ -24,7 +24,7 @@ def setup_module():
 
     # Check compiler availability first
     if not util.has_c_compiler():
-        raise SkipTest("No C compiler available")
+        pytest.skip("No C compiler available")
 
     if wrap is None:
         config_code = """
@@ -304,10 +304,16 @@ class TestIntent(object):
         assert_(not intent.in_.is_intent('c'))
 
 
-class _test_shared_memory(object):
+class TestSharedMemory(object):
     num2seq = [1, 2]
     num23seq = [[1, 2, 3], [4, 5, 6]]
 
+    @pytest.fixture(autouse=True, scope='class', params=_type_names)
+    def setup_type(self, request):
+        request.cls.type = Type(request.param)
+        request.cls.array = lambda self, dims, intent, obj: \
+            Array(Type(request.param), dims, intent, obj)
+
     def test_in_from_2seq(self):
         a = self.array([2], intent.in_, self.num2seq)
         assert_(not a.has_shared_memory())
@@ -573,12 +579,3 @@ class _test_shared_memory(object):
             assert_(obj.flags['FORTRAN'])  # obj attributes changed inplace!
             assert_(not obj.flags['CONTIGUOUS'])
             assert_(obj.dtype.type is self.type.dtype)  # obj changed inplace!
-
-
-for t in _type_names:
-    exec('''\
-class TestGen_%s(_test_shared_memory):
-    def setup(self):
-        self.type = Type(%r)
-    array = lambda self,dims,intent,obj: Array(Type(%r),dims,intent,obj)
-''' % (t, t, t))
diff --git a/numpy/f2py/tests/test_callback.py b/numpy/f2py/tests/test_callback.py
index 4e74947b0..824ef7b0c 100644
--- a/numpy/f2py/tests/test_callback.py
+++ b/numpy/f2py/tests/test_callback.py
@@ -62,9 +62,9 @@ cf2py  intent(out) a
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t,t2".split(","):
-            self.check_function(name)
+    @pytest.mark.parametrize('name', 't,t2'.split(','))
+    def test_all(self, name):
+        self.check_function(name)
 
     @pytest.mark.slow
     def test_docstring(self):
diff --git a/numpy/f2py/tests/test_compile_function.py b/numpy/f2py/tests/test_compile_function.py
new file mode 100644
index 000000000..74e0804e2
--- /dev/null
+++ b/numpy/f2py/tests/test_compile_function.py
@@ -0,0 +1,108 @@
+"""See https://github.com/numpy/numpy/pull/11937.
+
+"""
+from __future__ import division, absolute_import, print_function
+
+import sys
+import os
+import uuid
+from importlib import import_module
+import pytest
+
+import numpy.f2py
+
+from numpy.testing import assert_equal
+from . import util
+
+
+def setup_module():
+    if sys.platform == 'win32' and sys.version_info[0] < 3:
+        pytest.skip('Fails with MinGW64 Gfortran (Issue #9673)')
+    if not util.has_c_compiler():
+        pytest.skip("Needs C compiler")
+    if not util.has_f77_compiler():
+        pytest.skip('Needs FORTRAN 77 compiler')
+
+
+# extra_args can be a list (since gh-11937) or string.
+# also test absence of extra_args
+@pytest.mark.parametrize(
+    "extra_args", [['--noopt', '--debug'], '--noopt --debug', '']
+    )
+def test_f2py_init_compile(extra_args):
+    # flush through the f2py __init__ compile() function code path as a
+    # crude test for input handling following migration from
+    # exec_command() to subprocess.check_output() in gh-11937
+
+    # the Fortran 77 syntax requires 6 spaces before any statement, but
+    # more leading spaces may be added.
+    fsource =  """
+        integer function foo()
+        foo = 10 + 5
+        return
+        end
+    """
+    # use various helper functions in util.py to enable robust build /
+    # compile and reimport cycle in test suite
+    moddir = util.get_module_dir()
+    modname = util.get_temp_module_name()
+
+    cwd = os.getcwd()
+    target = os.path.join(moddir, str(uuid.uuid4()) + '.f')
+    # try running compile() with and without a source_fn provided so
+    # that the code path where a temporary file for writing Fortran
+    # source is created is also explored
+    for source_fn in [target, None]:
+        # mimic the path changing behavior used by build_module() in
+        # util.py, but don't actually use build_module() because it has
+        # its own invocation of subprocess that circumvents the
+        # f2py.compile code block under test
+        try:
+            os.chdir(moddir)
+            ret_val = numpy.f2py.compile(
+                fsource,
+                modulename=modname,
+                extra_args=extra_args,
+                source_fn=source_fn
+                )
+        finally:
+            os.chdir(cwd)
+
+        # check for compile success return value
+        assert_equal(ret_val, 0)
+
+        # we are not currently able to import the Python-Fortran
+        # interface module on Windows / Appveyor, even though we do get
+        # successful compilation on that platform with Python 3.x
+        if sys.platform != 'win32':
+            # check for sensible result of Fortran function; that means
+            # we can import the module name in Python and retrieve the
+            # result of the sum operation
+            return_check = import_module(modname)
+            calc_result = return_check.foo()
+            assert_equal(calc_result, 15)
+
+
+def test_f2py_init_compile_failure():
+    # verify an appropriate integer status value returned by
+    # f2py.compile() when invalid Fortran is provided
+    ret_val = numpy.f2py.compile(b"invalid")
+    assert_equal(ret_val, 1)
+
+
+def test_f2py_init_compile_bad_cmd():
+    # verify that usage of invalid command in f2py.compile() returns
+    # status value of 127 for historic consistency with exec_command()
+    # error handling
+
+    # temporarily patch sys.executable to induce an OSError downstream
+    # NOTE: how bad of an idea is this patching?
+    try:
+        temp = sys.executable
+        sys.executable = 'does not exist'
+
+        # the OSError should take precedence over invalid Fortran
+        ret_val = numpy.f2py.compile(b"invalid")
+        assert_equal(ret_val, 127)
+    finally:
+        sys.executable = temp
diff --git a/numpy/f2py/tests/test_quoted_character.py b/numpy/f2py/tests/test_quoted_character.py
index 09fc7328f..38e380802 100644
--- a/numpy/f2py/tests/test_quoted_character.py
+++ b/numpy/f2py/tests/test_quoted_character.py
@@ -1,11 +1,20 @@
+"""See https://github.com/numpy/numpy/pull/10676.
+
+"""
 from __future__ import division, absolute_import, print_function
 
 import sys
+import os
+import uuid
+from importlib import import_module
 import pytest
 
+import numpy.f2py
+
 from numpy.testing import assert_equal
 from . import util
 
+
 class TestQuotedCharacter(util.F2PyTest):
     code = """
       SUBROUTINE FOO(OUT1, OUT2, OUT3, OUT4, OUT5, OUT6)
diff --git a/numpy/f2py/tests/test_return_character.py b/numpy/f2py/tests/test_return_character.py
index 4a94c5b98..fc3a58d36 100644
--- a/numpy/f2py/tests/test_return_character.py
+++ b/numpy/f2py/tests/test_return_character.py
@@ -82,9 +82,9 @@ cf2py    intent(out) ts
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t1,t5,s0,s1,s5,ss".split(","):
-            self.check_function(getattr(self.module, name))
+    @pytest.mark.parametrize('name', 't0,t1,t5,s0,s1,s5,ss'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module, name))
 
 
 class TestF90ReturnCharacter(TestReturnCharacter):
@@ -141,6 +141,6 @@ end module f90_return_char
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t1,t5,ts,s0,s1,s5,ss".split(","):
-            self.check_function(getattr(self.module.f90_return_char, name))
+    @pytest.mark.parametrize('name', 't0,t1,t5,ts,s0,s1,s5,ss'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module.f90_return_char, name))
diff --git a/numpy/f2py/tests/test_return_complex.py b/numpy/f2py/tests/test_return_complex.py
index 152cfc960..43c884dfb 100644
--- a/numpy/f2py/tests/test_return_complex.py
+++ b/numpy/f2py/tests/test_return_complex.py
@@ -105,9 +105,9 @@ cf2py    intent(out) td
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t8,t16,td,s0,s8,s16,sd".split(","):
-            self.check_function(getattr(self.module, name))
+    @pytest.mark.parametrize('name', 't0,t8,t16,td,s0,s8,s16,sd'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module, name))
 
 
 class TestF90ReturnComplex(TestReturnComplex):
@@ -164,6 +164,6 @@ end module f90_return_complex
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t8,t16,td,s0,s8,s16,sd".split(","):
-            self.check_function(getattr(self.module.f90_return_complex, name))
+    @pytest.mark.parametrize('name', 't0,t8,t16,td,s0,s8,s16,sd'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module.f90_return_complex, name))
diff --git a/numpy/f2py/tests/test_return_integer.py b/numpy/f2py/tests/test_return_integer.py
index 7a4b07c4f..22f4acfdf 100644
--- a/numpy/f2py/tests/test_return_integer.py
+++ b/numpy/f2py/tests/test_return_integer.py
@@ -104,9 +104,10 @@ cf2py    intent(out) t8
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t1,t2,t4,t8,s0,s1,s2,s4,s8".split(","):
-            self.check_function(getattr(self.module, name))
+    @pytest.mark.parametrize('name',
+                             't0,t1,t2,t4,t8,s0,s1,s2,s4,s8'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module, name))
 
 
 class TestF90ReturnInteger(TestReturnInteger):
@@ -174,6 +175,7 @@ end module f90_return_integer
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t1,t2,t4,t8,s0,s1,s2,s4,s8".split(","):
-            self.check_function(getattr(self.module.f90_return_integer, name))
+    @pytest.mark.parametrize('name',
+                             't0,t1,t2,t4,t8,s0,s1,s2,s4,s8'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module.f90_return_integer, name))
diff --git a/numpy/f2py/tests/test_return_logical.py b/numpy/f2py/tests/test_return_logical.py
index 403f4e205..96f215a91 100644
--- a/numpy/f2py/tests/test_return_logical.py
+++ b/numpy/f2py/tests/test_return_logical.py
@@ -113,9 +113,9 @@ c       end
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t1,t2,t4,s0,s1,s2,s4".split(","):
-            self.check_function(getattr(self.module, name))
+    @pytest.mark.parametrize('name', 't0,t1,t2,t4,s0,s1,s2,s4'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module, name))
 
 
 class TestF90ReturnLogical(TestReturnLogical):
@@ -183,6 +183,7 @@ end module f90_return_logical
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t1,t2,t4,t8,s0,s1,s2,s4,s8".split(","):
-            self.check_function(getattr(self.module.f90_return_logical, name))
+    @pytest.mark.parametrize('name',
+                             't0,t1,t2,t4,t8,s0,s1,s2,s4,s8'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module.f90_return_logical, name))
diff --git a/numpy/f2py/tests/test_return_real.py b/numpy/f2py/tests/test_return_real.py
index fcb13e1e0..315cfe49b 100644
--- a/numpy/f2py/tests/test_return_real.py
+++ b/numpy/f2py/tests/test_return_real.py
@@ -1,5 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
+import platform
 import pytest
 
 from numpy import array
@@ -52,6 +53,11 @@ class TestReturnReal(util.F2PyTest):
             pass
 
 
+
+@pytest.mark.skipif(
+    platform.system() == 'Darwin',
+    reason="Prone to error when run with numpy/f2py/tests on mac os, "
+           "but not when run in isolation")
 class TestCReturnReal(TestReturnReal):
     suffix = ".pyf"
     module_name = "c_ext_return_real"
@@ -85,9 +91,9 @@ end python module c_ext_return_real
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t4,t8,s4,s8".split(","):
-            self.check_function(getattr(self.module, name))
+    @pytest.mark.parametrize('name', 't4,t8,s4,s8'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module, name))
 
 
 class TestF77ReturnReal(TestReturnReal):
@@ -140,9 +146,9 @@ cf2py    intent(out) td
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t4,t8,td,s0,s4,s8,sd".split(","):
-            self.check_function(getattr(self.module, name))
+    @pytest.mark.parametrize('name', 't0,t4,t8,td,s0,s4,s8,sd'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module, name))
 
 
 class TestF90ReturnReal(TestReturnReal):
@@ -199,6 +205,6 @@ end module f90_return_real
     """
 
     @pytest.mark.slow
-    def test_all(self):
-        for name in "t0,t4,t8,td,s0,s4,s8,sd".split(","):
-            self.check_function(getattr(self.module.f90_return_real, name))
+    @pytest.mark.parametrize('name', 't0,t4,t8,td,s0,s4,s8,sd'.split(','))
+    def test_all(self, name):
+        self.check_function(getattr(self.module.f90_return_real, name))
diff --git a/numpy/f2py/tests/test_semicolon_split.py b/numpy/f2py/tests/test_semicolon_split.py
index 2b0f32727..bcd18c893 100644
--- a/numpy/f2py/tests/test_semicolon_split.py
+++ b/numpy/f2py/tests/test_semicolon_split.py
@@ -1,8 +1,15 @@
 from __future__ import division, absolute_import, print_function
 
+import platform
+import pytest
+
 from . import util
 from numpy.testing import assert_equal
 
+@pytest.mark.skipif(
+    platform.system() == 'Darwin',
+    reason="Prone to error when run with numpy/f2py/tests on mac os, "
+           "but not when run in isolation")
 class TestMultiline(util.F2PyTest):
     suffix = ".pyf"
     module_name = "multiline"
@@ -26,6 +33,11 @@ end python module {module}
     def test_multiline(self):
         assert_equal(self.module.foo(), 42)
 
+
+@pytest.mark.skipif(
+    platform.system() == 'Darwin',
+    reason="Prone to error when run with numpy/f2py/tests on mac os, "
+           "but not when run in isolation")
 class TestCallstatement(util.F2PyTest):
     suffix = ".pyf"
     module_name = "callstatement"
diff --git a/numpy/f2py/tests/util.py b/numpy/f2py/tests/util.py
index 466fd4970..73fc27b96 100644
--- a/numpy/f2py/tests/util.py
+++ b/numpy/f2py/tests/util.py
@@ -20,7 +20,7 @@ import pytest
 import numpy.f2py
 
 from numpy.compat import asbytes, asstr
-from numpy.testing import SkipTest, temppath
+from numpy.testing import temppath
 from importlib import import_module
 
 try:
@@ -322,14 +322,14 @@ class F2PyTest(object):
 
     def setup(self):
         if sys.platform == 'win32':
-            raise SkipTest('Fails with MinGW64 Gfortran (Issue #9673)')
+            pytest.skip('Fails with MinGW64 Gfortran (Issue #9673)')
 
         if self.module is not None:
             return
 
         # Check compiler availability first
         if not has_c_compiler():
-            raise SkipTest("No C compiler available")
+            pytest.skip("No C compiler available")
 
         codes = []
         if self.sources:
@@ -345,9 +345,9 @@ class F2PyTest(object):
             elif fn.endswith('.f90'):
                 needs_f90 = True
         if needs_f77 and not has_f77_compiler():
-            raise SkipTest("No Fortran 77 compiler available")
+            pytest.skip("No Fortran 77 compiler available")
         if needs_f90 and not has_f90_compiler():
-            raise SkipTest("No Fortran 90 compiler available")
+            pytest.skip("No Fortran 90 compiler available")
 
         # Build the module
         if self.code is not None:
diff --git a/numpy/fft/fftpack.py b/numpy/fft/fftpack.py
index e17e1cb34..d88990373 100644
--- a/numpy/fft/fftpack.py
+++ b/numpy/fft/fftpack.py
@@ -37,6 +37,8 @@ __all__ = ['fft', 'ifft', 'rfft', 'irfft', 'hfft', 'ihfft', 'rfftn',
 
 from numpy.core import (array, asarray, zeros, swapaxes, shape, conjugate,
                         take, sqrt)
+from numpy.core.multiarray import normalize_axis_index
+from numpy.core.overrides import array_function_dispatch
 from . import fftpack_lite as fftpack
 from .helper import _FFTCache
 
@@ -47,6 +49,7 @@ _real_fft_cache = _FFTCache(max_size_in_mb=100, max_item_count=32)
 def _raw_fft(a, n=None, axis=-1, init_function=fftpack.cffti,
              work_function=fftpack.cfftf, fft_cache=_fft_cache):
     a = asarray(a)
+    axis = normalize_axis_index(axis, a.ndim)
 
     if n is None:
         n = a.shape[axis]
@@ -78,10 +81,10 @@ def _raw_fft(a, n=None, axis=-1, init_function=fftpack.cffti,
             z[tuple(index)] = a
             a = z
 
-    if axis != -1:
+    if axis != a.ndim - 1:
         a = swapaxes(a, axis, -1)
     r = work_function(a, wsave)
-    if axis != -1:
+    if axis != a.ndim - 1:
         r = swapaxes(r, axis, -1)
 
     # As soon as we put wsave back into the cache, another thread could pick it
@@ -99,6 +102,11 @@ def _unitary(norm):
     return norm is not None
 
 
+def _fft_dispatcher(a, n=None, axis=None, norm=None):
+    return (a,)
+
+
+@array_function_dispatch(_fft_dispatcher)
 def fft(a, n=None, axis=-1, norm=None):
     """
     Compute the one-dimensional discrete Fourier Transform.
@@ -195,6 +203,7 @@ def fft(a, n=None, axis=-1, norm=None):
     return output
 
 
+@array_function_dispatch(_fft_dispatcher)
 def ifft(a, n=None, axis=-1, norm=None):
     """
     Compute the one-dimensional inverse discrete Fourier Transform.
@@ -288,6 +297,8 @@ def ifft(a, n=None, axis=-1, norm=None):
     return output * (1 / (sqrt(n) if unitary else n))
 
 
+
+@array_function_dispatch(_fft_dispatcher)
 def rfft(a, n=None, axis=-1, norm=None):
     """
     Compute the one-dimensional discrete Fourier Transform for real input.
@@ -377,6 +388,7 @@ def rfft(a, n=None, axis=-1, norm=None):
     return output
 
 
+@array_function_dispatch(_fft_dispatcher)
 def irfft(a, n=None, axis=-1, norm=None):
     """
     Compute the inverse of the n-point DFT for real input.
@@ -467,6 +479,7 @@ def irfft(a, n=None, axis=-1, norm=None):
     return output * (1 / (sqrt(n) if unitary else n))
 
 
+@array_function_dispatch(_fft_dispatcher)
 def hfft(a, n=None, axis=-1, norm=None):
     """
     Compute the FFT of a signal that has Hermitian symmetry, i.e., a real
@@ -549,6 +562,7 @@ def hfft(a, n=None, axis=-1, norm=None):
     return irfft(conjugate(a), n, axis) * (sqrt(n) if unitary else n)
 
 
+@array_function_dispatch(_fft_dispatcher)
 def ihfft(a, n=None, axis=-1, norm=None):
     """
     Compute the inverse FFT of a signal that has Hermitian symmetry.
@@ -639,6 +653,11 @@ def _raw_fftnd(a, s=None, axes=None, function=fft, norm=None):
     return a
 
 
+def _fftn_dispatcher(a, s=None, axes=None, norm=None):
+    return (a,)
+
+
+@array_function_dispatch(_fftn_dispatcher)
 def fftn(a, s=None, axes=None, norm=None):
     """
     Compute the N-dimensional discrete Fourier Transform.
@@ -736,6 +755,7 @@ def fftn(a, s=None, axes=None, norm=None):
     return _raw_fftnd(a, s, axes, fft, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def ifftn(a, s=None, axes=None, norm=None):
     """
     Compute the N-dimensional inverse discrete Fourier Transform.
@@ -833,6 +853,7 @@ def ifftn(a, s=None, axes=None, norm=None):
     return _raw_fftnd(a, s, axes, ifft, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def fft2(a, s=None, axes=(-2, -1), norm=None):
     """
     Compute the 2-dimensional discrete Fourier Transform
@@ -923,6 +944,7 @@ def fft2(a, s=None, axes=(-2, -1), norm=None):
     return _raw_fftnd(a, s, axes, fft, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def ifft2(a, s=None, axes=(-2, -1), norm=None):
     """
     Compute the 2-dimensional inverse discrete Fourier Transform.
@@ -1010,6 +1032,7 @@ def ifft2(a, s=None, axes=(-2, -1), norm=None):
     return _raw_fftnd(a, s, axes, ifft, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def rfftn(a, s=None, axes=None, norm=None):
     """
     Compute the N-dimensional discrete Fourier Transform for real input.
@@ -1102,6 +1125,7 @@ def rfftn(a, s=None, axes=None, norm=None):
     return a
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def rfft2(a, s=None, axes=(-2, -1), norm=None):
     """
     Compute the 2-dimensional FFT of a real array.
@@ -1139,6 +1163,7 @@ def rfft2(a, s=None, axes=(-2, -1), norm=None):
     return rfftn(a, s, axes, norm)
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def irfftn(a, s=None, axes=None, norm=None):
     """
     Compute the inverse of the N-dimensional FFT of real input.
@@ -1233,6 +1258,7 @@ def irfftn(a, s=None, axes=None, norm=None):
     return a
 
 
+@array_function_dispatch(_fftn_dispatcher)
 def irfft2(a, s=None, axes=(-2, -1), norm=None):
     """
     Compute the 2-dimensional inverse FFT of a real array.
diff --git a/numpy/fft/helper.py b/numpy/fft/helper.py
index 729121f31..4b698bb4d 100644
--- a/numpy/fft/helper.py
+++ b/numpy/fft/helper.py
@@ -11,6 +11,7 @@ except ImportError:
     import dummy_threading as threading
 from numpy.compat import integer_types
 from numpy.core import integer, empty, arange, asarray, roll
+from numpy.core.overrides import array_function_dispatch
 
 # Created by Pearu Peterson, September 2002
 
@@ -19,6 +20,11 @@ __all__ = ['fftshift', 'ifftshift', 'fftfreq', 'rfftfreq']
 integer_types = integer_types + (integer,)
 
 
+def _fftshift_dispatcher(x, axes=None):
+    return (x,)
+
+
+@array_function_dispatch(_fftshift_dispatcher)
 def fftshift(x, axes=None):
     """
     Shift the zero-frequency component to the center of the spectrum.
@@ -75,6 +81,7 @@ def fftshift(x, axes=None):
     return roll(x, shift, axes)
 
 
+@array_function_dispatch(_fftshift_dispatcher)
 def ifftshift(x, axes=None):
     """
     The inverse of `fftshift`. Although identical for even-length `x`, the
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index e9ca9de4d..f76ad456f 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -6,6 +6,7 @@ of an n-dimensional array.
 from __future__ import division, absolute_import, print_function
 
 import numpy as np
+from numpy.core.overrides import array_function_dispatch
 
 
 __all__ = ['pad']
@@ -990,6 +991,11 @@ def _validate_lengths(narray, number_elements):
 # Public functions
 
 
+def _pad_dispatcher(array, pad_width, mode, **kwargs):
+    return (array,)
+
+
+@array_function_dispatch(_pad_dispatcher)
 def pad(array, pad_width, mode, **kwargs):
     """
     Pads an array.
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 62e9b6d50..ec62cd7a6 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -28,6 +28,7 @@ To do: Optionally return indices analogously to unique for all functions.
 from __future__ import division, absolute_import, print_function
 
 import numpy as np
+from numpy.core.overrides import array_function_dispatch
 
 
 __all__ = [
@@ -36,6 +37,11 @@ __all__ = [
     ]
 
 
+def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
+    return (ary, to_end, to_begin)
+
+
+@array_function_dispatch(_ediff1d_dispatcher)
 def ediff1d(ary, to_end=None, to_begin=None):
     """
     The differences between consecutive elements of an array.
@@ -133,6 +139,12 @@ def _unpack_tuple(x):
         return x
 
 
+def _unique_dispatcher(ar, return_index=None, return_inverse=None,
+                       return_counts=None, axis=None):
+    return (ar,)
+
+
+@array_function_dispatch(_unique_dispatcher)
 def unique(ar, return_index=False, return_inverse=False,
            return_counts=False, axis=None):
     """
@@ -313,6 +325,12 @@ def _unique1d(ar, return_index=False, return_inverse=False,
     return ret
 
 
+def _intersect1d_dispatcher(
+        ar1, ar2, assume_unique=None, return_indices=None):
+    return (ar1, ar2)
+
+
+@array_function_dispatch(_intersect1d_dispatcher)
 def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
     """
     Find the intersection of two arrays.
@@ -408,6 +426,11 @@ def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
         return int1d
 
 
+def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
+    return (ar1, ar2)
+
+
+@array_function_dispatch(_setxor1d_dispatcher)
 def setxor1d(ar1, ar2, assume_unique=False):
     """
     Find the set exclusive-or of two arrays.
@@ -562,6 +585,11 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
         return ret[rev_idx]
 
 
+def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None):
+    return (element, test_elements)
+
+
+@array_function_dispatch(_isin_dispatcher)
 def isin(element, test_elements, assume_unique=False, invert=False):
     """
     Calculates `element in test_elements`, broadcasting over `element` only.
@@ -660,6 +688,11 @@ def isin(element, test_elements, assume_unique=False, invert=False):
                 invert=invert).reshape(element.shape)
 
 
+def _union1d_dispatcher(ar1, ar2):
+    return (ar1, ar2)
+
+
+@array_function_dispatch(_union1d_dispatcher)
 def union1d(ar1, ar2):
     """
     Find the union of two arrays.
@@ -695,11 +728,17 @@ def union1d(ar1, ar2):
     """
     return unique(np.concatenate((ar1, ar2), axis=None))
 
+
+def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
+    return (ar1, ar2)
+
+
+@array_function_dispatch(_setdiff1d_dispatcher)
 def setdiff1d(ar1, ar2, assume_unique=False):
     """
     Find the set difference of two arrays.
 
-    Return the sorted, unique values in `ar1` that are not in `ar2`.
+    Return the unique values in `ar1` that are not in `ar2`.
 
     Parameters
     ----------
@@ -714,7 +753,9 @@ def setdiff1d(ar1, ar2, assume_unique=False):
     Returns
     -------
     setdiff1d : ndarray
-        Sorted 1D array of values in `ar1` that are not in `ar2`.
+        1D array of values in `ar1` that are not in `ar2`. The result
+        is sorted when `assume_unique=False`, but otherwise only sorted
+        if the input is sorted.
 
     See Also
     --------
diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py
index 06fa1bd92..d1a0cd9c0 100644
--- a/numpy/lib/financial.py
+++ b/numpy/lib/financial.py
@@ -15,6 +15,8 @@ from __future__ import division, absolute_import, print_function
 from decimal import Decimal
 
 import numpy as np
+from numpy.core.overrides import array_function_dispatch
+
 
 __all__ = ['fv', 'pmt', 'nper', 'ipmt', 'ppmt', 'pv', 'rate',
            'irr', 'npv', 'mirr']
@@ -36,6 +38,12 @@ def _convert_when(when):
     except (KeyError, TypeError):
         return [_when_to_num[x] for x in when]
 
+
+def _fv_dispatcher(rate, nper, pmt, pv, when=None):
+    return (rate, nper, pmt, pv)
+
+
+@array_function_dispatch(_fv_dispatcher)
 def fv(rate, nper, pmt, pv, when='end'):
     """
     Compute the future value.
@@ -124,6 +132,12 @@ def fv(rate, nper, pmt, pv, when='end'):
                     (1 + rate*when)*(temp - 1)/rate)
     return -(pv*temp + pmt*fact)
 
+
+def _pmt_dispatcher(rate, nper, pv, fv=None, when=None):
+    return (rate, nper, pv, fv)
+
+
+@array_function_dispatch(_pmt_dispatcher)
 def pmt(rate, nper, pv, fv=0, when='end'):
     """
     Compute the payment against loan principal plus interest.
@@ -216,6 +230,12 @@ def pmt(rate, nper, pv, fv=0, when='end'):
                     (1 + masked_rate*when)*(temp - 1)/masked_rate)
     return -(fv + pv*temp) / fact
 
+
+def _nper_dispatcher(rate, pmt, pv, fv=None, when=None):
+    return (rate, pmt, pv, fv)
+
+
+@array_function_dispatch(_nper_dispatcher)
 def nper(rate, pmt, pv, fv=0, when='end'):
     """
     Compute the number of periodic payments.
@@ -284,6 +304,12 @@ def nper(rate, pmt, pv, fv=0, when='end'):
         B = np.log((-fv+z) / (pv+z))/np.log(1+rate)
         return np.where(rate == 0, A, B)
 
+
+def _ipmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+    return (rate, per, nper, pv, fv)
+
+
+@array_function_dispatch(_ipmt_dispatcher)
 def ipmt(rate, per, nper, pv, fv=0, when='end'):
     """
     Compute the interest portion of a payment.
@@ -379,6 +405,7 @@ def ipmt(rate, per, nper, pv, fv=0, when='end'):
         pass
     return ipmt
 
+
 def _rbl(rate, per, pmt, pv, when):
     """
     This function is here to simply have a different name for the 'fv'
@@ -388,6 +415,12 @@ def _rbl(rate, per, pmt, pv, when):
     """
     return fv(rate, (per - 1), pmt, pv, when)
 
+
+def _ppmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+    return (rate, per, nper, pv, fv)
+
+
+@array_function_dispatch(_ppmt_dispatcher)
 def ppmt(rate, per, nper, pv, fv=0, when='end'):
     """
     Compute the payment against loan principal.
@@ -416,6 +449,12 @@ def ppmt(rate, per, nper, pv, fv=0, when='end'):
     total = pmt(rate, nper, pv, fv, when)
     return total - ipmt(rate, per, nper, pv, fv, when)
 
+
+def _pv_dispatcher(rate, nper, pmt, fv=None, when=None):
+    return (rate, nper, pmt, fv)  # `when` is excluded from dispatch
+
+
+@array_function_dispatch(_pv_dispatcher)
 def pv(rate, nper, pmt, fv=0, when='end'):
     """
     Compute the present value.
@@ -520,6 +559,12 @@ def _g_div_gp(r, n, p, x, y, w):
                 (n*t2*x - p*(t1 - 1)*(r*w + 1)/(r**2) + n*p*t2*(r*w + 1)/r +
                  p*(t1 - 1)*w/r))
 
+
+def _rate_dispatcher(nper, pmt, pv, fv, when=None, guess=None, tol=None,
+                     maxiter=None):
+    return (nper, pmt, pv, fv)
+
+
 # Use Newton's iteration until the change is less than 1e-6
 #  for all values or a maximum of 100 iterations is reached.
 #  Newton's rule is
@@ -527,6 +572,7 @@ def _g_div_gp(r, n, p, x, y, w):
 #     where
 #  g(r) is the formula
 #  g'(r) is the derivative with respect to r.
+@array_function_dispatch(_rate_dispatcher)
 def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
     """
     Compute the rate of interest per period.
@@ -598,6 +644,12 @@ def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
     else:
         return rn
 
+
+def _irr_dispatcher(values):
+    return (values,)
+
+
+@array_function_dispatch(_irr_dispatcher)
 def irr(values):
     """
     Return the Internal Rate of Return (IRR).
@@ -677,6 +729,12 @@ def irr(values):
     rate = rate.item(np.argmin(np.abs(rate)))
     return rate
 
+
+def _npv_dispatcher(rate, values):
+    return (values,)
+
+
+@array_function_dispatch(_npv_dispatcher)
 def npv(rate, values):
     """
     Returns the NPV (Net Present Value) of a cash flow series.
@@ -722,6 +780,12 @@ def npv(rate, values):
     values = np.asarray(values)
     return (values / (1+rate)**np.arange(0, len(values))).sum(axis=0)
 
+
+def _mirr_dispatcher(values, finance_rate, reinvest_rate):
+    return (values,)
+
+
+@array_function_dispatch(_mirr_dispatcher)
 def mirr(values, finance_rate, reinvest_rate):
     """
     Modified internal rate of return.
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index ef5ec57e3..e25868236 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -162,11 +162,8 @@ import io
 import warnings
 from numpy.lib.utils import safe_eval
 from numpy.compat import asbytes, asstr, isfileobj, long, basestring
+from numpy.core.numeric import pickle
 
-if sys.version_info[0] >= 3:
-    import pickle
-else:
-    import cPickle as pickle
 
 MAGIC_PREFIX = b'\x93NUMPY'
 MAGIC_LEN = len(MAGIC_PREFIX) + 2
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 2992e92bb..c52ecdbd8 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -9,24 +9,24 @@ except ImportError:
 import re
 import sys
 import warnings
-import operator
 
 import numpy as np
 import numpy.core.numeric as _nx
-from numpy.core import linspace, atleast_1d, atleast_2d, transpose
+from numpy.core import atleast_1d, transpose
 from numpy.core.numeric import (
     ones, zeros, arange, concatenate, array, asarray, asanyarray, empty,
     empty_like, ndarray, around, floor, ceil, take, dot, where, intp,
-    integer, isscalar, absolute, AxisError
+    integer, isscalar, absolute
     )
 from numpy.core.umath import (
-    pi, multiply, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin,
-    mod, exp, log10, not_equal, subtract
+    pi, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin,
+    mod, exp, not_equal, subtract
     )
 from numpy.core.fromnumeric import (
-    ravel, nonzero, sort, partition, mean, any, sum
+    ravel, nonzero, partition, mean, any, sum
     )
-from numpy.core.numerictypes import typecodes, number
+from numpy.core.numerictypes import typecodes
+from numpy.core.overrides import array_function_dispatch
 from numpy.core.function_base import add_newdoc
 from numpy.lib.twodim_base import diag
 from .utils import deprecate
@@ -36,7 +36,6 @@ from numpy.core.multiarray import (
     )
 from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc
 from numpy.compat import long
-from numpy.compat.py3k import basestring
 
 if sys.version_info[0] < 3:
     # Force range to be a generator, for np.delete's usage.
@@ -60,6 +59,11 @@ __all__ = [
     ]
 
 
+def _rot90_dispatcher(m, k=None, axes=None):
+    return (m,)
+
+
+@array_function_dispatch(_rot90_dispatcher)
 def rot90(m, k=1, axes=(0,1)):
     """
     Rotate an array by 90 degrees in the plane specified by axes.
@@ -146,6 +150,11 @@ def rot90(m, k=1, axes=(0,1)):
         return flip(transpose(m, axes_list), axes[1])
 
 
+def _flip_dispatcher(m, axis=None):
+    return (m,)
+
+
+@array_function_dispatch(_flip_dispatcher)
 def flip(m, axis=None):
     """
     Reverse the order of elements in an array along the given axis.
@@ -270,6 +279,11 @@ def iterable(y):
     return True
 
 
+def _average_dispatcher(a, axis=None, weights=None, returned=None):
+    return (a, weights)
+
+
+@array_function_dispatch(_average_dispatcher)
 def average(a, axis=None, weights=None, returned=False):
     """
     Compute the weighted average along the specified axis.
@@ -476,6 +490,15 @@ def asarray_chkfinite(a, dtype=None, order=None):
     return a
 
 
+def _piecewise_dispatcher(x, condlist, funclist, *args, **kw):
+    yield x
+    # support the undocumented behavior of allowing scalars
+    if np.iterable(condlist):
+        for c in condlist:
+            yield c
+
+
+@array_function_dispatch(_piecewise_dispatcher)
 def piecewise(x, condlist, funclist, *args, **kw):
     """
     Evaluate a piecewise-defined function.
@@ -597,6 +620,14 @@ def piecewise(x, condlist, funclist, *args, **kw):
     return y
 
 
+def _select_dispatcher(condlist, choicelist, default=None):
+    for c in condlist:
+        yield c
+    for c in choicelist:
+        yield c
+
+
+@array_function_dispatch(_select_dispatcher)
 def select(condlist, choicelist, default=0):
     """
     Return an array drawn from elements in choicelist, depending on conditions.
@@ -700,6 +731,11 @@ def select(condlist, choicelist, default=0):
     return result
 
 
+def _copy_dispatcher(a, order=None):
+    return (a,)
+
+
+@array_function_dispatch(_copy_dispatcher)
 def copy(a, order='K'):
     """
     Return an array copy of the given object.
@@ -749,6 +785,13 @@ def copy(a, order='K'):
 # Basic operations
 
 
+def _gradient_dispatcher(f, *varargs, **kwargs):
+    yield f
+    for v in varargs:
+        yield v
+
+
+@array_function_dispatch(_gradient_dispatcher)
 def gradient(f, *varargs, **kwargs):
     """
     Return the gradient of an N-dimensional array.
@@ -1090,7 +1133,12 @@ def gradient(f, *varargs, **kwargs):
         return outvals
 
 
-def diff(a, n=1, axis=-1):
+def _diff_dispatcher(a, n=None, axis=None, prepend=None, append=None):
+    return (a, prepend, append)
+
+
+@array_function_dispatch(_diff_dispatcher)
+def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue):
     """
     Calculate the n-th discrete difference along the given axis.
 
@@ -1108,6 +1156,12 @@ def diff(a, n=1, axis=-1):
     axis : int, optional
         The axis along which the difference is taken, default is the
         last axis.
+    prepend, append : array_like, optional
+        Values to prepend or append to "a" along axis prior to
+        performing the difference.  Scalar values are expanded to
+        arrays with length 1 in the direction of axis and the shape
+        of the input array along all other axes.  Otherwise the
+        dimension and shape must match "a" except along axis.
 
     Returns
     -------
@@ -1176,6 +1230,28 @@ def diff(a, n=1, axis=-1):
     nd = a.ndim
     axis = normalize_axis_index(axis, nd)
 
+    combined = []
+    if prepend is not np._NoValue:
+        prepend = np.asanyarray(prepend)
+        if prepend.ndim == 0:
+            shape = list(a.shape)
+            shape[axis] = 1
+            prepend = np.broadcast_to(prepend, tuple(shape))
+        combined.append(prepend)
+
+    combined.append(a)
+
+    if append is not np._NoValue:
+        append = np.asanyarray(append)
+        if append.ndim == 0:
+            shape = list(a.shape)
+            shape[axis] = 1
+            append = np.broadcast_to(append, tuple(shape))
+        combined.append(append)
+
+    if len(combined) > 1:
+        a = np.concatenate(combined, axis)
+
     slice1 = [slice(None)] * nd
     slice2 = [slice(None)] * nd
     slice1[axis] = slice(1, None)
@@ -1190,6 +1266,11 @@ def diff(a, n=1, axis=-1):
     return a
 
 
+def _interp_dispatcher(x, xp, fp, left=None, right=None, period=None):
+    return (x, xp, fp)
+
+
+@array_function_dispatch(_interp_dispatcher)
 def interp(x, xp, fp, left=None, right=None, period=None):
     """
     One-dimensional linear interpolation.
@@ -1322,6 +1403,11 @@ def interp(x, xp, fp, left=None, right=None, period=None):
     return interp_func(x, xp, fp, left, right)
 
 
+def _angle_dispatcher(z, deg=None):
+    return (z,)
+
+
+@array_function_dispatch(_angle_dispatcher)
 def angle(z, deg=False):
     """
     Return the angle of the complex argument.
@@ -1369,6 +1455,11 @@ def angle(z, deg=False):
     return a
 
 
+def _unwrap_dispatcher(p, discont=None, axis=None):
+    return (p,)
+
+
+@array_function_dispatch(_unwrap_dispatcher)
 def unwrap(p, discont=pi, axis=-1):
     """
     Unwrap by changing deltas between values to 2*pi complement.
@@ -1425,6 +1516,11 @@ def unwrap(p, discont=pi, axis=-1):
     return up
 
 
+def _sort_complex(a):
+    return (a,)
+
+
+@array_function_dispatch(_sort_complex)
 def sort_complex(a):
     """
     Sort a complex array using the real part first, then the imaginary part.
@@ -1461,6 +1557,11 @@ def sort_complex(a):
         return b
 
 
+def _trim_zeros(filt, trim=None):
+    return (filt,)
+
+
+@array_function_dispatch(_trim_zeros)
 def trim_zeros(filt, trim='fb'):
     """
     Trim the leading and/or trailing zeros from a 1-D array or sequence.
@@ -1530,6 +1631,11 @@ def unique(x):
         return asarray(items)
 
 
+def _extract_dispatcher(condition, arr):
+    return (condition, arr)
+
+
+@array_function_dispatch(_extract_dispatcher)
 def extract(condition, arr):
     """
     Return the elements of an array that satisfy some condition.
@@ -1581,6 +1687,11 @@ def extract(condition, arr):
     return _nx.take(ravel(arr), nonzero(ravel(condition))[0])
 
 
+def _place_dispatcher(arr, mask, vals):
+    return (arr, mask, vals)
+
+
+@array_function_dispatch(_place_dispatcher)
 def place(arr, mask, vals):
     """
     Change elements of an array based on conditional and input values.
@@ -2135,6 +2246,12 @@ class vectorize(object):
         return outputs[0] if nout == 1 else outputs
 
 
+def _cov_dispatcher(m, y=None, rowvar=None, bias=None, ddof=None,
+                    fweights=None, aweights=None):
+    return (m, y, fweights, aweights)
+
+
+@array_function_dispatch(_cov_dispatcher)
 def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
         aweights=None):
     """
@@ -2344,6 +2461,11 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
     return c.squeeze()
 
 
+def _corrcoef_dispatcher(x, y=None, rowvar=None, bias=None, ddof=None):
+    return (x, y)
+
+
+@array_function_dispatch(_corrcoef_dispatcher)
 def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue):
     """
     Return Pearson product-moment correlation coefficients.
@@ -2912,6 +3034,11 @@ def _i0_2(x):
     return exp(x) * _chbevl(32.0/x - 2.0, _i0B) / sqrt(x)
 
 
+def _i0_dispatcher(x):
+    return (x,)
+
+
+@array_function_dispatch(_i0_dispatcher)
 def i0(x):
     """
     Modified Bessel function of the first kind, order 0.
@@ -3106,6 +3233,11 @@ def kaiser(M, beta):
     return i0(beta * sqrt(1-((n-alpha)/alpha)**2.0))/i0(float(beta))
 
 
+def _sinc_dispatcher(x):
+    return (x,)
+
+
+@array_function_dispatch(_sinc_dispatcher)
 def sinc(x):
     """
     Return the sinc function.
@@ -3185,6 +3317,11 @@ def sinc(x):
     return sin(y)/y
 
 
+def _msort_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_msort_dispatcher)
 def msort(a):
     """
     Return a copy of an array sorted along the first axis.
@@ -3268,6 +3405,12 @@ def _ureduce(a, func, **kwargs):
     return r, keepdim
 
 
+def _median_dispatcher(
+        a, axis=None, out=None, overwrite_input=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_median_dispatcher)
 def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
     """
     Compute the median along the specified axis.
@@ -3412,6 +3555,12 @@ def _median(a, axis=None, out=None, overwrite_input=False):
         return mean(part[indexer], axis=axis, out=out)
 
 
+def _percentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+                           interpolation=None, keepdims=None):
+    return (a, q, out)
+
+
+@array_function_dispatch(_percentile_dispatcher)
 def percentile(a, q, axis=None, out=None,
                overwrite_input=False, interpolation='linear', keepdims=False):
     """
@@ -3557,6 +3706,12 @@ def percentile(a, q, axis=None, out=None,
         a, q, axis, out, overwrite_input, interpolation, keepdims)
 
 
+def _quantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+                         interpolation=None, keepdims=None):
+    return (a, q, out)
+
+
+@array_function_dispatch(_quantile_dispatcher)
 def quantile(a, q, axis=None, out=None,
              overwrite_input=False, interpolation='linear', keepdims=False):
     """
@@ -3819,6 +3974,11 @@ def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
     return r
 
 
+def _trapz_dispatcher(y, x=None, dx=None, axis=None):
+    return (y, x)
+
+
+@array_function_dispatch(_trapz_dispatcher)
 def trapz(y, x=None, dx=1.0, axis=-1):
     """
     Integrate along the given axis using the composite trapezoidal rule.
@@ -3909,7 +4069,12 @@ def trapz(y, x=None, dx=1.0, axis=-1):
     return ret
 
 
+def _meshgrid_dispatcher(*xi, **kwargs):
+    return xi
+
+
 # Based on scitools meshgrid
+@array_function_dispatch(_meshgrid_dispatcher)
 def meshgrid(*xi, **kwargs):
     """
     Return coordinate matrices from coordinate vectors.
@@ -4047,6 +4212,11 @@ def meshgrid(*xi, **kwargs):
     return output
 
 
+def _delete_dispatcher(arr, obj, axis=None):
+    return (arr, obj)
+
+
+@array_function_dispatch(_delete_dispatcher)
 def delete(arr, obj, axis=None):
     """
     Return a new array with sub-arrays along an axis deleted. For a one
@@ -4252,6 +4422,11 @@ def delete(arr, obj, axis=None):
         return new
 
 
+def _insert_dispatcher(arr, obj, values, axis=None):
+    return (arr, obj, values)
+
+
+@array_function_dispatch(_insert_dispatcher)
 def insert(arr, obj, values, axis=None):
     """
     Insert values along the given axis before the given indices.
@@ -4458,6 +4633,11 @@ def insert(arr, obj, values, axis=None):
     return new
 
 
+def _append_dispatcher(arr, values, axis=None):
+    return (arr, values)
+
+
+@array_function_dispatch(_append_dispatcher)
 def append(arr, values, axis=None):
     """
     Append values to the end of an array.
@@ -4513,6 +4693,11 @@ def append(arr, values, axis=None):
     return concatenate((arr, values), axis=axis)
 
 
+def _digitize_dispatcher(x, bins, right=None):
+    return (x, bins)
+
+
+@array_function_dispatch(_digitize_dispatcher)
 def digitize(x, bins, right=False):
     """
     Return the indices of the bins to which each value in input array belongs.
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index f03f30fb0..1ff25b81f 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -8,6 +8,7 @@ import warnings
 
 import numpy as np
 from numpy.compat.py3k import basestring
+from numpy.core.overrides import array_function_dispatch
 
 __all__ = ['histogram', 'histogramdd', 'histogram_bin_edges']
 
@@ -220,6 +221,14 @@ _hist_bin_selectors = {'auto': _hist_bin_auto,
 def _ravel_and_check_weights(a, weights):
     """ Check a and weights have matching shapes, and ravel both """
     a = np.asarray(a)
+
+    # Ensure that the array is a "subtractable" dtype
+    if a.dtype == np.bool_:
+        warnings.warn("Converting input from {} to {} for compatibility."
+                      .format(a.dtype, np.uint8),
+                      RuntimeWarning, stacklevel=2)
+        a = a.astype(np.uint8)
+
     if weights is not None:
         weights = np.asarray(weights)
         if weights.shape != a.shape:
@@ -392,6 +401,11 @@ def _search_sorted_inclusive(a, v):
     ))
 
 
+def _histogram_bin_edges_dispatcher(a, bins=None, range=None, weights=None):
+    return (a, bins, weights)
+
+
+@array_function_dispatch(_histogram_bin_edges_dispatcher)
 def histogram_bin_edges(a, bins=10, range=None, weights=None):
     r"""
     Function to calculate only the edges of the bins used by the `histogram` function.
@@ -586,6 +600,12 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
     return bin_edges
 
 
+def _histogram_dispatcher(
+        a, bins=None, range=None, normed=None, weights=None, density=None):
+    return (a, bins, weights)
+
+
+@array_function_dispatch(_histogram_dispatcher)
 def histogram(a, bins=10, range=None, normed=None, weights=None,
               density=None):
     r"""
@@ -838,6 +858,12 @@ def histogram(a, bins=10, range=None, normed=None, weights=None,
         return n, bin_edges
 
 
+def _histogramdd_dispatcher(sample, bins=None, range=None, normed=None,
+                            weights=None, density=None):
+    return (sample, bins, weights)
+
+
+@array_function_dispatch(_histogramdd_dispatcher)
 def histogramdd(sample, bins=10, range=None, normed=None, weights=None,
                 density=None):
     """
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index 009e6d229..26243d231 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -13,6 +13,7 @@ from . import function_base
 import numpy.matrixlib as matrixlib
 from .function_base import diff
 from numpy.core.multiarray import ravel_multi_index, unravel_index
+from numpy.core.overrides import array_function_dispatch
 from numpy.lib.stride_tricks import as_strided
 
 
@@ -23,6 +24,11 @@ __all__ = [
     ]
 
 
+def _ix__dispatcher(*args):
+    return args
+
+
+@array_function_dispatch(_ix__dispatcher)
 def ix_(*args):
     """
     Construct an open mesh from multiple sequences.
@@ -194,9 +200,6 @@ class nd_grid(object):
             else:
                 return _nx.arange(start, stop, step)
 
-    def __len__(self):
-        return 0
-
 
 class MGridClass(nd_grid):
     """
@@ -729,6 +732,12 @@ s_ = IndexExpression(maketuple=False)
 # The following functions complement those in twodim_base, but are
 # applicable to N-dimensions.
 
+
+def _fill_diagonal_dispatcher(a, val, wrap=None):
+    return (a,)
+
+
+@array_function_dispatch(_fill_diagonal_dispatcher)
 def fill_diagonal(a, val, wrap=False):
     """Fill the main diagonal of the given array of any dimensionality.
 
@@ -911,6 +920,11 @@ def diag_indices(n, ndim=2):
     return (idx,) * ndim
 
 
+def _diag_indices_from(arr):
+    return (arr,)
+
+
+@array_function_dispatch(_diag_indices_from)
 def diag_indices_from(arr):
     """
     Return the indices to access the main diagonal of an n-dimensional array.
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 8d6b0f139..279c4c5c4 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -25,6 +25,7 @@ from __future__ import division, absolute_import, print_function
 import warnings
 import numpy as np
 from numpy.lib import function_base
+from numpy.core.overrides import array_function_dispatch
 
 
 __all__ = [
@@ -188,6 +189,11 @@ def _divide_by_count(a, b, out=None):
                 return np.divide(a, b, out=out, casting='unsafe')
 
 
+def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanmin_dispatcher)
 def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Return minimum of an array or minimum along an axis, ignoring any NaNs.
@@ -296,6 +302,11 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
     return res
 
 
+def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanmax_dispatcher)
 def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
     """
     Return the maximum of an array or maximum along an axis, ignoring any
@@ -404,6 +415,11 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
     return res
 
 
+def _nanargmin_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_nanargmin_dispatcher)
 def nanargmin(a, axis=None):
     """
     Return the indices of the minimum values in the specified axis ignoring
@@ -448,6 +464,11 @@ def nanargmin(a, axis=None):
     return res
 
 
+def _nanargmax_dispatcher(a, axis=None):
+    return (a,)
+
+
+@array_function_dispatch(_nanargmax_dispatcher)
 def nanargmax(a, axis=None):
     """
     Return the indices of the maximum values in the specified axis ignoring
@@ -493,6 +514,11 @@ def nanargmax(a, axis=None):
     return res
 
 
+def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nansum_dispatcher)
 def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Return the sum of array elements over a given axis treating Not a
@@ -583,6 +609,11 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
 
 
+def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanprod_dispatcher)
 def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Return the product of array elements over a given axis treating Not a
@@ -648,6 +679,11 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
 
 
+def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nancumsum_dispatcher)
 def nancumsum(a, axis=None, dtype=None, out=None):
     """
     Return the cumulative sum of array elements over a given axis treating Not a
@@ -713,6 +749,11 @@ def nancumsum(a, axis=None, dtype=None, out=None):
     return np.cumsum(a, axis=axis, dtype=dtype, out=out)
 
 
+def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nancumprod_dispatcher)
 def nancumprod(a, axis=None, dtype=None, out=None):
     """
     Return the cumulative product of array elements over a given axis treating Not a
@@ -775,6 +816,11 @@ def nancumprod(a, axis=None, dtype=None, out=None):
     return np.cumprod(a, axis=axis, dtype=dtype, out=out)
 
 
+def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanmean_dispatcher)
 def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     """
     Compute the arithmetic mean along the specified axis, ignoring NaNs.
@@ -928,6 +974,12 @@ def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
     return m.filled(np.nan)
 
 
+def _nanmedian_dispatcher(
+        a, axis=None, out=None, overwrite_input=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanmedian_dispatcher)
 def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue):
     """
     Compute the median along the specified axis, while ignoring NaNs.
@@ -1026,6 +1078,12 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu
         return r
 
 
+def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+                              interpolation=None, keepdims=None):
+    return (a, q, out)
+
+
+@array_function_dispatch(_nanpercentile_dispatcher)
 def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
                   interpolation='linear', keepdims=np._NoValue):
     """
@@ -1146,6 +1204,12 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
         a, q, axis, out, overwrite_input, interpolation, keepdims)
 
 
+def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+                            interpolation=None, keepdims=None):
+    return (a, q, out)
+
+
+@array_function_dispatch(_nanquantile_dispatcher)
 def nanquantile(a, q, axis=None, out=None, overwrite_input=False,
                 interpolation='linear', keepdims=np._NoValue):
     """
@@ -1308,6 +1372,12 @@ def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'):
         arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation)
 
 
+def _nanvar_dispatcher(
+        a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanvar_dispatcher)
 def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     """
     Compute the variance along the specified axis, while ignoring NaNs.
@@ -1449,6 +1519,12 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     return var
 
 
+def _nanstd_dispatcher(
+        a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanstd_dispatcher)
 def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     """
     Compute the standard deviation along the specified axis, while
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index d8cfbf769..62fc9c5b3 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1,6 +1,5 @@
 from __future__ import division, absolute_import, print_function
 
-import io
 import sys
 import os
 import re
@@ -13,6 +12,7 @@ import numpy as np
 from . import format
 from ._datasource import DataSource
 from numpy.core.multiarray import packbits, unpackbits
+from numpy.core._internal import recursive
 from ._iotools import (
     LineSplitter, NameValidator, StringConverter, ConverterError,
     ConverterLockError, ConversionWarning, _is_string_like,
@@ -23,12 +23,11 @@ from numpy.compat import (
     asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode,
     is_pathlib_path
     )
+from numpy.core.numeric import pickle
 
 if sys.version_info[0] >= 3:
-    import pickle
     from collections.abc import Mapping
 else:
-    import cPickle as pickle
     from future_builtins import map
     from collections import Mapping
 
@@ -379,16 +378,6 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
     memmap([4, 5, 6])
 
     """
-    own_fid = False
-    if isinstance(file, basestring):
-        fid = open(file, "rb")
-        own_fid = True
-    elif is_pathlib_path(file):
-        fid = file.open("rb")
-        own_fid = True
-    else:
-        fid = file
-
     if encoding not in ('ASCII', 'latin1', 'bytes'):
         # The 'encoding' value for pickle also affects what encoding
         # the serialized binary data of NumPy arrays is loaded
@@ -409,6 +398,17 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
         # Nothing to do on Python 2
         pickle_kwargs = {}
 
+    # TODO: Use contextlib.ExitStack once we drop Python 2
+    if isinstance(file, basestring):
+        fid = open(file, "rb")
+        own_fid = True
+    elif is_pathlib_path(file):
+        fid = file.open("rb")
+        own_fid = True
+    else:
+        fid = file
+        own_fid = False
+
     try:
         # Code to distinguish from NumPy binary files and pickles.
         _ZIP_PREFIX = b'PK\x03\x04'
@@ -421,10 +421,10 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
         if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
             # zip-file (assume .npz)
             # Transfer file ownership to NpzFile
-            tmp = own_fid
+            ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
+                          pickle_kwargs=pickle_kwargs)
             own_fid = False
-            return NpzFile(fid, own_fid=tmp, allow_pickle=allow_pickle,
-                           pickle_kwargs=pickle_kwargs)
+            return ret
         elif magic == format.MAGIC_PREFIX:
             # .npy file
             if mmap_mode:
@@ -773,7 +773,7 @@ _loadtxt_chunksize = 50000
 
 def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             converters=None, skiprows=0, usecols=None, unpack=False,
-            ndmin=0, encoding='bytes'):
+            ndmin=0, encoding='bytes', max_rows=None):
     """
     Load data from a text file.
 
@@ -835,6 +835,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         the system default is used. The default value is 'bytes'.
 
         .. versionadded:: 1.14.0
+    max_rows : int, optional
+        Read `max_rows` lines of content after `skiprows` lines. The default
+        is to read all the lines.
+
+        .. versionadded:: 1.16.0
 
     Returns
     -------
@@ -944,7 +949,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         fencoding = locale.getpreferredencoding()
 
     # not to be confused with the flatten_dtype we import...
-    def flatten_dtype_internal(dt):
+    @recursive
+    def flatten_dtype_internal(self, dt):
         """Unpack a structured data-type, and produce re-packing info."""
         if dt.names is None:
             # If the dtype is flattened, return.
@@ -964,7 +970,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             packing = []
             for field in dt.names:
                 tp, bytes = dt.fields[field]
-                flat_dt, flat_packing = flatten_dtype_internal(tp)
+                flat_dt, flat_packing = self(tp)
                 types.extend(flat_dt)
                 # Avoid extra nesting for subarrays
                 if tp.ndim > 0:
@@ -973,7 +979,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
                     packing.append((len(flat_dt), flat_packing))
             return (types, packing)
 
-    def pack_items(items, packing):
+    @recursive
+    def pack_items(self, items, packing):
         """Pack items into nested lists based on re-packing info."""
         if packing is None:
             return items[0]
@@ -985,7 +992,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             start = 0
             ret = []
             for length, subpacking in packing:
-                ret.append(pack_items(items[start:start+length], subpacking))
+                ret.append(self(items[start:start+length], subpacking))
                 start += length
             return tuple(ret)
 
@@ -1014,7 +1021,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
 
         """
         X = []
-        for i, line in enumerate(itertools.chain([first_line], fh)):
+        line_iter = itertools.chain([first_line], fh)
+        line_iter = itertools.islice(line_iter, max_rows)
+        for i, line in enumerate(line_iter):
             vals = split_line(line)
             if len(vals) == 0:
                 continue
@@ -1111,11 +1120,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     finally:
         if fown:
             fh.close()
-        # recursive closures have a cyclic reference to themselves, which
-        # requires gc to collect (gh-10620). To avoid this problem, for
-        # performance and PyPy friendliness, we break the cycle:
-        flatten_dtype_internal = None
-        pack_items = None
 
     if X is None:
         X = np.array([], dtype)
diff --git a/numpy/lib/tests/test__datasource.py b/numpy/lib/tests/test__datasource.py
index 85788941c..1df8bebf6 100644
--- a/numpy/lib/tests/test__datasource.py
+++ b/numpy/lib/tests/test__datasource.py
@@ -8,7 +8,7 @@ from shutil import rmtree
 
 import numpy.lib._datasource as datasource
 from numpy.testing import (
-    assert_, assert_equal, assert_raises, assert_warns, SkipTest
+    assert_, assert_equal, assert_raises, assert_warns
     )
 
 if sys.version_info[0] >= 3:
@@ -137,7 +137,7 @@ class TestDataSourceOpen(object):
             import gzip
         except ImportError:
             # We don't have the gzip capabilities to test.
-            raise SkipTest
+            pytest.skip()
         # Test datasource's internal file_opener for Gzip files.
         filepath = os.path.join(self.tmpdir, 'foobar.txt.gz')
         fp = gzip.open(filepath, 'w')
@@ -153,7 +153,7 @@ class TestDataSourceOpen(object):
             import bz2
         except ImportError:
             # We don't have the bz2 capabilities to test.
-            raise SkipTest
+            pytest.skip()
         # Test datasource's internal file_opener for BZip2 files.
         filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2')
         fp = bz2.BZ2File(filepath, 'w')
@@ -170,7 +170,7 @@ class TestDataSourceOpen(object):
             import bz2
         except ImportError:
             # We don't have the bz2 capabilities to test.
-            raise SkipTest
+            pytest.skip()
         # Test datasource's internal file_opener for BZip2 files.
         filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2')
         fp = bz2.BZ2File(filepath, 'w')
diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py
index 45d624781..e62fccaa0 100644
--- a/numpy/lib/tests/test_arraypad.py
+++ b/numpy/lib/tests/test_arraypad.py
@@ -3,8 +3,11 @@
 """
 from __future__ import division, absolute_import, print_function
 
+import pytest
+
 import numpy as np
-from numpy.testing import (assert_array_equal, assert_raises, assert_allclose,)
+from numpy.testing import (assert_array_equal, assert_raises, assert_allclose,
+                           assert_equal)
 from numpy.lib import pad
 
 
@@ -344,6 +347,20 @@ class TestStatistic(object):
             )
         assert_array_equal(a, b)
 
+    @pytest.mark.parametrize("mode", [
+        pytest.param("mean", marks=pytest.mark.xfail(reason="gh-11216")),
+        "median",
+        "minimum",
+        "maximum"
+    ])
+    def test_same_prepend_append(self, mode):
+        """ Test that appended and prepended values are equal """
+        # This test is constructed to trigger floating point rounding errors in
+        # a way that caused gh-11216 for mode=='mean'
+        a = np.array([-1, 2, -1]) + np.array([0, 1e-12, 0], dtype=np.float64)
+        a = np.pad(a, (1, 1), mode)
+        assert_equal(a[0], a[-1])
+
 
 class TestConstant(object):
     def test_check_constant(self):
@@ -502,6 +519,21 @@ class TestConstant(object):
         expected = np.full(7, int64_max, dtype=np.int64)
         assert_array_equal(test, expected)
 
+    def test_check_object_array(self):
+        arr = np.empty(1, dtype=object)
+        obj_a = object()
+        arr[0] = obj_a
+        obj_b = object()
+        obj_c = object()
+        arr = np.pad(arr, pad_width=1, mode='constant',
+                     constant_values=(obj_b, obj_c))
+
+        expected = np.empty((3,), dtype=object)
+        expected[0] = obj_b
+        expected[1] = obj_a
+        expected[2] = obj_c
+
+        assert_array_equal(arr, expected)
 
 class TestLinearRamp(object):
     def test_check_simple(self):
@@ -542,6 +574,25 @@ class TestLinearRamp(object):
              [0.,   0.,   0.,   0.,   0.,   0.,   0.,    0.,   0.]])
         assert_allclose(test, expected)
 
+    @pytest.mark.xfail(raises=(AssertionError,))
+    def test_object_array(self):
+        from fractions import Fraction
+        arr = np.array([Fraction(1, 2), Fraction(-1, 2)])
+        actual = np.pad(arr, (2, 3), mode='linear_ramp', end_values=0)
+
+        # deliberately chosen to have a non-power-of-2 denominator such that
+        # rounding to floats causes the (expected) AssertionError below.
+        expected = np.array([
+            Fraction( 0, 12),
+            Fraction( 3, 12),
+            Fraction( 6, 12),
+            Fraction(-6, 12),
+            Fraction(-4, 12),
+            Fraction(-2, 12),
+            Fraction(-0, 12),
+        ])
+        assert_equal(actual, expected)
+
 
 class TestReflect(object):
     def test_check_simple(self):
@@ -887,6 +938,11 @@ class TestWrap(object):
         b = np.array([3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1])
         assert_array_equal(a, b)
 
+    def test_pad_with_zero(self):
+        a = np.ones((3, 5))
+        b = np.pad(a, (0, 5), mode="wrap")
+        assert_array_equal(a, b[:-5, :-5])
+
 
 class TestStatLen(object):
     def test_check_simple(self):
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 4b61726d2..fef06ba53 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -388,6 +388,13 @@ class TestSetOps(object):
         a = np.array((), np.uint32)
         assert_equal(setdiff1d(a, []).dtype, np.uint32)
 
+    def test_setdiff1d_unique(self):
+        a = np.array([3, 2, 1])
+        b = np.array([7, 5, 2])
+        expected = np.array([3, 1])
+        actual = setdiff1d(a, b, assume_unique=True)
+        assert_equal(actual, expected)
+
     def test_setdiff1d_char_array(self):
         a = np.array(['a', 'b', 'c'])
         b = np.array(['a', 'b', 's'])
diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py
index c7869c582..3185e32ac 100644
--- a/numpy/lib/tests/test_format.py
+++ b/numpy/lib/tests/test_format.py
@@ -286,7 +286,8 @@ from io import BytesIO
 
 import numpy as np
 from numpy.testing import (
-    assert_, assert_array_equal, assert_raises, raises, SkipTest
+    assert_, assert_array_equal, assert_raises, assert_raises_regex,
+    raises
     )
 from numpy.lib import format
 
@@ -678,12 +679,9 @@ def test_write_version():
         (255, 255),
     ]
     for version in bad_versions:
-        try:
+        with assert_raises_regex(ValueError,
+                                 'we only support format version.*'):
             format.write_array(f, arr, version=version)
-        except ValueError:
-            pass
-        else:
-            raise AssertionError("we should have raised a ValueError for the bad version %r" % (version,))
 
 
 bad_version_magic = [
@@ -809,7 +807,7 @@ def test_bad_header():
 
 def test_large_file_support():
     if (sys.platform == 'win32' or sys.platform == 'cygwin'):
-        raise SkipTest("Unknown if Windows has sparse filesystems")
+        pytest.skip("Unknown if Windows has sparse filesystems")
     # try creating a large sparse file
     tf_name = os.path.join(tempdir, 'sparse_file')
     try:
@@ -819,7 +817,7 @@ def test_large_file_support():
         import subprocess as sp
         sp.check_call(["truncate", "-s", "5368709120", tf_name])
     except Exception:
-        raise SkipTest("Could not create 5GB large file")
+        pytest.skip("Could not create 5GB large file")
     # write a small array to the end
     with open(tf_name, "wb") as f:
         f.seek(5368709120)
@@ -841,7 +839,7 @@ def test_large_archive():
     try:
         a = np.empty((2**30, 2), dtype=np.uint8)
     except MemoryError:
-        raise SkipTest("Could not create large file")
+        pytest.skip("Could not create large file")
 
     fname = os.path.join(tempdir, "large_archive")
 
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index d5faed6ae..40cca1dbb 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -734,6 +734,58 @@ class TestDiff(object):
         assert_array_equal(out3.mask, [[], [], [], [], []])
         assert_(type(out3) is type(x))
 
+    def test_prepend(self):
+        x = np.arange(5) + 1
+        assert_array_equal(diff(x, prepend=0), np.ones(5))
+        assert_array_equal(diff(x, prepend=[0]), np.ones(5))
+        assert_array_equal(np.cumsum(np.diff(x, prepend=0)), x)
+        assert_array_equal(diff(x, prepend=[-1, 0]), np.ones(6))
+
+        x = np.arange(4).reshape(2, 2)
+        result = np.diff(x, axis=1, prepend=0)
+        expected = [[0, 1], [2, 1]]
+        assert_array_equal(result, expected)
+        result = np.diff(x, axis=1, prepend=[[0], [0]])
+        assert_array_equal(result, expected)
+
+        result = np.diff(x, axis=0, prepend=0)
+        expected = [[0, 1], [2, 2]]
+        assert_array_equal(result, expected)
+        result = np.diff(x, axis=0, prepend=[[0, 0]])
+        assert_array_equal(result, expected)
+
+        assert_raises(ValueError, np.diff, x, prepend=np.zeros((3,3)))
+
+        assert_raises(np.AxisError, diff, x, prepend=0, axis=3)
+
+    def test_append(self):
+        x = np.arange(5)
+        result = diff(x, append=0)
+        expected = [1, 1, 1, 1, -4]
+        assert_array_equal(result, expected)
+        result = diff(x, append=[0])
+        assert_array_equal(result, expected)
+        result = diff(x, append=[0, 2])
+        expected = expected + [2]
+        assert_array_equal(result, expected)
+
+        x = np.arange(4).reshape(2, 2)
+        result = np.diff(x, axis=1, append=0)
+        expected = [[1, -1], [1, -3]]
+        assert_array_equal(result, expected)
+        result = np.diff(x, axis=1, append=[[0], [0]])
+        assert_array_equal(result, expected)
+
+        result = np.diff(x, axis=0, append=0)
+        expected = [[2, 2], [-2, -3]]
+        assert_array_equal(result, expected)
+        result = np.diff(x, axis=0, append=[[0, 0]])
+        assert_array_equal(result, expected)
+
+        assert_raises(ValueError, np.diff, x, append=np.zeros((3,3)))
+
+        assert_raises(np.AxisError, diff, x, append=0, axis=3)
+
 
 class TestDelete(object):
 
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index 561f5f938..1b5a71d0e 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -119,6 +119,13 @@ class TestHistogram(object):
         h, b = histogram(a, bins=8, range=[1, 9], weights=w)
         assert_equal(h, w[1:-1])
 
+    def test_arr_weights_mismatch(self):
+        a = np.arange(10) + .5
+        w = np.arange(11) + .5
+        with assert_raises_regex(ValueError, "same shape as"):
+            h, b = histogram(a, range=[1, 9], weights=w, density=True)
+
+
     def test_type(self):
         # Check the type of the returned histogram
         a = np.arange(10) + .5
@@ -141,6 +148,23 @@ class TestHistogram(object):
         counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100)
         assert_equal(counts_hist.sum(), 3.)
 
+    def test_bool_conversion(self):
+        # gh-12107
+        # Reference integer histogram
+        a = np.array([1, 1, 0], dtype=np.uint8)
+        int_hist, int_edges = np.histogram(a)
+
+        # Should raise a warning on booleans
+        # Ensure that the histograms are equivalent, need to suppress
+        # the warnings to get the actual outputs
+        with suppress_warnings() as sup:
+            rec = sup.record(RuntimeWarning, 'Converting input from .*')
+            hist, edges = np.histogram([True, True, False])
+            # A warning should be issued
+            assert_equal(len(rec), 1)
+            assert_array_equal(hist, int_hist)
+            assert_array_equal(edges, int_edges)
+
     def test_weights(self):
         v = np.random.rand(100)
         w = np.ones(100) * 5
@@ -225,6 +249,12 @@ class TestHistogram(object):
         assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
         assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])
 
+    def test_invalid_range(self):
+        # start of range must be < end of range
+        vals = np.linspace(0.0, 1.0, num=100)
+        with assert_raises_regex(ValueError, "max must be larger than"):
+            np.histogram(vals, range=[0.1, 0.01])
+
     def test_bin_edge_cases(self):
         # Ensure that floating-point computations correctly place edge cases.
         arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
@@ -241,6 +271,13 @@ class TestHistogram(object):
         hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
         assert_equal(hist[-1], 1)
 
+    def test_bin_array_dims(self):
+        # gracefully handle bins object > 1 dimension
+        vals = np.linspace(0.0, 1.0, num=100)
+        bins = np.array([[0, 0.5], [0.6, 1.0]])
+        with assert_raises_regex(ValueError, "must be 1d"):
+            np.histogram(vals, bins=bins)
+
     def test_unsigned_monotonicity_check(self):
         # Ensures ValueError is raised if bins not increasing monotonically
         # when bins contain unsigned values (see #9222)
diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py
index 7e9c026e4..76d9b403e 100644
--- a/numpy/lib/tests/test_index_tricks.py
+++ b/numpy/lib/tests/test_index_tricks.py
@@ -1,9 +1,12 @@
 from __future__ import division, absolute_import, print_function
 
+import pytest
+
 import numpy as np
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_almost_equal,
-    assert_array_almost_equal, assert_raises, assert_raises_regex
+    assert_array_almost_equal, assert_raises, assert_raises_regex,
+    assert_warns
     )
 from numpy.lib.index_tricks import (
     mgrid, ogrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from,
@@ -14,6 +17,33 @@ from numpy.lib.index_tricks import (
 class TestRavelUnravelIndex(object):
     def test_basic(self):
         assert_equal(np.unravel_index(2, (2, 2)), (1, 0))
+
+        # test backwards compatibility with older dims
+        # keyword argument; see Issue #10586
+        with assert_warns(DeprecationWarning):
+            # we should achieve the correct result
+            # AND raise the appropriate warning
+            # when using older "dims" kw argument
+            assert_equal(np.unravel_index(indices=2,
+                                          dims=(2, 2)),
+                                          (1, 0))
+
+        # test that new shape argument works properly
+        assert_equal(np.unravel_index(indices=2,
+                                      shape=(2, 2)),
+                                      (1, 0))
+
+        # test that an invalid second keyword argument
+        # is properly handled
+        with assert_raises(TypeError):
+            np.unravel_index(indices=2, hape=(2, 2))
+
+        with assert_raises(TypeError):
+            np.unravel_index(2, hape=(2, 2))
+
+        with assert_raises(TypeError):
+            np.unravel_index(254, ims=(17, 94))
+
         assert_equal(np.ravel_multi_index((1, 0), (2, 2)), 2)
         assert_equal(np.unravel_index(254, (17, 94)), (2, 66))
         assert_equal(np.ravel_multi_index((2, 66), (17, 94)), 254)
@@ -164,6 +194,22 @@ class TestGrid(object):
         for f, b in zip(grid_full, grid_broadcast):
             assert_equal(f, b)
 
+    @pytest.mark.parametrize("start, stop, step, expected", [
+        (None, 10, 10j, (200, 10)),
+        (-10, 20, None, (1800, 30)),
+        ])
+    def test_mgrid_size_none_handling(self, start, stop, step, expected):
+        # regression test None value handling for
+        # start and step values used by mgrid;
+        # internally, this aims to cover previously
+        # unexplored code paths in nd_grid()
+        grid = mgrid[start:stop:step, start:stop:step]
+        # need a smaller grid to explore one of the
+        # untested code paths
+        grid_small = mgrid[start:stop:step]
+        assert_equal(grid.size, expected[0])
+        assert_equal(grid_small.size, expected[1])
+
 
 class TestConcatenator(object):
     def test_1d(self):
@@ -318,6 +364,19 @@ class TestFillDiagonal(object):
         i = np.array([0, 1, 2])
         assert_equal(np.where(a != 0), (i, i, i, i))
 
+    def test_low_dim_handling(self):
+        # raise error with low dimensionality
+        a = np.zeros(3, int)
+        with assert_raises_regex(ValueError, "at least 2-d"):
+            fill_diagonal(a, 5)
+
+    def test_hetero_shape_handling(self):
+        # raise error with high dimensionality and
+        # shape mismatch
+        a = np.zeros((3,3,7,3), int)
+        with assert_raises_regex(ValueError, "equal length"):
+            fill_diagonal(a, 2)
+
 
 def test_diag_indices():
     di = diag_indices(4)
@@ -347,11 +406,23 @@ def test_diag_indices():
         )
 
 
-def test_diag_indices_from():
-    x = np.random.random((4, 4))
-    r, c = diag_indices_from(x)
-    assert_array_equal(r, np.arange(4))
-    assert_array_equal(c, np.arange(4))
+class TestDiagIndicesFrom(object):
+
+    def test_diag_indices_from(self):
+        x = np.random.random((4, 4))
+        r, c = diag_indices_from(x)
+        assert_array_equal(r, np.arange(4))
+        assert_array_equal(c, np.arange(4))
+
+    def test_error_small_input(self):
+        x = np.ones(7)
+        with assert_raises_regex(ValueError, "at least 2-d"):
+            diag_indices_from(x)
+
+    def test_error_shape_mismatch(self):
+        x = np.zeros((3, 3, 2, 3), int)
+        with assert_raises_regex(ValueError, "equal length"):
+            diag_indices_from(x)
 
 
 def test_ndindex():
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 1f3664d92..08800ff97 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -21,7 +21,7 @@ from numpy.lib._iotools import ConverterError, ConversionWarning
 from numpy.compat import asbytes, bytes, unicode, Path
 from numpy.ma.testutils import assert_equal
 from numpy.testing import (
-    assert_warns, assert_, SkipTest, assert_raises_regex, assert_raises,
+    assert_warns, assert_, assert_raises_regex, assert_raises,
     assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY,
     HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles,
     )
@@ -567,12 +567,12 @@ class LoadTxtBase(object):
 
     @pytest.mark.skipif(not HAS_BZ2, reason="Needs bz2")
     @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3")
-    def test_compressed_gzip(self):
+    def test_compressed_bz2(self):
         self.check_compressed(bz2.open, ('.bz2',))
 
     @pytest.mark.skipif(not HAS_LZMA, reason="Needs lzma")
     @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3")
-    def test_compressed_gzip(self):
+    def test_compressed_lzma(self):
         self.check_compressed(lzma.open, ('.xz', '.lzma'))
 
     def test_encoding(self):
@@ -1068,6 +1068,55 @@ class TestLoadTxt(LoadTxtBase):
             x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar']
             assert_array_equal(x, np.array(x, dtype="S"))
 
+    def test_max_rows(self):
+        c = TextIO()
+        c.write('1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       max_rows=1)
+        a = np.array([1, 2, 3, 5], int)
+        assert_array_equal(x, a)
+
+    def test_max_rows_with_skiprows(self):
+        c = TextIO()
+        c.write('comments\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       skiprows=1, max_rows=1)
+        a = np.array([1, 2, 3, 5], int)
+        assert_array_equal(x, a)
+
+        c = TextIO()
+        c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       skiprows=1, max_rows=2)
+        a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int)
+        assert_array_equal(x, a)
+
+    def test_max_rows_with_read_continuation(self):
+        c = TextIO()
+        c.write('1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       max_rows=2)
+        a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int)
+        assert_array_equal(x, a)
+        # test continuation
+        x = np.loadtxt(c, dtype=int, delimiter=',')
+        a = np.array([2,1,4,5], int)
+        assert_array_equal(x, a)
+
+    def test_max_rows_larger(self):
+        # test max_rows > num rows
+        c = TextIO()
+        c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       skiprows=1, max_rows=6)
+        a = np.array([[1, 2, 3, 5], [4, 5, 7, 8], [2, 1, 4, 5]], int)
+        assert_array_equal(x, a)
+
 class Testfromregex(object):
     def test_record(self):
         c = TextIO()
@@ -1454,14 +1503,10 @@ M   33  21.99
         assert_equal(test, control)
 
         ndtype = [('nest', [('idx', int), ('code', object)])]
-        try:
+        with assert_raises_regex(NotImplementedError,
+                                 'Nested fields.* not supported.*'):
             test = np.genfromtxt(TextIO(data), delimiter=";",
                                  dtype=ndtype, converters=converters)
-        except NotImplementedError:
-            pass
-        else:
-            errmsg = "Nested dtype involving objects should be supported."
-            raise AssertionError(errmsg)
 
     def test_userconverters_with_explicit_dtype(self):
         # Test user_converters w/ explicit (standard) dtype
@@ -2037,8 +2082,8 @@ M   33  21.99
             encoding = locale.getpreferredencoding()
             utf8.encode(encoding)
         except (UnicodeError, ImportError):
-            raise SkipTest('Skipping test_utf8_file_nodtype_unicode, '
-                           'unable to encode utf8 in preferred encoding')
+            pytest.skip('Skipping test_utf8_file_nodtype_unicode, '
+                        'unable to encode utf8 in preferred encoding')
 
         with temppath() as path:
             with io.open(path, "wt") as f:
@@ -2416,3 +2461,9 @@ def test_load_refcount():
 
     with assert_no_gc_cycles():
         np.load(f)
+
+    f.seek(0)
+    dt = [("a", 'u1', 2), ("b", 'u1', 2)]
+    with assert_no_gc_cycles():
+        x = np.loadtxt(TextIO("0 1 2 3"), dtype=dt)
+        assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt))
diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py
index d4828bc1f..5585a95f9 100644
--- a/numpy/lib/tests/test_recfunctions.py
+++ b/numpy/lib/tests/test_recfunctions.py
@@ -541,12 +541,8 @@ class TestStackArrays(object):
         test = stack_arrays((a, b), autoconvert=True)
         assert_equal(test, control)
         assert_equal(test.mask, control.mask)
-        try:
-            test = stack_arrays((a, b), autoconvert=False)
-        except TypeError:
-            pass
-        else:
-            raise AssertionError
+        with assert_raises(TypeError):
+            stack_arrays((a, b), autoconvert=False)
 
     def test_checktitles(self):
         # Test using titles in the field names
diff --git a/numpy/lib/tests/test_utils.py b/numpy/lib/tests/test_utils.py
index c27c3cbf5..2723f3440 100644
--- a/numpy/lib/tests/test_utils.py
+++ b/numpy/lib/tests/test_utils.py
@@ -56,10 +56,34 @@ def test_safe_eval_nameconstant():
     utils.safe_eval('None')
 
 
-def test_byte_bounds():
-    a = arange(12).reshape(3, 4)
-    low, high = utils.byte_bounds(a)
-    assert_equal(high - low, a.size * a.itemsize)
+class TestByteBounds(object):
+
+    def test_byte_bounds(self):
+        # pointer difference matches size * itemsize
+        # due to contiguity
+        a = arange(12).reshape(3, 4)
+        low, high = utils.byte_bounds(a)
+        assert_equal(high - low, a.size * a.itemsize)
+
+    def test_unusual_order_positive_stride(self):
+        a = arange(12).reshape(3, 4)
+        b = a.T
+        low, high = utils.byte_bounds(b)
+        assert_equal(high - low, b.size * b.itemsize)
+
+    def test_unusual_order_negative_stride(self):
+        a = arange(12).reshape(3, 4)
+        b = a.T[::-1]
+        low, high = utils.byte_bounds(b)
+        assert_equal(high - low, b.size * b.itemsize)
+
+    def test_strided(self):
+        a = arange(12)
+        b = a[::2]
+        low, high = utils.byte_bounds(b)
+        # only every second element is kept, so the span is twice as wide as
+        # a contiguous array of b.size items, minus the final skipped element
+        assert_equal(high - low, b.size * 2 * b.itemsize - b.itemsize)
 
 
 def test_assert_raises_regex_context_manager():
diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py
index 3f7aa32fa..603da8567 100644
--- a/numpy/lib/type_check.py
+++ b/numpy/lib/type_check.py
@@ -2,6 +2,7 @@
 
 """
 from __future__ import division, absolute_import, print_function
+import warnings
 
 __all__ = ['iscomplexobj', 'isrealobj', 'imag', 'iscomplex',
            'isreal', 'nan_to_num', 'real', 'real_if_close',
@@ -469,6 +470,10 @@ def asscalar(a):
     """
     Convert an array of size 1 to its scalar equivalent.
 
+    .. deprecated:: 1.16
+
+        Deprecated, use `numpy.ndarray.item()` instead.
+
     Parameters
     ----------
     a : ndarray
@@ -486,6 +491,10 @@ def asscalar(a):
     24
 
     """
+
+    # 2018-10-10, 1.16
+    warnings.warn('np.asscalar(a) is deprecated since NumPy v1.16, use '
+                  'a.item() instead', DeprecationWarning, stacklevel=1)
     return a.item()
 
 #-----------------------------------------------------------------------------
diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py
index 9678bab76..249873654 100644
--- a/numpy/lib/utils.py
+++ b/numpy/lib/utils.py
@@ -80,7 +80,6 @@ class _Deprecate(object):
         new_name = self.new_name
         message = self.message
 
-        import warnings
         if old_name is None:
             try:
                 old_name = func.__name__
diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
index ccc437663..59923f3c5 100644
--- a/numpy/linalg/linalg.py
+++ b/numpy/linalg/linalg.py
@@ -28,6 +28,7 @@ from numpy.core import (
     swapaxes, divide, count_nonzero, isnan
 )
 from numpy.core.multiarray import normalize_axis_index
+from numpy.core.overrides import array_function_dispatch
 from numpy.lib.twodim_base import triu, eye
 from numpy.linalg import lapack_lite, _umath_linalg
 
@@ -198,11 +199,6 @@ def _assertRankAtLeast2(*arrays):
             raise LinAlgError('%d-dimensional array given. Array must be '
                     'at least two-dimensional' % a.ndim)
 
-def _assertSquareness(*arrays):
-    for a in arrays:
-        if max(a.shape) != min(a.shape):
-            raise LinAlgError('Array must be square')
-
 def _assertNdSquareness(*arrays):
     for a in arrays:
         m, n = a.shape[-2:]
@@ -242,6 +238,11 @@ def transpose(a):
 
 # Linear equations
 
+def _tensorsolve_dispatcher(a, b, axes=None):
+    return (a, b)
+
+
+@array_function_dispatch(_tensorsolve_dispatcher)
 def tensorsolve(a, b, axes=None):
     """
     Solve the tensor equation ``a x = b`` for x.
@@ -311,6 +312,12 @@ def tensorsolve(a, b, axes=None):
     res.shape = oldshape
     return res
 
+
+def _solve_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_solve_dispatcher)
 def solve(a, b):
     """
     Solve a linear matrix equation, or system of linear scalar equations.
@@ -391,6 +398,11 @@ def solve(a, b):
     return wrap(r.astype(result_t, copy=False))
 
 
+def _tensorinv_dispatcher(a, ind=None):
+    return (a,)
+
+
+@array_function_dispatch(_tensorinv_dispatcher)
 def tensorinv(a, ind=2):
     """
     Compute the 'inverse' of an N-dimensional array.
@@ -460,6 +472,11 @@ def tensorinv(a, ind=2):
 
 # Matrix inversion
 
+def _unary_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_unary_dispatcher)
 def inv(a):
     """
     Compute the (multiplicative) inverse of a matrix.
@@ -528,6 +545,11 @@ def inv(a):
     return wrap(ainv.astype(result_t, copy=False))
 
 
+def _matrix_power_dispatcher(a, n):
+    return (a,)
+
+
+@array_function_dispatch(_matrix_power_dispatcher)
 def matrix_power(a, n):
     """
     Raise a square matrix to the (integer) power `n`.
@@ -645,6 +667,8 @@ def matrix_power(a, n):
 
 # Cholesky decomposition
 
+
+@array_function_dispatch(_unary_dispatcher)
 def cholesky(a):
     """
     Cholesky decomposition.
@@ -728,8 +752,14 @@ def cholesky(a):
     r = gufunc(a, signature=signature, extobj=extobj)
     return wrap(r.astype(result_t, copy=False))
 
+
 # QR decompostion
 
+def _qr_dispatcher(a, mode=None):
+    return (a,)
+
+
+@array_function_dispatch(_qr_dispatcher)
 def qr(a, mode='reduced'):
     """
     Compute the qr factorization of a matrix.
@@ -945,6 +975,7 @@ def qr(a, mode='reduced'):
 # Eigenvalues
 
 
+@array_function_dispatch(_unary_dispatcher)
 def eigvals(a):
     """
     Compute the eigenvalues of a general matrix.
@@ -1034,6 +1065,12 @@ def eigvals(a):
 
     return w.astype(result_t, copy=False)
 
+
+def _eigvalsh_dispatcher(a, UPLO=None):
+    return (a,)
+
+
+@array_function_dispatch(_eigvalsh_dispatcher)
 def eigvalsh(a, UPLO='L'):
     """
     Compute the eigenvalues of a complex Hermitian or real symmetric matrix.
@@ -1135,6 +1172,7 @@ def _convertarray(a):
 # Eigenvectors
 
 
+@array_function_dispatch(_unary_dispatcher)
 def eig(a):
     """
     Compute the eigenvalues and right eigenvectors of a square array.
@@ -1276,6 +1314,7 @@ def eig(a):
     return w.astype(result_t, copy=False), wrap(vt)
 
 
+@array_function_dispatch(_eigvalsh_dispatcher)
 def eigh(a, UPLO='L'):
     """
     Return the eigenvalues and eigenvectors of a complex Hermitian
@@ -1415,6 +1454,11 @@ def eigh(a, UPLO='L'):
 
 # Singular value decomposition
 
+def _svd_dispatcher(a, full_matrices=None, compute_uv=None):
+    return (a,)
+
+
+@array_function_dispatch(_svd_dispatcher)
 def svd(a, full_matrices=True, compute_uv=True):
     """
     Singular Value Decomposition.
@@ -1575,6 +1619,11 @@ def svd(a, full_matrices=True, compute_uv=True):
         return s
 
 
+def _cond_dispatcher(x, p=None):
+    return (x,)
+
+
+@array_function_dispatch(_cond_dispatcher)
 def cond(x, p=None):
     """
     Compute the condition number of a matrix.
@@ -1692,6 +1741,11 @@ def cond(x, p=None):
     return r
 
 
+def _matrix_rank_dispatcher(M, tol=None, hermitian=None):
+    return (M,)
+
+
+@array_function_dispatch(_matrix_rank_dispatcher)
 def matrix_rank(M, tol=None, hermitian=False):
     """
     Return matrix rank of array using SVD method
@@ -1796,7 +1850,12 @@ def matrix_rank(M, tol=None, hermitian=False):
 
 # Generalized inverse
 
-def pinv(a, rcond=1e-15 ):
+def _pinv_dispatcher(a, rcond=None):
+    return (a,)
+
+
+@array_function_dispatch(_pinv_dispatcher)
+def pinv(a, rcond=1e-15):
     """
     Compute the (Moore-Penrose) pseudo-inverse of a matrix.
 
@@ -1880,8 +1939,11 @@ def pinv(a, rcond=1e-15 ):
     res = matmul(transpose(vt), multiply(s[..., newaxis], transpose(u)))
     return wrap(res)
 
+
 # Determinant
 
+
+@array_function_dispatch(_unary_dispatcher)
 def slogdet(a):
     """
     Compute the sign and (natural) logarithm of the determinant of an array.
@@ -1967,6 +2029,8 @@ def slogdet(a):
     logdet = logdet.astype(real_t, copy=False)
     return sign, logdet
 
+
+@array_function_dispatch(_unary_dispatcher)
 def det(a):
     """
     Compute the determinant of an array.
@@ -2023,8 +2087,14 @@ def det(a):
     r = r.astype(result_t, copy=False)
     return r
 
+
 # Linear Least Squares
 
+def _lstsq_dispatcher(a, b, rcond=None):
+    return (a, b)
+
+
+@array_function_dispatch(_lstsq_dispatcher)
 def lstsq(a, b, rcond="warn"):
     """
     Return the least-squares solution to a linear matrix equation.
@@ -2208,6 +2278,11 @@ def _multi_svd_norm(x, row_axis, col_axis, op):
     return result
 
 
+def _norm_dispatcher(x, ord=None, axis=None, keepdims=None):
+    return (x,)
+
+
+@array_function_dispatch(_norm_dispatcher)
 def norm(x, ord=None, axis=None, keepdims=False):
     """
     Matrix or vector norm.
@@ -2450,6 +2525,11 @@ def norm(x, ord=None, axis=None, keepdims=False):
 
 # multi_dot
 
+def _multidot_dispatcher(arrays):
+    return arrays
+
+
+@array_function_dispatch(_multidot_dispatcher)
 def multi_dot(arrays):
     """
     Compute the dot product of two or more arrays in a single function call,
diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py
index 98a77d8f5..0e94c2633 100644
--- a/numpy/linalg/tests/test_linalg.py
+++ b/numpy/linalg/tests/test_linalg.py
@@ -19,7 +19,7 @@ from numpy.linalg import matrix_power, norm, matrix_rank, multi_dot, LinAlgError
 from numpy.linalg.linalg import _multi_dot_matrix_chain_order
 from numpy.testing import (
     assert_, assert_equal, assert_raises, assert_array_equal,
-    assert_almost_equal, assert_allclose, SkipTest, suppress_warnings
+    assert_almost_equal, assert_allclose, suppress_warnings
     )
 
 
@@ -462,12 +462,10 @@ class SolveCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase):
 
 
 class TestSolve(SolveCases):
-    def test_types(self):
-        def check(dtype):
-            x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
-            assert_equal(linalg.solve(x, x).dtype, dtype)
-        for dtype in [single, double, csingle, cdouble]:
-            check(dtype)
+    @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble])
+    def test_types(self, dtype):
+        x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
+        assert_equal(linalg.solve(x, x).dtype, dtype)
 
     def test_0_size(self):
         class ArraySubclass(np.ndarray):
@@ -531,12 +529,10 @@ class InvCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase):
 
 
 class TestInv(InvCases):
-    def test_types(self):
-        def check(dtype):
-            x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
-            assert_equal(linalg.inv(x).dtype, dtype)
-        for dtype in [single, double, csingle, cdouble]:
-            check(dtype)
+    @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble])
+    def test_types(self, dtype):
+        x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
+        assert_equal(linalg.inv(x).dtype, dtype)
 
     def test_0_size(self):
         # Check that all kinds of 0-sized arrays work
@@ -564,14 +560,12 @@ class EigvalsCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase):
 
 
 class TestEigvals(EigvalsCases):
-    def test_types(self):
-        def check(dtype):
-            x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
-            assert_equal(linalg.eigvals(x).dtype, dtype)
-            x = np.array([[1, 0.5], [-1, 1]], dtype=dtype)
-            assert_equal(linalg.eigvals(x).dtype, get_complex_dtype(dtype))
-        for dtype in [single, double, csingle, cdouble]:
-            check(dtype)
+    @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble])
+    def test_types(self, dtype):
+        x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
+        assert_equal(linalg.eigvals(x).dtype, dtype)
+        x = np.array([[1, 0.5], [-1, 1]], dtype=dtype)
+        assert_equal(linalg.eigvals(x).dtype, get_complex_dtype(dtype))
 
     def test_0_size(self):
         # Check that all kinds of 0-sized arrays work
@@ -603,20 +597,17 @@ class EigCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase):
 
 
 class TestEig(EigCases):
-    def test_types(self):
-        def check(dtype):
-            x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
-            w, v = np.linalg.eig(x)
-            assert_equal(w.dtype, dtype)
-            assert_equal(v.dtype, dtype)
-
-            x = np.array([[1, 0.5], [-1, 1]], dtype=dtype)
-            w, v = np.linalg.eig(x)
-            assert_equal(w.dtype, get_complex_dtype(dtype))
-            assert_equal(v.dtype, get_complex_dtype(dtype))
-
-        for dtype in [single, double, csingle, cdouble]:
-            check(dtype)
+    @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble])
+    def test_types(self, dtype):
+        x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
+        w, v = np.linalg.eig(x)
+        assert_equal(w.dtype, dtype)
+        assert_equal(v.dtype, dtype)
+
+        x = np.array([[1, 0.5], [-1, 1]], dtype=dtype)
+        w, v = np.linalg.eig(x)
+        assert_equal(w.dtype, get_complex_dtype(dtype))
+        assert_equal(v.dtype, get_complex_dtype(dtype))
 
     def test_0_size(self):
         # Check that all kinds of 0-sized arrays work
@@ -653,18 +644,15 @@ class SVDCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase):
 
 
 class TestSVD(SVDCases):
-    def test_types(self):
-        def check(dtype):
-            x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
-            u, s, vh = linalg.svd(x)
-            assert_equal(u.dtype, dtype)
-            assert_equal(s.dtype, get_real_dtype(dtype))
-            assert_equal(vh.dtype, dtype)
-            s = linalg.svd(x, compute_uv=False)
-            assert_equal(s.dtype, get_real_dtype(dtype))
-
-        for dtype in [single, double, csingle, cdouble]:
-            check(dtype)
+    @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble])
+    def test_types(self, dtype):
+        x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
+        u, s, vh = linalg.svd(x)
+        assert_equal(u.dtype, dtype)
+        assert_equal(s.dtype, get_real_dtype(dtype))
+        assert_equal(vh.dtype, dtype)
+        s = linalg.svd(x, compute_uv=False)
+        assert_equal(s.dtype, get_real_dtype(dtype))
 
     def test_empty_identity(self):
         """ Empty input should put an identity matrix in u or vh """
@@ -842,15 +830,13 @@ class TestDet(DetCases):
         assert_equal(type(linalg.slogdet([[0.0j]])[0]), cdouble)
         assert_equal(type(linalg.slogdet([[0.0j]])[1]), double)
 
-    def test_types(self):
-        def check(dtype):
-            x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
-            assert_equal(np.linalg.det(x).dtype, dtype)
-            ph, s = np.linalg.slogdet(x)
-            assert_equal(s.dtype, get_real_dtype(dtype))
-            assert_equal(ph.dtype, dtype)
-        for dtype in [single, double, csingle, cdouble]:
-            check(dtype)
+    @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble])
+    def test_types(self, dtype):
+        x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
+        assert_equal(np.linalg.det(x).dtype, dtype)
+        ph, s = np.linalg.slogdet(x)
+        assert_equal(s.dtype, get_real_dtype(dtype))
+        assert_equal(ph.dtype, dtype)
 
     def test_0_size(self):
         a = np.zeros((0, 0), dtype=np.complex64)
@@ -1049,13 +1035,11 @@ class TestEigvalshCases(HermitianTestCase, HermitianGeneralizedTestCase):
 
 
 class TestEigvalsh(object):
-    def test_types(self):
-        def check(dtype):
-            x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
-            w = np.linalg.eigvalsh(x)
-            assert_equal(w.dtype, get_real_dtype(dtype))
-        for dtype in [single, double, csingle, cdouble]:
-            check(dtype)
+    @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble])
+    def test_types(self, dtype):
+        x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
+        w = np.linalg.eigvalsh(x)
+        assert_equal(w.dtype, get_real_dtype(dtype))
 
     def test_invalid(self):
         x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32)
@@ -1127,14 +1111,12 @@ class TestEighCases(HermitianTestCase, HermitianGeneralizedTestCase):
 
 
 class TestEigh(object):
-    def test_types(self):
-        def check(dtype):
-            x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
-            w, v = np.linalg.eigh(x)
-            assert_equal(w.dtype, get_real_dtype(dtype))
-            assert_equal(v.dtype, dtype)
-        for dtype in [single, double, csingle, cdouble]:
-            check(dtype)
+    @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble])
+    def test_types(self, dtype):
+        x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
+        w, v = np.linalg.eigh(x)
+        assert_equal(w.dtype, get_real_dtype(dtype))
+        assert_equal(v.dtype, dtype)
 
     def test_invalid(self):
         x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32)
@@ -1769,7 +1751,7 @@ def test_xerbla_override():
         pid = os.fork()
     except (OSError, AttributeError):
         # fork failed, or not running on POSIX
-        raise SkipTest("Not POSIX or fork failed.")
+        pytest.skip("Not POSIX or fork failed.")
 
     if pid == 0:
         # child; close i/o file handles
@@ -1804,7 +1786,7 @@ def test_xerbla_override():
         # parent
         pid, status = os.wait()
         if os.WEXITSTATUS(status) != XERBLA_OK:
-            raise SkipTest('Numpy xerbla not linked in.')
+            pytest.skip('Numpy xerbla not linked in.')
 
 
 def test_sdot_bug_8577():
@@ -1853,6 +1835,14 @@ class TestMultiDot(object):
         assert_almost_equal(multi_dot([A, B, C]), A.dot(B).dot(C))
         assert_almost_equal(multi_dot([A, B, C]), np.dot(A, np.dot(B, C)))
 
+    def test_basic_function_with_two_arguments(self):
+        # separate code path with two arguments
+        A = np.random.random((6, 2))
+        B = np.random.random((2, 6))
+
+        assert_almost_equal(multi_dot([A, B]), A.dot(B))
+        assert_almost_equal(multi_dot([A, B]), np.dot(A, B))
+
     def test_basic_function_with_dynamic_programing_optimization(self):
         # multi_dot with four or more arguments uses the dynamic programing
         # optimization and therefore deserve a separate
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 65ce967ae..85a830661 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -46,13 +46,10 @@ from numpy.compat import (
 from numpy import expand_dims
 from numpy.core.multiarray import normalize_axis_index
 from numpy.core.numeric import normalize_axis_tuple
+from numpy.core._internal import recursive
+from numpy.core.numeric import pickle
 
 
-if sys.version_info[0] >= 3:
-    import pickle
-else:
-    import cPickle as pickle
-
 __all__ = [
     'MAError', 'MaskError', 'MaskType', 'MaskedArray', 'abs', 'absolute',
     'add', 'all', 'allclose', 'allequal', 'alltrue', 'amax', 'amin',
@@ -1729,12 +1726,13 @@ def mask_or(m1, m2, copy=False, shrink=True):
 
     """
 
-    def _recursive_mask_or(m1, m2, newmask):
+    @recursive
+    def _recursive_mask_or(self, m1, m2, newmask):
         names = m1.dtype.names
         for name in names:
             current1 = m1[name]
             if current1.dtype.names is not None:
-                _recursive_mask_or(current1, m2[name], newmask[name])
+                self(current1, m2[name], newmask[name])
             else:
                 umath.logical_or(current1, m2[name], newmask[name])
         return
diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py
index 90a5141b3..daf2f8770 100644
--- a/numpy/ma/mrecords.py
+++ b/numpy/ma/mrecords.py
@@ -29,7 +29,6 @@ from numpy.core.records import (
         )
 
 _byteorderconv = np.core.records._byteorderconv
-_typestr = ntypes._typestr
 
 import numpy.ma as ma
 from numpy.ma import (
@@ -48,24 +47,6 @@ __all__ = [
 reserved_fields = ['_data', '_mask', '_fieldmask', 'dtype']
 
 
-def _getformats(data):
-    """
-    Returns the formats of arrays in arraylist as a comma-separated string.
-
-    """
-    if hasattr(data, 'dtype'):
-        return ",".join([desc[1] for desc in data.dtype.descr])
-
-    formats = ''
-    for obj in data:
-        obj = np.asarray(obj)
-        formats += _typestr[obj.dtype.type]
-        if issubclass(obj.dtype.type, ntypes.flexible):
-            formats += repr(obj.itemsize)
-        formats += ','
-    return formats[:-1]
-
-
 def _checknames(descr, names=None):
     """
     Checks that field names ``descr`` are not reserved keywords.
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index a08a0d956..aa7672daa 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -10,7 +10,6 @@ __author__ = "Pierre GF Gerard-Marchant"
 
 import sys
 import warnings
-import pickle
 import operator
 import itertools
 import textwrap
@@ -50,6 +49,7 @@ from numpy.ma.core import (
     ravel, repeat, reshape, resize, shape, sin, sinh, sometrue, sort, sqrt,
     subtract, sum, take, tan, tanh, transpose, where, zeros,
     )
+from numpy.core.numeric import pickle
 
 pi = np.pi
 
@@ -555,50 +555,55 @@ class TestMaskedArray(object):
                      True,                            # Fully masked
                      False)                           # Fully unmasked
 
-            for mask in masks:
-                a.mask = mask
-                a_pickled = pickle.loads(a.dumps())
-                assert_equal(a_pickled._mask, a._mask)
-                assert_equal(a_pickled._data, a._data)
-                if dtype in (object, int):
-                    assert_equal(a_pickled.fill_value, 999)
-                else:
-                    assert_equal(a_pickled.fill_value, dtype(999))
-                assert_array_equal(a_pickled.mask, mask)
+            for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+                for mask in masks:
+                    a.mask = mask
+                    a_pickled = pickle.loads(pickle.dumps(a, protocol=proto))
+                    assert_equal(a_pickled._mask, a._mask)
+                    assert_equal(a_pickled._data, a._data)
+                    if dtype in (object, int):
+                        assert_equal(a_pickled.fill_value, 999)
+                    else:
+                        assert_equal(a_pickled.fill_value, dtype(999))
+                    assert_array_equal(a_pickled.mask, mask)
 
     def test_pickling_subbaseclass(self):
         # Test pickling w/ a subclass of ndarray
         x = np.array([(1.0, 2), (3.0, 4)],
                      dtype=[('x', float), ('y', int)]).view(np.recarray)
         a = masked_array(x, mask=[(True, False), (False, True)])
-        a_pickled = pickle.loads(a.dumps())
-        assert_equal(a_pickled._mask, a._mask)
-        assert_equal(a_pickled, a)
-        assert_(isinstance(a_pickled._data, np.recarray))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            a_pickled = pickle.loads(pickle.dumps(a, protocol=proto))
+            assert_equal(a_pickled._mask, a._mask)
+            assert_equal(a_pickled, a)
+            assert_(isinstance(a_pickled._data, np.recarray))
 
     def test_pickling_maskedconstant(self):
         # Test pickling MaskedConstant
         mc = np.ma.masked
-        mc_pickled = pickle.loads(mc.dumps())
-        assert_equal(mc_pickled._baseclass, mc._baseclass)
-        assert_equal(mc_pickled._mask, mc._mask)
-        assert_equal(mc_pickled._data, mc._data)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            mc_pickled = pickle.loads(pickle.dumps(mc, protocol=proto))
+            assert_equal(mc_pickled._baseclass, mc._baseclass)
+            assert_equal(mc_pickled._mask, mc._mask)
+            assert_equal(mc_pickled._data, mc._data)
 
     def test_pickling_wstructured(self):
         # Tests pickling w/ structured array
         a = array([(1, 1.), (2, 2.)], mask=[(0, 0), (0, 1)],
                   dtype=[('a', int), ('b', float)])
-        a_pickled = pickle.loads(a.dumps())
-        assert_equal(a_pickled._mask, a._mask)
-        assert_equal(a_pickled, a)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            a_pickled = pickle.loads(pickle.dumps(a, protocol=proto))
+            assert_equal(a_pickled._mask, a._mask)
+            assert_equal(a_pickled, a)
 
     def test_pickling_keepalignment(self):
         # Tests pickling w/ F_CONTIGUOUS arrays
         a = arange(10)
         a.shape = (-1, 2)
         b = a.T
-        test = pickle.loads(pickle.dumps(b))
-        assert_equal(test, b)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            test = pickle.loads(pickle.dumps(b, protocol=proto))
+            assert_equal(test, b)
 
     def test_single_element_subscript(self):
         # Tests single element subscripts of Maskedarrays.
@@ -3806,12 +3811,8 @@ class TestMaskedArrayFunctions(object):
 
     def test_masked_where_shape_constraint(self):
         a = arange(10)
-        try:
-            test = masked_equal(1, a)
-        except IndexError:
-            pass
-        else:
-            raise AssertionError("Should have failed...")
+        with assert_raises(IndexError):
+            masked_equal(1, a)
         test = masked_equal(a, 1)
         assert_equal(test.mask, [0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
 
@@ -4805,13 +4806,13 @@ class TestMaskedConstant(object):
 
     def test_pickle(self):
         from io import BytesIO
-        import pickle
 
-        with BytesIO() as f:
-            pickle.dump(np.ma.masked, f)
-            f.seek(0)
-            res = pickle.load(f)
-        assert_(res is np.ma.masked)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            with BytesIO() as f:
+                pickle.dump(np.ma.masked, f, protocol=proto)
+                f.seek(0)
+                res = pickle.load(f)
+            assert_(res is np.ma.masked)
 
     def test_copy(self):
         # gh-9328
diff --git a/numpy/ma/tests/test_mrecords.py b/numpy/ma/tests/test_mrecords.py
index e08dc1326..8b9e3fbc9 100644
--- a/numpy/ma/tests/test_mrecords.py
+++ b/numpy/ma/tests/test_mrecords.py
@@ -8,7 +8,6 @@
 from __future__ import division, absolute_import, print_function
 
 import warnings
-import pickle
 
 import numpy as np
 import numpy.ma as ma
@@ -26,6 +25,7 @@ from numpy.ma.testutils import (
     assert_, assert_equal,
     assert_equal_records,
     )
+from numpy.core.numeric import pickle
 
 
 class TestMRecords(object):
@@ -288,12 +288,13 @@ class TestMRecords(object):
         # Test pickling
         base = self.base.copy()
         mrec = base.view(mrecarray)
-        _ = pickle.dumps(mrec)
-        mrec_ = pickle.loads(_)
-        assert_equal(mrec_.dtype, mrec.dtype)
-        assert_equal_records(mrec_._data, mrec._data)
-        assert_equal(mrec_._mask, mrec._mask)
-        assert_equal_records(mrec_._mask, mrec._mask)
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            _ = pickle.dumps(mrec, protocol=proto)
+            mrec_ = pickle.loads(_)
+            assert_equal(mrec_.dtype, mrec.dtype)
+            assert_equal_records(mrec_._data, mrec._data)
+            assert_equal(mrec_._mask, mrec._mask)
+            assert_equal_records(mrec_._mask, mrec._mask)
 
     def test_filled(self):
         # Test filling the array
diff --git a/numpy/ma/tests/test_old_ma.py b/numpy/ma/tests/test_old_ma.py
index d7b1e3c18..807121184 100644
--- a/numpy/ma/tests/test_old_ma.py
+++ b/numpy/ma/tests/test_old_ma.py
@@ -22,6 +22,7 @@ from numpy.ma import (
     repeat, resize, shape, sin, sinh, sometrue, sort, sqrt, subtract, sum,
     take, tan, tanh, transpose, where, zeros,
     )
+from numpy.core.numeric import pickle
 
 pi = np.pi
 
@@ -549,13 +550,13 @@ class TestMa(object):
 
     def test_testPickle(self):
         # Test of pickling
-        import pickle
         x = arange(12)
         x[4:10:2] = masked
         x = x.reshape(4, 3)
-        s = pickle.dumps(x)
-        y = pickle.loads(s)
-        assert_(eq(x, y))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            s = pickle.dumps(x, protocol=proto)
+            y = pickle.loads(s)
+            assert_(eq(x, y))
 
     def test_testMasked(self):
         # Test of masked element
diff --git a/numpy/matrixlib/setup.py b/numpy/matrixlib/setup.py
index 8c383cece..d0981d658 100644
--- a/numpy/matrixlib/setup.py
+++ b/numpy/matrixlib/setup.py
@@ -1,8 +1,6 @@
 #!/usr/bin/env python
 from __future__ import division, print_function
 
-import os
-
 def configuration(parent_package='', top_path=None):
     from numpy.distutils.misc_util import Configuration
     config = Configuration('matrixlib', parent_package, top_path)
diff --git a/numpy/matrixlib/tests/test_defmatrix.py b/numpy/matrixlib/tests/test_defmatrix.py
index 272cd8d52..f8a8ad511 100644
--- a/numpy/matrixlib/tests/test_defmatrix.py
+++ b/numpy/matrixlib/tests/test_defmatrix.py
@@ -1,12 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
-# As we are testing matrices, we ignore its PendingDeprecationWarnings
-try:
-    import pytest
-    pytestmark = pytest.mark.filterwarnings(
-        'ignore:the matrix subclass is not:PendingDeprecationWarning')
-except ImportError:
-    pass
+import pytest
 
 try:
     # Accessing collections abstract classes from collections
@@ -274,21 +268,13 @@ class TestAlgebra(object):
                     [3., 4.]])
 
         # __rpow__
-        try:
+        with assert_raises(TypeError):
             1.0**A
-        except TypeError:
-            pass
-        else:
-            self.fail("matrix.__rpow__ doesn't raise a TypeError")
 
         # __mul__ with something not a list, ndarray, tuple, or scalar
-        try:
+        with assert_raises(TypeError):
             A*object()
-        except TypeError:
-            pass
-        else:
-            self.fail("matrix.__mul__ with non-numeric object doesn't raise"
-                      "a TypeError")
+
 
 class TestMatrixReturn(object):
     def test_instance_methods(self):
diff --git a/numpy/matrixlib/tests/test_interaction.py b/numpy/matrixlib/tests/test_interaction.py
index fb4d8f98c..088ae3c6a 100644
--- a/numpy/matrixlib/tests/test_interaction.py
+++ b/numpy/matrixlib/tests/test_interaction.py
@@ -4,13 +4,7 @@ Note that tests with MaskedArray and linalg are done in separate files.
 """
 from __future__ import division, absolute_import, print_function
 
-# As we are testing matrices, we ignore its PendingDeprecationWarnings
-try:
-    import pytest
-    pytestmark = pytest.mark.filterwarnings(
-        'ignore:the matrix subclass is not:PendingDeprecationWarning')
-except ImportError:
-    pass
+import pytest
 
 import textwrap
 import warnings
diff --git a/numpy/matrixlib/tests/test_masked_matrix.py b/numpy/matrixlib/tests/test_masked_matrix.py
index adc2e5419..1751020db 100644
--- a/numpy/matrixlib/tests/test_masked_matrix.py
+++ b/numpy/matrixlib/tests/test_masked_matrix.py
@@ -1,14 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
-# As we are testing matrices, we ignore its PendingDeprecationWarnings
-try:
-    import pytest
-    pytestmark = pytest.mark.filterwarnings(
-        'ignore:the matrix subclass is not:PendingDeprecationWarning')
-except ImportError:
-    pass
-
-import pickle
+import pytest
 
 import numpy as np
 from numpy.ma.testutils import (assert_, assert_equal, assert_raises,
@@ -17,6 +9,7 @@ from numpy.ma.core import (masked_array, masked_values, masked, allequal,
                            MaskType, getmask, MaskedArray, nomask,
                            log, add, hypot, divide)
 from numpy.ma.extras import mr_
+from numpy.core.numeric import pickle
 
 
 class MMatrix(MaskedArray, np.matrix,):
@@ -86,10 +79,11 @@ class TestMaskedMatrix(object):
     def test_pickling_subbaseclass(self):
         # Test pickling w/ a subclass of ndarray
         a = masked_array(np.matrix(list(range(10))), mask=[1, 0, 1, 0, 0] * 2)
-        a_pickled = pickle.loads(a.dumps())
-        assert_equal(a_pickled._mask, a._mask)
-        assert_equal(a_pickled, a)
-        assert_(isinstance(a_pickled._data, np.matrix))
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            a_pickled = pickle.loads(pickle.dumps(a, protocol=proto))
+            assert_equal(a_pickled._mask, a._mask)
+            assert_equal(a_pickled, a)
+            assert_(isinstance(a_pickled._data, np.matrix))
 
     def test_count_mean_with_matrix(self):
         m = masked_array(np.matrix([[1, 2], [3, 4]]), mask=np.zeros((2, 2)))
diff --git a/numpy/matrixlib/tests/test_matrix_linalg.py b/numpy/matrixlib/tests/test_matrix_linalg.py
index 85c7693b4..8d31ec5b0 100644
--- a/numpy/matrixlib/tests/test_matrix_linalg.py
+++ b/numpy/matrixlib/tests/test_matrix_linalg.py
@@ -1,13 +1,7 @@
 """ Test functions for linalg module using the matrix class."""
 from __future__ import division, absolute_import, print_function
 
-# As we are testing matrices, we ignore its PendingDeprecationWarnings
-try:
-    import pytest
-    pytestmark = pytest.mark.filterwarnings(
-        'ignore:the matrix subclass is not:PendingDeprecationWarning')
-except ImportError:
-    pass
+import pytest
 
 import numpy as np
 
diff --git a/numpy/matrixlib/tests/test_multiarray.py b/numpy/matrixlib/tests/test_multiarray.py
index 2f04b49d6..8de0a7c6a 100644
--- a/numpy/matrixlib/tests/test_multiarray.py
+++ b/numpy/matrixlib/tests/test_multiarray.py
@@ -1,12 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
-# As we are testing matrices, we ignore its PendingDeprecationWarnings
-try:
-    import pytest
-    pytestmark = pytest.mark.filterwarnings(
-        'ignore:the matrix subclass is not:PendingDeprecationWarning')
-except ImportError:
-    pass
+import pytest
 
 import numpy as np
 from numpy.testing import assert_, assert_equal, assert_array_equal
diff --git a/numpy/matrixlib/tests/test_numeric.py b/numpy/matrixlib/tests/test_numeric.py
index cfdada126..e9f44e747 100644
--- a/numpy/matrixlib/tests/test_numeric.py
+++ b/numpy/matrixlib/tests/test_numeric.py
@@ -1,12 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
-# As we are testing matrices, we ignore its PendingDeprecationWarnings
-try:
-    import pytest
-    pytestmark = pytest.mark.filterwarnings(
-        'ignore:the matrix subclass is not:PendingDeprecationWarning')
-except ImportError:
-    pass
+import pytest
 
 import numpy as np
 from numpy.testing import assert_equal
diff --git a/numpy/matrixlib/tests/test_regression.py b/numpy/matrixlib/tests/test_regression.py
index 439704ccf..88654c76a 100644
--- a/numpy/matrixlib/tests/test_regression.py
+++ b/numpy/matrixlib/tests/test_regression.py
@@ -1,12 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
-# As we are testing matrices, we ignore its PendingDeprecationWarnings
-try:
-    import pytest
-    pytestmark = pytest.mark.filterwarnings(
-        'ignore:the matrix subclass is not:PendingDeprecationWarning')
-except ImportError:
-    pass
+import pytest
 
 import numpy as np
 from numpy.testing import assert_, assert_equal, assert_raises
diff --git a/numpy/polynomial/_polybase.py b/numpy/polynomial/_polybase.py
index 42db9bfd1..c28e77e69 100644
--- a/numpy/polynomial/_polybase.py
+++ b/numpy/polynomial/_polybase.py
@@ -317,13 +317,6 @@ class ABCPolyBase(object):
             )
             needs_parens = True
 
-        # filter out uninteresting coefficients
-        filtered_coeffs = [
-            (i, c)
-            for i, c in enumerate(self.coef)
-            # if not (c == 0)  # handle NaN
-        ]
-
         mute = r"\color{{LightGray}}{{{}}}".format
 
         parts = []
diff --git a/numpy/polynomial/chebyshev.py b/numpy/polynomial/chebyshev.py
index f14ed988d..f1ddc9b06 100644
--- a/numpy/polynomial/chebyshev.py
+++ b/numpy/polynomial/chebyshev.py
@@ -89,7 +89,6 @@ References
 """
 from __future__ import division, absolute_import, print_function
 
-import numbers
 import warnings
 import numpy as np
 import numpy.linalg as la
diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx
index 5097ad88f..6b054a20f 100644
--- a/numpy/random/mtrand/mtrand.pyx
+++ b/numpy/random/mtrand/mtrand.pyx
@@ -4143,15 +4143,15 @@ cdef class RandomState:
         if op.shape == ():
             fp = PyFloat_AsDouble(p)
 
-            if fp < 0.0:
-                raise ValueError("p < 0.0")
+            if fp <= 0.0:
+                raise ValueError("p <= 0.0")
             if fp > 1.0:
                 raise ValueError("p > 1.0")
             return discd_array_sc(self.internal_state, rk_geometric, size, fp,
                                   self.lock)
 
-        if np.any(np.less(op, 0.0)):
-            raise ValueError("p < 0.0")
+        if np.any(np.less_equal(op, 0.0)):
+            raise ValueError("p <= 0.0")
         if np.any(np.greater(op, 1.0)):
             raise ValueError("p > 1.0")
         return discd_array(self.internal_state, rk_geometric, size, op,
@@ -4836,9 +4836,8 @@ cdef class RandomState:
                     self._shuffle_raw(n, sizeof(npy_intp), stride, x_ptr, buf_ptr)
                 else:
                     self._shuffle_raw(n, itemsize, stride, x_ptr, buf_ptr)
-        elif isinstance(x, np.ndarray) and x.ndim > 1 and x.size:
-            # Multidimensional ndarrays require a bounce buffer.
-            buf = np.empty_like(x[0])
+        elif isinstance(x, np.ndarray) and x.ndim and x.size:
+            buf = np.empty_like(x[0,...])
             with self.lock:
                 for i in reversed(range(1, n)):
                     j = rk_interval(i, self.internal_state)
@@ -4907,8 +4906,8 @@ cdef class RandomState:
     
         # shuffle has fast-path for 1-d
         if arr.ndim == 1:
-            # must return a copy
-            if arr is x:
+            # Return a copy if same memory
+            if np.may_share_memory(arr, x):
                 arr = np.array(arr)
             self.shuffle(arr)
             return arr
diff --git a/numpy/random/setup.py b/numpy/random/setup.py
index 3f3b773a4..394a70ead 100644
--- a/numpy/random/setup.py
+++ b/numpy/random/setup.py
@@ -1,7 +1,6 @@
 from __future__ import division, print_function
 
-from os.path import join, split, dirname
-import os
+from os.path import join
 import sys
 from distutils.dep_util import newer
 from distutils.msvccompiler import get_build_version as get_msvc_build_version
diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py
index 8328c69c0..276517363 100644
--- a/numpy/random/tests/test_random.py
+++ b/numpy/random/tests/test_random.py
@@ -9,7 +9,6 @@ from numpy.testing import (
         )
 from numpy import random
 import sys
-import warnings
 
 
 class TestSeed(object):
@@ -467,6 +466,10 @@ class TestRandomDist(object):
                      lambda x: [(i, i) for i in x],
                      lambda x: np.asarray([[i, i] for i in x]),
                      lambda x: np.vstack([x, x]).T,
+                     # gh-11442
+                     lambda x: (np.asarray([(i, i) for i in x],
+                                           [("a", int), ("b", int)])
+                                .view(np.recarray)),
                      # gh-4270
                      lambda x: np.asarray([(i, i) for i in x],
                                           [("a", object, 1),
diff --git a/numpy/random/tests/test_regression.py b/numpy/random/tests/test_regression.py
index 3b4b4ed40..ca9bbbc71 100644
--- a/numpy/random/tests/test_regression.py
+++ b/numpy/random/tests/test_regression.py
@@ -133,3 +133,25 @@ class TestRegression(object):
         # Force Garbage Collection - should not segfault.
         import gc
         gc.collect()
+
+    def test_permutation_subclass(self):
+        class Sub(np.ndarray):
+            pass
+
+        np.random.seed(1)
+        source = np.arange(3).view(Sub)
+        shuffled = np.random.permutation(source)
+        assert_array_equal(shuffled, np.array([0, 2, 1]))
+        assert_array_equal(source, np.arange(3).view(Sub))  # input intact
+
+        class ArrayLike(object):
+            a = np.arange(5)
+
+            def __array__(self):
+                return self.a
+
+        np.random.seed(1)
+        wrapped = ArrayLike()
+        shuffled = np.random.permutation(wrapped)
+        assert_array_equal(shuffled, np.array([2, 1, 4, 0, 3]))
+        assert_array_equal(wrapped.__array__(), np.arange(5))  # input intact
diff --git a/numpy/testing/decorators.py b/numpy/testing/decorators.py
index 68c1554b5..bf78be500 100644
--- a/numpy/testing/decorators.py
+++ b/numpy/testing/decorators.py
@@ -8,8 +8,8 @@ from __future__ import division, absolute_import, print_function
 import warnings
 
 # 2018-04-04, numpy 1.15.0
-warnings.warn("Importing from numpy.testing.decorators is deprecated, "
-              "import from numpy.testing instead.",
+warnings.warn("Importing from numpy.testing.decorators is deprecated "
+              "since numpy 1.15.0, import from numpy.testing instead.",
               DeprecationWarning, stacklevel=2)
 
 from ._private.decorators import *
diff --git a/numpy/testing/noseclasses.py b/numpy/testing/noseclasses.py
index e0e728a32..5748a9a0f 100644
--- a/numpy/testing/noseclasses.py
+++ b/numpy/testing/noseclasses.py
@@ -7,8 +7,8 @@ from __future__ import division, absolute_import, print_function
 import warnings
 
 # 2018-04-04, numpy 1.15.0
-warnings.warn("Importing from numpy.testing.noseclasses is deprecated, "
-              "import from numpy.testing instead",
+warnings.warn("Importing from numpy.testing.noseclasses is deprecated "
+              "since 1.15.0, import from numpy.testing instead",
               DeprecationWarning, stacklevel=2)
 
 from ._private.noseclasses import *
diff --git a/numpy/testing/nosetester.py b/numpy/testing/nosetester.py
index c8c7d6e68..2ac212eee 100644
--- a/numpy/testing/nosetester.py
+++ b/numpy/testing/nosetester.py
@@ -8,8 +8,8 @@ from __future__ import division, absolute_import, print_function
 import warnings
 
 # 2018-04-04, numpy 1.15.0
-warnings.warn("Importing from numpy.testing.nosetester is deprecated, "
-              "import from numpy.testing instead.",
+warnings.warn("Importing from numpy.testing.nosetester is deprecated "
+              "since 1.15.0, import from numpy.testing instead.",
               DeprecationWarning, stacklevel=2)
 
 from ._private.nosetester import *
diff --git a/numpy/testing/tests/test_decorators.py b/numpy/testing/tests/test_decorators.py
index d00820b80..b8283d9de 100644
--- a/numpy/testing/tests/test_decorators.py
+++ b/numpy/testing/tests/test_decorators.py
@@ -29,7 +29,6 @@ class TestNoseDecorators(object):
         pass
 
     def test_slow(self):
-        import nose
         @dec.slow
         def slow_func(x, y, z):
             pass
diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py
index 2c60e2867..e0d3414f7 100644
--- a/numpy/testing/tests/test_utils.py
+++ b/numpy/testing/tests/test_utils.py
@@ -25,12 +25,8 @@ class _GenericTest(object):
         self._assert_func(a, b)
 
     def _test_not_equal(self, a, b):
-        try:
+        with assert_raises(AssertionError):
             self._assert_func(a, b)
-        except AssertionError:
-            pass
-        else:
-            raise AssertionError("a and b are found equal but are not")
 
     def test_array_rank1_eq(self):
         """Test two equal array of rank 1 are found equal."""
@@ -1090,7 +1086,18 @@ class TestStringEqual(object):
 
 
 def assert_warn_len_equal(mod, n_in_context, py34=None, py37=None):
-    mod_warns = mod.__warningregistry__
+    try:
+        mod_warns = mod.__warningregistry__
+    except AttributeError:
+        # The lack of a __warningregistry__
+        # attribute means that no warning has
+        # occurred yet; this can happen in a
+        # parallel test scenario, while in a
+        # serial test scenario an initial
+        # warning (and therefore the attribute)
+        # is always created first.
+        mod_warns = {}
+
     num_warns = len(mod_warns)
     # Python 3.4 appears to clear any pre-existing warnings of the same type,
     # when raising warnings inside a catch_warnings block. So, there is a
@@ -1112,6 +1119,33 @@ def assert_warn_len_equal(mod, n_in_context, py34=None, py37=None):
                 n_in_context = py34
     assert_equal(num_warns, n_in_context)
 
+def test_warn_len_equal_call_scenarios():
+    # assert_warn_len_equal can be handed a module with or without a
+    # __warningregistry__ attribute, depending on whether the tests
+    # run serially or in parallel. Exercise both code paths with
+    # stand-in objects and make sure that neither call raises.
+
+    # Parallel scenario -- no warning issued yet, so the stand-in has
+    # no __warningregistry__ attribute at all.
+    class BareModule(object):
+        pass
+
+    without_registry = BareModule()
+    assert_warn_len_equal(mod=without_registry, n_in_context=0)
+
+    # Serial scenario -- the __warningregistry__ attribute is present
+    # and holds two entries, matching n_in_context below.
+    class ModuleWithRegistry(object):
+        def __init__(self):
+            self.__warningregistry__ = {
+                'warning1': 1,
+                'warning2': 2,
+            }
+
+    with_registry = ModuleWithRegistry()
+
+    assert_warn_len_equal(mod=with_registry, n_in_context=2)
+
 
 def _get_fresh_mod():
     # Get this module, with warning registry empty
diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py
index 184adcc74..98f19e348 100644
--- a/numpy/testing/utils.py
+++ b/numpy/testing/utils.py
@@ -8,8 +8,8 @@ from __future__ import division, absolute_import, print_function
 import warnings
 
 # 2018-04-04, numpy 1.15.0
-warnings.warn("Importing from numpy.testing.utils is deprecated, "
-              "import from numpy.testing instead.",
+warnings.warn("Importing from numpy.testing.utils is deprecated "
+              "since 1.15.0, import from numpy.testing instead.",
               ImportWarning, stacklevel=2)
 
 from ._private.utils import *
diff --git a/numpy/tests/test_reloading.py b/numpy/tests/test_reloading.py
index cd42252e3..a073d691f 100644
--- a/numpy/tests/test_reloading.py
+++ b/numpy/tests/test_reloading.py
@@ -1,9 +1,9 @@
 from __future__ import division, absolute_import, print_function
 
 import sys
-import pickle
 
 from numpy.testing import assert_raises, assert_, assert_equal
+from numpy.core.numeric import pickle
 
 if sys.version_info[:2] >= (3, 4):
     from importlib import reload
@@ -32,5 +32,7 @@ def test_numpy_reloading():
 
 def test_novalue():
     import numpy as np
-    assert_equal(repr(np._NoValue), '<no value>')
-    assert_(pickle.loads(pickle.dumps(np._NoValue)) is np._NoValue)
+    assert_equal(repr(np._NoValue), '<no value>')  # protocol-independent
+    for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+        # the singleton must keep its identity through a pickle round trip
+        assert_(pickle.loads(pickle.dumps(np._NoValue, proto)) is np._NoValue)
diff --git a/numpy/tests/test_scripts.py b/numpy/tests/test_scripts.py
index 26e3ea745..33210cc42 100644
--- a/numpy/tests/test_scripts.py
+++ b/numpy/tests/test_scripts.py
@@ -60,6 +60,7 @@ def run_command(cmd, check_code=True):
 
 
 @pytest.mark.skipif(is_inplace, reason="Cannot test f2py command inplace")
+@pytest.mark.xfail(reason="Test is unreliable")
 def test_f2py():
     # test that we can run f2py script