129 files changed, 2791 insertions, 1542 deletions
diff --git a/numpy/__init__.py b/numpy/__init__.py
index fef8245de..349914b2f 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -158,6 +158,7 @@ else:
 
     # Make these accessible from numpy name-space
     # but not imported in from numpy import *
+    # TODO[gh-6103]: Deprecate these
     if sys.version_info[0] >= 3:
         from builtins import bool, int, float, complex, object, str
         unicode = str
@@ -168,14 +169,17 @@ else:
     # now that numpy modules are imported, can initialize limits
     core.getlimits._register_known_types()
 
-    __all__.extend(['bool', 'int', 'float', 'complex', 'object', 'unicode',
-                    'str'])
     __all__.extend(['__version__', 'show_config'])
     __all__.extend(core.__all__)
     __all__.extend(_mat.__all__)
     __all__.extend(lib.__all__)
     __all__.extend(['linalg', 'fft', 'random', 'ctypeslib', 'ma'])
 
+    # These are added by `from .core import *` and `core.__all__`, but we
+    # overwrite them above with builtins we do _not_ want to export.
+    __all__.remove('long')
+    __all__.remove('unicode')
+
     # Remove things that are in the numpy.lib but not in the numpy namespace
     # Note that there is a test (numpy/tests/test_public_api.py:test_numpy_namespace)
     # that prevents adding more things to the main namespace by accident.
@@ -216,7 +220,7 @@ else:
                                      "{!r}".format(__name__, attr))
 
         def __dir__():
-            return __all__ + ['Tester', 'testing']
+            return list(globals().keys()) + ['Tester', 'testing']
 
     else:
         # We don't actually use this ourselves anymore, but I'm not 100% sure that
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index dbe3d226f..2f1273904 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -1036,7 +1036,12 @@ add_newdoc('numpy.core.multiarray', 'fromstring',
         A string containing the data.
     dtype : data-type, optional
         The data type of the array; default: float.  For binary input data,
-        the data must be in exactly this format.
+        the data must be in exactly this format. Most builtin numeric types are
+        supported and extension types may be supported.
+
+        .. versionadded:: 1.18.0
+            Complex dtypes.
+
     count : int, optional
         Read this number of `dtype` elements from the data.  If this is
         negative (the default), the count will be determined from the
@@ -1172,6 +1177,11 @@ add_newdoc('numpy.core.multiarray', 'fromfile',
         Data type of the returned array.
         For binary files, it is used to determine the size and byte-order
         of the items in the file.
+        Most builtin numeric types are supported and extension types may be supported.
+
+        .. versionadded:: 1.18.0
+            Complex dtypes.
+
     count : int
         Number of items to read. ``-1`` means all items (i.e., the complete
         file).
@@ -1196,7 +1206,7 @@ add_newdoc('numpy.core.multiarray', 'fromfile',
     Notes
     -----
     Do not rely on the combination of `tofile` and `fromfile` for
-    data storage, as the binary files generated are are not platform
+    data storage, as the binary files generated are not platform
     independent.  In particular, no byte-order or data-type information is
     saved.  Data can be stored in the platform independent ``.npy`` format
     using `save` and `load` instead.
@@ -1326,9 +1336,9 @@ add_newdoc('numpy.core.multiarray', 'arange',
 
     See Also
     --------
-    linspace : Evenly spaced numbers with careful handling of endpoints.
-    ogrid: Arrays of evenly spaced numbers in N-dimensions.
-    mgrid: Grid-shaped arrays of evenly spaced numbers in N-dimensions.
+    numpy.linspace : Evenly spaced numbers with careful handling of endpoints.
+    numpy.ogrid : Arrays of evenly spaced numbers in N-dimensions.
+    numpy.mgrid : Grid-shaped arrays of evenly spaced numbers in N-dimensions.
 
     Examples
     --------
@@ -3706,10 +3716,10 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('sort',
     See Also
     --------
     numpy.sort : Return a sorted copy of an array.
-    argsort : Indirect sort.
-    lexsort : Indirect stable sort on multiple keys.
-    searchsorted : Find elements in sorted array.
-    partition: Partial sort.
+    numpy.argsort : Indirect sort.
+    numpy.lexsort : Indirect stable sort on multiple keys.
+    numpy.searchsorted : Find elements in sorted array.
+    numpy.partition : Partial sort.
 
     Notes
     -----
@@ -3943,15 +3953,22 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('tolist',
 
     Examples
     --------
-    For a 1D array, ``a.tolist()`` is almost the same as ``list(a)``:
+    For a 1D array, ``a.tolist()`` is almost the same as ``list(a)``,
+    except that ``tolist`` changes numpy scalars to Python scalars:
 
-    >>> a = np.array([1, 2])
-    >>> list(a)
+    >>> a = np.uint32([1, 2])
+    >>> a_list = list(a)
+    >>> a_list
     [1, 2]
-    >>> a.tolist()
+    >>> type(a_list[0])
+    <class 'numpy.uint32'>
+    >>> a_tolist = a.tolist()
+    >>> a_tolist
     [1, 2]
+    >>> type(a_tolist[0])
+    <class 'int'>
 
-    However, for a 2D array, ``tolist`` applies recursively:
+    Additionally, for a 2D array, ``tolist`` applies recursively:
 
     >>> a = np.array([[1, 2], [3, 4]])
     >>> list(a)
@@ -4236,7 +4253,7 @@ add_newdoc('numpy.core.umath', 'frompyfunc',
 
     See Also
     --------
-    vectorize : evaluates pyfunc over input arrays using broadcasting rules of numpy
+    vectorize : Evaluates pyfunc over input arrays using broadcasting rules of numpy.
 
     Notes
     -----
@@ -4497,7 +4514,7 @@ add_newdoc('numpy.core', 'ufunc',
         Alternate array object(s) in which to put the result; if provided, it
         must have a shape that the inputs broadcast to. A tuple of arrays
         (possible only as a keyword argument) must have length equal to the
-        number of outputs; use `None` for uninitialized outputs to be
+        number of outputs; use None for uninitialized outputs to be
         allocated by the ufunc.
     where : array_like, optional
         This condition is broadcast over the input. At locations where the
@@ -4691,7 +4708,7 @@ add_newdoc('numpy.core', 'ufunc', ('signature',
     -----
     Generalized ufuncs are used internally in many linalg functions, and in
     the testing suite; the examples below are taken from these.
-    For ufuncs that operate on scalars, the signature is `None`, which is
+    For ufuncs that operate on scalars, the signature is None, which is
     equivalent to '()' for every argument.
 
     Examples
@@ -4742,7 +4759,7 @@ add_newdoc('numpy.core', 'ufunc', ('reduce',
 
         .. versionadded:: 1.7.0
 
-        If this is `None`, a reduction is performed over all the axes.
+        If this is None, a reduction is performed over all the axes.
         If this is a tuple of ints, a reduction is performed on multiple
         axes, instead of a single axis or all the axes as before.
 
@@ -4755,7 +4772,7 @@ add_newdoc('numpy.core', 'ufunc', ('reduce',
         to the data-type of the output array if this is provided, or
         the data-type of the input array if no output array is provided.
     out : ndarray, None, or tuple of ndarray and None, optional
-        A location into which the result is stored. If not provided or `None`,
+        A location into which the result is stored. If not provided or None,
         a freshly-allocated array is returned. For consistency with
         ``ufunc.__call__``, if given as a keyword, this may be wrapped in a
         1-element tuple.
@@ -4872,7 +4889,7 @@ add_newdoc('numpy.core', 'ufunc', ('accumulate',
         to the data-type of the output array if such is provided, or the
         the data-type of the input array if no output array is provided.
     out : ndarray, None, or tuple of ndarray and None, optional
-        A location into which the result is stored. If not provided or `None`,
+        A location into which the result is stored. If not provided or None,
         a freshly-allocated array is returned. For consistency with
         ``ufunc.__call__``, if given as a keyword, this may be wrapped in a
         1-element tuple.
@@ -4954,7 +4971,7 @@ add_newdoc('numpy.core', 'ufunc', ('reduceat',
         to the data type of the output array if this is provided, or
         the data type of the input array if no output array is provided.
     out : ndarray, None, or tuple of ndarray and None, optional
-        A location into which the result is stored. If not provided or `None`,
+        A location into which the result is stored. If not provided or None,
         a freshly-allocated array is returned. For consistency with
         ``ufunc.__call__``, if given as a keyword, this may be wrapped in a
         1-element tuple.
@@ -5327,7 +5344,8 @@ add_newdoc('numpy.core.multiarray', 'dtype', ('descr',
     `__array_interface__` attribute.
 
     Warning: This attribute exists specifically for `__array_interface__`,
-    and is not a datatype description compatible with `np.dtype`.
+    and passing it directly to `np.dtype` will not accurately reconstruct
+    some dtypes (e.g., scalar and subarray dtypes).
 
     Examples
     --------
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index 5fd643505..05e401e0b 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -313,7 +313,7 @@ class _ctypes(object):
         crashing. User Beware! The value of this attribute is exactly the same
         as ``self._array_interface_['data'][0]``.
 
-        Note that unlike `data_as`, a reference will not be kept to the array:
+        Note that unlike ``data_as``, a reference will not be kept to the array:
         code like ``ctypes.c_void_p((a + b).ctypes.data)`` will result in a
         pointer to a deallocated array, and should be spelt
         ``(a + b).ctypes.data_as(ctypes.c_void_p)``
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index 8a7626d9d..401018015 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -111,7 +111,7 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
     ----------
     precision : int or None, optional
         Number of digits of precision for floating point output (default 8).
-        May be `None` if `floatmode` is not `fixed`, to print as many digits as
+        May be None if `floatmode` is not `fixed`, to print as many digits as
         necessary to uniquely specify the value.
     threshold : int, optional
         Total number of array elements which trigger summarization
diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py
index 7336e5e13..22afa0320 100644
--- a/numpy/core/code_generators/genapi.py
+++ b/numpy/core/code_generators/genapi.py
@@ -8,8 +8,11 @@ specified.
 """
 from __future__ import division, absolute_import, print_function
 
+from numpy.distutils.conv_template import process_file as process_c_file
+
 import sys, os, re
 import hashlib
+import io
 
 import textwrap
 
@@ -215,7 +218,10 @@ def find_functions(filename, tag='API'):
           This function does foo...
          */
     """
-    fo = open(filename, 'r')
+    if filename.endswith(('.c.src', '.h.src')):
+        fo = io.StringIO(process_c_file(filename))
+    else:
+        fo = open(filename, 'r')
     functions = []
     return_type = None
     function_name = None
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 0d3bbffe9..6d76f7ca2 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -226,7 +226,9 @@ chartoname = {
     'P': 'OBJECT',
 }
 
-all = '?bBhHiIlLqQefdgFDGOMm'
+noobj = '?bBhHiIlLqQefdgFDGmM'
+all = '?bBhHiIlLqQefdgFDGOmM'
+
 O = 'O'
 P = 'P'
 ints = 'bBhHiIlLqQ'
@@ -246,10 +248,8 @@ inexactvec = 'fd'
 noint = inexact+O
 nointP = inexact+P
 allP = bints+times+flts+cmplxP
-nobool = all[1:]
-noobj = all[:-3]+all[-2:]
-nobool_or_obj = all[1:-3]+all[-2:]
-nobool_or_datetime = all[1:-2]+all[-1:]
+nobool_or_obj = noobj[1:]
+nobool_or_datetime = noobj[1:-1] + O # includes m - timedelta64
 intflt = ints+flts
 intfltcmplx = ints+flts+cmplx
 nocmplx = bints+times+flts
@@ -287,7 +287,7 @@ defdict = {
     Ufunc(2, 1, None, # Zero is only a unit to the right, not the left
           docstrings.get('numpy.core.umath.subtract'),
           'PyUFunc_SubtractionTypeResolver',
-          TD(notimes_or_obj, simd=[('avx2', ints)]),
+          TD(ints + inexact, simd=[('avx2', ints)]),
           [TypeDescription('M', FullTypeDescr, 'Mm', 'M'),
            TypeDescription('m', FullTypeDescr, 'mm', 'm'),
            TypeDescription('M', FullTypeDescr, 'MM', 'm'),
@@ -409,7 +409,7 @@ defdict = {
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.negative'),
           'PyUFunc_NegativeTypeResolver',
-          TD(bints+flts+timedeltaonly, simd=[('avx2', ints)]),
+          TD(ints+flts+timedeltaonly, simd=[('avx2', ints)]),
           TD(cmplx, f='neg'),
           TD(O, f='PyNumber_Negative'),
           ),
@@ -433,6 +433,7 @@ defdict = {
           'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?', simd=[('avx2', ints)]),
           [TypeDescription('O', FullTypeDescr, 'OO', 'O')],
+          TD('O', out='?'),
           ),
 'greater_equal':
     Ufunc(2, 1, None,
@@ -440,6 +441,7 @@ defdict = {
           'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?', simd=[('avx2', ints)]),
           [TypeDescription('O', FullTypeDescr, 'OO', 'O')],
+          TD('O', out='?'),
           ),
 'less':
     Ufunc(2, 1, None,
@@ -447,6 +449,7 @@ defdict = {
           'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?', simd=[('avx2', ints)]),
           [TypeDescription('O', FullTypeDescr, 'OO', 'O')],
+          TD('O', out='?'),
           ),
 'less_equal':
     Ufunc(2, 1, None,
@@ -454,6 +457,7 @@ defdict = {
           'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?', simd=[('avx2', ints)]),
           [TypeDescription('O', FullTypeDescr, 'OO', 'O')],
+          TD('O', out='?'),
           ),
 'equal':
     Ufunc(2, 1, None,
@@ -461,6 +465,7 @@ defdict = {
           'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?', simd=[('avx2', ints)]),
           [TypeDescription('O', FullTypeDescr, 'OO', 'O')],
+          TD('O', out='?'),
           ),
 'not_equal':
     Ufunc(2, 1, None,
@@ -468,6 +473,7 @@ defdict = {
           'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?', simd=[('avx2', ints)]),
           [TypeDescription('O', FullTypeDescr, 'OO', 'O')],
+          TD('O', out='?'),
           ),
 'logical_and':
     Ufunc(2, 1, True_,
@@ -475,6 +481,7 @@ defdict = {
           'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]),
           TD(O, f='npy_ObjectLogicalAnd'),
+          TD(O, f='npy_ObjectLogicalAnd', out='?'),
           ),
 'logical_not':
     Ufunc(1, 1, None,
@@ -482,6 +489,7 @@ defdict = {
           None,
           TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]),
           TD(O, f='npy_ObjectLogicalNot'),
+          TD(O, f='npy_ObjectLogicalNot', out='?'),
           ),
 'logical_or':
     Ufunc(2, 1, False_,
@@ -489,6 +497,7 @@ defdict = {
           'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]),
           TD(O, f='npy_ObjectLogicalOr'),
+          TD(O, f='npy_ObjectLogicalOr', out='?'),
           ),
 'logical_xor':
     Ufunc(2, 1, False_,
@@ -849,8 +858,8 @@ defdict = {
 'isnan':
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.isnan'),
-          None,
-          TD(nodatetime_or_obj, out='?'),
+          'PyUFunc_IsFiniteTypeResolver',
+          TD(noobj, out='?'),
           ),
 'isnat':
     Ufunc(1, 1, None,
@@ -861,8 +870,8 @@ defdict = {
 'isinf':
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.isinf'),
-          None,
-          TD(nodatetime_or_obj, out='?'),
+          'PyUFunc_IsFiniteTypeResolver',
+          TD(noobj, out='?'),
           ),
 'isfinite':
     Ufunc(1, 1, None,
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 1ac477b54..4dec73505 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -22,7 +22,7 @@ subst = {
     'PARAMS': textwrap.dedent("""
         out : ndarray, None, or tuple of ndarray and None, optional
             A location into which the result is stored. If provided, it must have
-            a shape that the inputs broadcast to. If not provided or `None`,
+            a shape that the inputs broadcast to. If not provided or None,
             a freshly-allocated array is returned. A tuple (possible only as a
             keyword argument) must have length equal to the number of outputs.
         where : array_like, optional
@@ -2596,7 +2596,7 @@ add_newdoc('numpy.core.umath', 'matmul',
     out : ndarray, optional
         A location into which the result is stored. If provided, it must have
         a shape that matches the signature `(n,k),(k,m)->(n,m)`. If not
-        provided or `None`, a freshly-allocated array is returned.
+        provided or None, a freshly-allocated array is returned.
     **kwargs
         For other keyword-only arguments, see the
         :ref:`ufunc docs <ufuncs.kwargs>`.
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index a941c5b81..2d89d6fe0 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -82,7 +82,7 @@ def _clean_args(*args):
 
     Many of the Python string operations that have optional arguments
     do not use 'None' to indicate a default value.  In these cases,
-    we need to remove all `None` arguments, and those following them.
+    we need to remove all None arguments, and those following them.
     """
     newargs = []
     for chk in args:
@@ -1333,7 +1333,7 @@ def rsplit(a, sep=None, maxsplit=None):
     a : array_like of str or unicode
 
     sep : str or unicode, optional
-        If `sep` is not specified or `None`, any whitespace string
+        If `sep` is not specified or None, any whitespace string
         is a separator.
     maxsplit : int, optional
         If `maxsplit` is given, at most `maxsplit` splits are done,
@@ -1417,7 +1417,7 @@ def split(a, sep=None, maxsplit=None):
     a : array_like of str or unicode
 
     sep : str or unicode, optional
-       If `sep` is not specified or `None`, any whitespace string is a
+       If `sep` is not specified or None, any whitespace string is a
        separator.
 
     maxsplit : int, optional
@@ -2659,7 +2659,7 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
     unicode : bool, optional
         When true, the resulting `chararray` can contain Unicode
         characters, when false only 8-bit characters.  If unicode is
-        `None` and `obj` is one of the following:
+        None and `obj` is one of the following:
 
           - a `chararray`,
           - an ndarray of type `str` or `unicode`
@@ -2799,7 +2799,7 @@ def asarray(obj, itemsize=None, unicode=None, order=None):
     unicode : bool, optional
         When true, the resulting `chararray` can contain Unicode
         characters, when false only 8-bit characters.  If unicode is
-        `None` and `obj` is one of the following:
+        None and `obj` is one of the following:
 
           - a `chararray`,
           - an ndarray of type `str` or 'unicode`
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 6c0b9cde9..6e5f3dabf 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -796,7 +796,9 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None):
     --------
     partition : Describes partition algorithms used.
     ndarray.partition : Inplace partition.
-    argsort : Full indirect sort
+    argsort : Full indirect sort.
+    take_along_axis : Apply ``index_array`` from argpartition
+                      to an array as if by calling partition.
 
     Notes
     -----
@@ -816,6 +818,14 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None):
     >>> np.array(x)[np.argpartition(x, 3)]
     array([2, 1, 3, 4])
 
+    Multi-dimensional array:
+
+    >>> x = np.array([[3, 4, 2], [1, 3, 1]])
+    >>> index_array = np.argpartition(x, kth=1, axis=-1)
+    >>> np.take_along_axis(x, index_array, axis=-1)  # same as np.partition(x, kth=1)
+    array([[2, 3, 4],
+           [1, 1, 3]])
+
     """
     return _wrapfunc(a, 'argpartition', kth, axis=axis, kind=kind, order=order)
 
@@ -1025,6 +1035,8 @@ def argsort(a, axis=-1, kind=None, order=None):
     lexsort : Indirect stable sort with multiple keys.
     ndarray.sort : Inplace sort.
     argpartition : Indirect partial sort.
+    take_along_axis : Apply ``index_array`` from argsort
+                      to an array as if by calling sort.
 
     Notes
     -----
@@ -1120,6 +1132,8 @@ def argmax(a, axis=None, out=None):
     ndarray.argmax, argmin
     amax : The maximum value along a given axis.
     unravel_index : Convert a flat index into an index tuple.
+    take_along_axis : Apply ``np.expand_dims(index_array, axis)``
+                      from argmax to an array as if by calling max.
 
     Notes
     -----
@@ -1154,6 +1168,16 @@ def argmax(a, axis=None, out=None):
     >>> np.argmax(b)  # Only the first occurrence is returned.
     1
 
+    >>> x = np.array([[4,2,3], [1,0,3]])
+    >>> index_array = np.argmax(x, axis=-1)
+    >>> # Same as np.max(x, axis=-1, keepdims=True)
+    >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1)
+    array([[4],
+           [3]])
+    >>> # Same as np.max(x, axis=-1)
+    >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1).squeeze(axis=-1)
+    array([4, 3])
+
     """
     return _wrapfunc(a, 'argmax', axis=axis, out=out)
 
@@ -1189,6 +1213,8 @@ def argmin(a, axis=None, out=None):
     ndarray.argmin, argmax
     amin : The minimum value along a given axis.
     unravel_index : Convert a flat index into an index tuple.
+    take_along_axis : Apply ``np.expand_dims(index_array, axis)``
+                      from argmin to an array as if by calling min.
 
     Notes
     -----
@@ -1223,6 +1249,16 @@ def argmin(a, axis=None, out=None):
     >>> np.argmin(b)  # Only the first occurrence is returned.
     0
 
+    >>> x = np.array([[4,2,3], [1,0,3]])
+    >>> index_array = np.argmin(x, axis=-1)
+    >>> # Same as np.min(x, axis=-1, keepdims=True)
+    >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1)
+    array([[2],
+           [0]])
+    >>> # Same as np.min(x, axis=-1)
+    >>> np.take_along_axis(x, np.expand_dims(index_array, axis=-1), axis=-1).squeeze(axis=-1)
+    array([2, 0])
+
     """
     return _wrapfunc(a, 'argmin', axis=axis, out=out)
 
@@ -1409,7 +1445,7 @@ def squeeze(a, axis=None):
     Raises
     ------
     ValueError
-        If `axis` is not `None`, and an axis being squeezed is not of length 1
+        If `axis` is not None, and an axis being squeezed is not of length 1
 
     See Also
     --------
@@ -1945,7 +1981,7 @@ def compress(condition, a, axis=None, out=None):
     take, choose, diag, diagonal, select
     ndarray.compress : Equivalent method in ndarray
     np.extract: Equivalent method when working on 1-D arrays
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Examples
     --------
@@ -1995,14 +2031,14 @@ def clip(a, a_min, a_max, out=None, **kwargs):
     ----------
     a : array_like
         Array containing elements to clip.
-    a_min : scalar or array_like or `None`
-        Minimum value. If `None`, clipping is not performed on lower
+    a_min : scalar or array_like or None
+        Minimum value. If None, clipping is not performed on lower
         interval edge. Not more than one of `a_min` and `a_max` may be
-        `None`.
-    a_max : scalar or array_like or `None`
-        Maximum value. If `None`, clipping is not performed on upper
+        None.
+    a_max : scalar or array_like or None
+        Maximum value. If None, clipping is not performed on upper
         interval edge. Not more than one of `a_min` and `a_max` may be
-        `None`. If `a_min` or `a_max` are array_like, then the three
+        None. If `a_min` or `a_max` are array_like, then the three
         arrays will be broadcasted to match their shapes.
     out : ndarray, optional
         The results will be placed in this array. It may be the input
@@ -2023,7 +2059,7 @@ def clip(a, a_min, a_max, out=None, **kwargs):
 
     See Also
     --------
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Examples
     --------
@@ -2206,7 +2242,7 @@ def any(a, axis=None, out=None, keepdims=np._NoValue):
         Input array or object that can be converted to an array.
     axis : None or int or tuple of ints, optional
         Axis or axes along which a logical OR reduction is performed.
-        The default (`axis` = `None`) is to perform a logical OR over all
+        The default (``axis=None``) is to perform a logical OR over all
         the dimensions of the input array. `axis` may be negative, in
         which case it counts from the last to the first axis.
 
@@ -2219,7 +2255,7 @@ def any(a, axis=None, out=None, keepdims=np._NoValue):
         the same shape as the expected output and its type is preserved
         (e.g., if it is of type float, then it will remain so, returning
         1.0 for True and 0.0 for False, regardless of the type of `a`).
-        See `doc.ufuncs` (Section "Output arguments") for details.
+        See `ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2292,7 +2328,7 @@ def all(a, axis=None, out=None, keepdims=np._NoValue):
         Input array or object that can be converted to an array.
     axis : None or int or tuple of ints, optional
         Axis or axes along which a logical AND reduction is performed.
-        The default (`axis` = `None`) is to perform a logical AND over all
+        The default (``axis=None``) is to perform a logical AND over all
         the dimensions of the input array. `axis` may be negative, in
         which case it counts from the last to the first axis.
 
@@ -2304,8 +2340,8 @@ def all(a, axis=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.
         It must have the same shape as the expected output and its
         type is preserved (e.g., if ``dtype(out)`` is float, the result
-        will consist of 0.0's and 1.0's).  See `doc.ufuncs` (Section
-        "Output arguments") for more details.
+        will consist of 0.0's and 1.0's). See `ufuncs-output-type` for more
+        details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2383,8 +2419,8 @@ def cumsum(a, axis=None, dtype=None, out=None):
     out : ndarray, optional
         Alternative output array in which to place the result. It must
         have the same shape and buffer length as the expected output
-        but the type will be cast if necessary. See `doc.ufuncs`
-        (Section "Output arguments") for more details.
+        but the type will be cast if necessary. See `ufuncs-output-type` for
+        more details.
 
     Returns
     -------
@@ -2529,7 +2565,7 @@ def amax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
     out : ndarray, optional
         Alternative output array in which to place the result.  Must
         be of the same shape and buffer length as the expected output.
-        See `doc.ufuncs` (Section "Output arguments") for more details.
+        See `ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2654,7 +2690,7 @@ def amin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
     out : ndarray, optional
         Alternative output array in which to place the result.  Must
         be of the same shape and buffer length as the expected output.
-        See `doc.ufuncs` (Section "Output arguments") for more details.
+        See `ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2861,7 +2897,7 @@ def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
     See Also
     --------
     ndarray.prod : equivalent method
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
@@ -2957,7 +2993,7 @@ def cumprod(a, axis=None, dtype=None, out=None):
 
     See Also
     --------
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
@@ -3103,8 +3139,8 @@ def around(a, decimals=0, out=None):
     out : ndarray, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output, but the type of the output
-        values will be cast if necessary. See `doc.ufuncs` (Section
-        "Output arguments") for details.
+        values will be cast if necessary. See `ufuncs-output-type` for more
+        details.
 
     Returns
     -------
@@ -3218,7 +3254,7 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
         expected output, but the type will be cast if necessary.
-        See `doc.ufuncs` for details.
+        See `ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -3353,7 +3389,7 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     See Also
     --------
     var, mean, nanmean, nanstd, nanvar
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
@@ -3478,7 +3514,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     See Also
     --------
     std, mean, nanmean, nanstd, nanvar
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py
index 42604ec3f..538ac8b84 100644
--- a/numpy/core/function_base.py
+++ b/numpy/core/function_base.py
@@ -139,7 +139,7 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None,
     # from overriding what class is produced, and thus prevents, e.g. use of Quantities,
     # see gh-7142. Hence, we multiply in place only for standard scalar types.
     _mult_inplace = _nx.isscalar(delta)
-    if num > 1:
+    if div > 0:
         step = delta / div
         if _nx.any(step == 0):
             # Special handling for denormal numbers, gh-5437
@@ -154,7 +154,8 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None,
             else:
                 y = y * step
     else:
-        # 0 and 1 item long sequences have an undefined step
+        # sequences with 0 items or 1 item with endpoint=True (i.e. div <= 0)
+        # have an undefined step
         step = NaN
         # Multiply with delta to allow possible override of output class.
         y = y * delta
diff --git a/numpy/random/src/bitgen.h b/numpy/core/include/numpy/random/bitgen.h
index 0adaaf2ee..83c2858dd 100644
--- a/numpy/random/src/bitgen.h
+++ b/numpy/core/include/numpy/random/bitgen.h
@@ -6,7 +6,7 @@
 #include <stdbool.h>
 #include <stdint.h>
 
-/* Must match the declaration in numpy/random/common.pxd */
+/* Must match the declaration in numpy/random/<any>.pxd */
 
 typedef struct bitgen {
   void *state;
diff --git a/numpy/random/src/distributions/distributions.h b/numpy/core/include/numpy/random/distributions.h
index 2a6b2a045..e489e69b8 100644
--- a/numpy/random/src/distributions/distributions.h
+++ b/numpy/core/include/numpy/random/distributions.h
@@ -8,7 +8,7 @@
 #include <stdint.h>
 
 #include "numpy/npy_math.h"
-#include "src/bitgen.h"
+#include "numpy/random/bitgen.h"
 
 /*
  * RAND_INT_TYPE is used to share integer generators with RandomState which
@@ -59,28 +59,10 @@ typedef struct s_binomial_t {
   double p4;
 } binomial_t;
 
-/* Inline generators for internal use */
-static NPY_INLINE uint32_t next_uint32(bitgen_t *bitgen_state) {
-  return bitgen_state->next_uint32(bitgen_state->state);
-}
-
-static NPY_INLINE uint64_t next_uint64(bitgen_t *bitgen_state) {
-  return bitgen_state->next_uint64(bitgen_state->state);
-}
-
-static NPY_INLINE float next_float(bitgen_t *bitgen_state) {
-  return (next_uint32(bitgen_state) >> 9) * (1.0f / 8388608.0f);
-}
-
-static NPY_INLINE double next_double(bitgen_t *bitgen_state) {
-  return bitgen_state->next_double(bitgen_state->state);
-}
-
-DECLDIR double loggam(double x);
-
-DECLDIR float random_float(bitgen_t *bitgen_state);
-DECLDIR double random_double(bitgen_t *bitgen_state);
-DECLDIR void random_double_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out);
+DECLDIR float random_standard_uniform_f(bitgen_t *bitgen_state);
+DECLDIR double random_standard_uniform(bitgen_t *bitgen_state);
+DECLDIR void random_standard_uniform_fill(bitgen_t *, npy_intp, double *);
+DECLDIR void random_standard_uniform_fill_f(bitgen_t *, npy_intp, float *);
 
 DECLDIR int64_t random_positive_int64(bitgen_t *bitgen_state);
 DECLDIR int32_t random_positive_int32(bitgen_t *bitgen_state);
@@ -88,37 +70,25 @@ DECLDIR int64_t random_positive_int(bitgen_t *bitgen_state);
 DECLDIR uint64_t random_uint(bitgen_t *bitgen_state);
 
 DECLDIR double random_standard_exponential(bitgen_t *bitgen_state);
-DECLDIR void random_standard_exponential_fill(bitgen_t *bitgen_state, npy_intp cnt,
-                                              double *out);
 DECLDIR float random_standard_exponential_f(bitgen_t *bitgen_state);
 DECLDIR double random_standard_exponential_zig(bitgen_t *bitgen_state);
-DECLDIR void random_standard_exponential_zig_fill(bitgen_t *bitgen_state,
-                                                  npy_intp cnt, double *out);
 DECLDIR float random_standard_exponential_zig_f(bitgen_t *bitgen_state);
-
-/*
-DECLDIR double random_gauss(bitgen_t *bitgen_state);
-DECLDIR float random_gauss_f(bitgen_t *bitgen_state);
-*/
-DECLDIR double random_gauss_zig(bitgen_t *bitgen_state);
-DECLDIR float random_gauss_zig_f(bitgen_t *bitgen_state);
-DECLDIR void random_gauss_zig_fill(bitgen_t *bitgen_state, npy_intp cnt,
-                                   double *out);
-
-/*
+DECLDIR void random_standard_exponential_fill(bitgen_t *, npy_intp, double *);
+DECLDIR void random_standard_exponential_fill_f(bitgen_t *, npy_intp, float *);
+DECLDIR void random_standard_exponential_zig_fill(bitgen_t *, npy_intp, double *);
+DECLDIR void random_standard_exponential_zig_fill_f(bitgen_t *, npy_intp, float *);
+
+DECLDIR double random_standard_normal(bitgen_t *bitgen_state);
+DECLDIR float random_standard_normal_f(bitgen_t *bitgen_state);
+DECLDIR void random_standard_normal_fill(bitgen_t *, npy_intp, double *);
+DECLDIR void random_standard_normal_fill_f(bitgen_t *, npy_intp, float *);
 DECLDIR double random_standard_gamma(bitgen_t *bitgen_state, double shape);
 DECLDIR float random_standard_gamma_f(bitgen_t *bitgen_state, float shape);
-*/
-DECLDIR double random_standard_gamma_zig(bitgen_t *bitgen_state, double shape);
-DECLDIR float random_standard_gamma_zig_f(bitgen_t *bitgen_state, float shape);
 
-/*
 DECLDIR double random_normal(bitgen_t *bitgen_state, double loc, double scale);
-*/
-DECLDIR double random_normal_zig(bitgen_t *bitgen_state, double loc, double scale);
 
 DECLDIR double random_gamma(bitgen_t *bitgen_state, double shape, double scale);
-DECLDIR float random_gamma_float(bitgen_t *bitgen_state, float shape, float scale);
+DECLDIR float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale);
 
 DECLDIR double random_exponential(bitgen_t *bitgen_state, double scale);
 DECLDIR double random_uniform(bitgen_t *bitgen_state, double lower, double range);
@@ -146,27 +116,16 @@ DECLDIR double random_triangular(bitgen_t *bitgen_state, double left, double mod
 
 DECLDIR RAND_INT_TYPE random_poisson(bitgen_t *bitgen_state, double lam);
 DECLDIR RAND_INT_TYPE random_negative_binomial(bitgen_t *bitgen_state, double n,
-                                         double p);
-
-DECLDIR RAND_INT_TYPE random_binomial_btpe(bitgen_t *bitgen_state,
-                                           RAND_INT_TYPE n,
-                                           double p,
-                                           binomial_t *binomial);
-DECLDIR RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state,
-                                                RAND_INT_TYPE n,
-                                                double p,
-                                                binomial_t *binomial);
+                                 double p);
+
 DECLDIR int64_t random_binomial(bitgen_t *bitgen_state, double p,
                                 int64_t n, binomial_t *binomial);
 
 DECLDIR RAND_INT_TYPE random_logseries(bitgen_t *bitgen_state, double p);
-DECLDIR RAND_INT_TYPE random_geometric_search(bitgen_t *bitgen_state, double p);
-DECLDIR RAND_INT_TYPE random_geometric_inversion(bitgen_t *bitgen_state, double p);
 DECLDIR RAND_INT_TYPE random_geometric(bitgen_t *bitgen_state, double p);
 DECLDIR RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a);
 DECLDIR int64_t random_hypergeometric(bitgen_t *bitgen_state,
                                       int64_t good, int64_t bad, int64_t sample);
-
 DECLDIR uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max);
 
 /* Generate random uint64 numbers in closed interval [off, off + rng]. */
@@ -211,4 +170,33 @@ DECLDIR void random_bounded_bool_fill(bitgen_t *bitgen_state, npy_bool off,
 DECLDIR void random_multinomial(bitgen_t *bitgen_state, RAND_INT_TYPE n, RAND_INT_TYPE *mnix,
                                 double *pix, npy_intp d, binomial_t *binomial);
 
+/* multivariate hypergeometric, "count" method */
+DECLDIR int random_mvhg_count(bitgen_t *bitgen_state,
+                              int64_t total,
+                              size_t num_colors, int64_t *colors,
+                              int64_t nsample,
+                              size_t num_variates, int64_t *variates);
+
+/* multivariate hypergeometric, "marginals" method */
+DECLDIR void random_mvhg_marginals(bitgen_t *bitgen_state,
+                                   int64_t total,
+                                   size_t num_colors, int64_t *colors,
+                                   int64_t nsample,
+                                   size_t num_variates, int64_t *variates);
+
+/* Common to legacy-distributions.c and distributions.c but not exported */
+
+RAND_INT_TYPE random_binomial_btpe(bitgen_t *bitgen_state,
+                                   RAND_INT_TYPE n,
+                                   double p,
+                                   binomial_t *binomial);
+RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state,
+                                        RAND_INT_TYPE n,
+                                        double p,
+                                        binomial_t *binomial);
+double random_loggam(double x);
+static NPY_INLINE double next_double(bitgen_t *bitgen_state) {
+    return bitgen_state->next_double(bitgen_state->state);
+}
+
 #endif
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index b5568fd86..1e011e2e7 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -292,7 +292,7 @@ def full(shape, fill_value, dtype=None, order='C'):
     fill_value : scalar
         Fill value.
     dtype : data-type, optional
-        The desired data-type for the array  The default, `None`, means
+        The desired data-type for the array.  The default, None, means
          `np.array(fill_value).dtype`.
     order : {'C', 'F'}, optional
         Whether to store multidimensional data in C- or Fortran-contiguous
@@ -1784,19 +1784,19 @@ def _frombuffer(buf, dtype, shape, order):
 
 
 @set_module('numpy')
-def isscalar(num):
+def isscalar(element):
     """
-    Returns True if the type of `num` is a scalar type.
+    Returns True if the type of `element` is a scalar type.
 
     Parameters
     ----------
-    num : any
+    element : any
         Input argument, can be of any type and shape.
 
     Returns
     -------
     val : bool
-        True if `num` is a scalar type, False if it is not.
+        True if `element` is a scalar type, False if it is not.
 
     See Also
     --------
@@ -1804,10 +1804,14 @@ def isscalar(num):
 
     Notes
     -----
-    In almost all cases ``np.ndim(x) == 0`` should be used instead of this
-    function, as that will also return true for 0d arrays. This is how
-    numpy overloads functions in the style of the ``dx`` arguments to `gradient`
-    and the ``bins`` argument to `histogram`. Some key differences:
+    If you need a stricter way to identify a *numerical* scalar, use
+    ``isinstance(x, numbers.Number)``, as that returns ``False`` for most
+    non-numerical elements such as strings.
+
+    In most cases ``np.ndim(x) == 0`` should be used instead of this function,
+    as that will also return true for 0d arrays. This is how numpy overloads
+    functions in the style of the ``dx`` arguments to `gradient` and the ``bins``
+    argument to `histogram`. Some key differences:
 
     +--------------------------------------+---------------+-------------------+
     | x                                    |``isscalar(x)``|``np.ndim(x) == 0``|
@@ -1855,9 +1859,9 @@ def isscalar(num):
     True
 
     """
-    return (isinstance(num, generic)
-            or type(num) in ScalarType
-            or isinstance(num, numbers.Number))
+    return (isinstance(element, generic)
+            or type(element) in ScalarType
+            or isinstance(element, numbers.Number))
 
 
 @set_module('numpy')
@@ -2094,9 +2098,9 @@ def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
     `atol` are added together to compare against the absolute difference
     between `a` and `b`.
 
-    If either array contains one or more NaNs, False is returned.
-    Infs are treated as equal if they are in the same place and of the same
-    sign in both arrays.
+    NaNs are treated as equal if they are in the same place and if
+    ``equal_nan=True``.  Infs are treated as equal if they are in the same
+    place and of the same sign in both arrays.
 
     Parameters
     ----------
@@ -2108,7 +2112,7 @@ def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
         The absolute tolerance parameter (see Notes).
     equal_nan : bool
         Whether to compare NaN's as equal.  If True, NaN's in `a` will be
-        considered equal to NaN's in `b`.
+        considered equal to NaN's in `b` in the output array.
 
         .. versionadded:: 1.10.0
 
diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py
index ab1ff65a4..761c7087c 100644
--- a/numpy/core/numerictypes.py
+++ b/numpy/core/numerictypes.py
@@ -485,7 +485,7 @@ def sctype2char(sctype):
 
     Examples
     --------
-    >>> for sctype in [np.int32, np.double, np.complex, np.string_, np.ndarray]:
+    >>> for sctype in [np.int32, np.double, np.complex_, np.string_, np.ndarray]:
     ...     print(np.sctype2char(sctype))
     l # may vary
     d
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 5f2f4a7b2..a4b5cfe5f 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -655,6 +655,9 @@ def configuration(parent_package='',top_path=None):
         # compiler does not work).
         st = config_cmd.try_link('int main(void) { return 0;}')
         if not st:
+            # rerun the failing command in verbose mode
+            config_cmd.compiler.verbose = True
+            config_cmd.try_link('int main(void) { return 0;}')
             raise RuntimeError("Broken toolchain: cannot link a simple C program")
         mlibs = check_mathlib(config_cmd)
 
@@ -771,7 +774,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'arrayobject.h'),
             join('src', 'multiarray', 'arraytypes.h'),
             join('src', 'multiarray', 'arrayfunction_override.h'),
-            join('src', 'multiarray', 'buffer.h'),
+            join('src', 'multiarray', 'npy_buffer.h'),
             join('src', 'multiarray', 'calculation.h'),
             join('src', 'multiarray', 'common.h'),
             join('src', 'multiarray', 'convert_datatype.h'),
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 84b78b585..6356f08ba 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -266,8 +266,9 @@ def check_long_double_representation(cmd):
     except ValueError:
         # try linking to support CC="gcc -flto" or icc -ipo
         # struct needs to be volatile so it isn't optimized away
+        # additionally "clang -flto" requires the foo struct to be used
         body = body.replace('struct', 'volatile struct')
-        body += "int main(void) { return 0; }\n"
+        body += "int main(void) { return foo.before[0]; }\n"
         src, obj = cmd._compile(body, None, None, 'c')
         cmd.temp_files.append("_configtest")
         cmd.compiler.link_executable([obj], "_configtest")
diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index d7e769e62..369d956fb 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -472,7 +472,7 @@ def _block_check_depths_match(arrays, parent_index=[]):
     first_index : list of int
         The full index of an element from the bottom of the nesting in
         `arrays`. If any element at the bottom is an empty list, this will
-        refer to it, and the last index along the empty axis will be `None`.
+        refer to it, and the last index along the empty axis will be None.
     max_arr_ndim : int
         The maximum of the ndims of the arrays nested in `arrays`.
     final_size: int
diff --git a/numpy/core/src/common/binop_override.h b/numpy/core/src/common/binop_override.h
index 47df63e38..c5e7ab808 100644
--- a/numpy/core/src/common/binop_override.h
+++ b/numpy/core/src/common/binop_override.h
@@ -129,11 +129,14 @@ binop_should_defer(PyObject *self, PyObject *other, int inplace)
      * check whether __array_ufunc__ equals None.
      */
     attr = PyArray_LookupSpecial(other, "__array_ufunc__");
-    if (attr) {
+    if (attr != NULL) {
         defer = !inplace && (attr == Py_None);
         Py_DECREF(attr);
         return defer;
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
     /*
      * Otherwise, we need to check for the legacy __array_priority__. But if
      * other.__class__ is a subtype of self.__class__, then it's already had
diff --git a/numpy/core/src/common/get_attr_string.h b/numpy/core/src/common/get_attr_string.h
index d458d9550..d3401aea6 100644
--- a/numpy/core/src/common/get_attr_string.h
+++ b/numpy/core/src/common/get_attr_string.h
@@ -40,18 +40,14 @@ _is_basic_python_type(PyTypeObject *tp)
 }
 
 /*
- * Stripped down version of PyObject_GetAttrString,
- * avoids lookups for None, tuple, and List objects,
- * and doesn't create a PyErr since this code ignores it.
+ * Stripped down version of PyObject_GetAttrString(obj, name) that does not
+ * raise PyExc_AttributeError.
  *
- * This can be much faster then PyObject_GetAttrString where
- * exceptions are not used by caller.
+ * This allows it to avoid creating then discarding exception objects when
+ * performing lookups on objects without any attributes.
  *
- * 'obj' is the object to search for attribute.
- *
- * 'name' is the attribute to search for.
- *
- * Returns attribute value on success, NULL on failure.
+ * Returns attribute value on success, NULL without an exception set if
+ * there is no such attribute, and NULL with an exception on failure.
  */
 static NPY_INLINE PyObject *
 maybe_get_attr(PyObject *obj, char *name)
@@ -62,7 +58,7 @@ maybe_get_attr(PyObject *obj, char *name)
     /* Attribute referenced by (char *)name */
     if (tp->tp_getattr != NULL) {
         res = (*tp->tp_getattr)(obj, name);
-        if (res == NULL) {
+        if (res == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
             PyErr_Clear();
         }
     }
@@ -78,7 +74,7 @@ maybe_get_attr(PyObject *obj, char *name)
         }
         res = (*tp->tp_getattro)(obj, w);
         Py_DECREF(w);
-        if (res == NULL) {
+        if (res == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
             PyErr_Clear();
         }
     }
diff --git a/numpy/core/src/common/ufunc_override.c b/numpy/core/src/common/ufunc_override.c
index 89f08a9cb..3f699bcdd 100644
--- a/numpy/core/src/common/ufunc_override.c
+++ b/numpy/core/src/common/ufunc_override.c
@@ -36,6 +36,9 @@ PyUFuncOverride_GetNonDefaultArrayUfunc(PyObject *obj)
      */
     cls_array_ufunc = PyArray_LookupSpecial(obj, "__array_ufunc__");
     if (cls_array_ufunc == NULL) {
+        if (PyErr_Occurred()) {
+            PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+        }
         return NULL;
     }
     /* Ignore if the same as ndarray.__array_ufunc__ */
diff --git a/numpy/core/src/multiarray/arrayfunction_override.c b/numpy/core/src/multiarray/arrayfunction_override.c
index 62e597764..9ea8efdd9 100644
--- a/numpy/core/src/multiarray/arrayfunction_override.c
+++ b/numpy/core/src/multiarray/arrayfunction_override.c
@@ -26,6 +26,7 @@ static PyObject *
 get_array_function(PyObject *obj)
 {
     static PyObject *ndarray_array_function = NULL;
+    PyObject *array_function;
 
     if (ndarray_array_function == NULL) {
         ndarray_array_function = get_ndarray_array_function();
@@ -37,7 +38,12 @@ get_array_function(PyObject *obj)
         return ndarray_array_function;
     }
 
-    return PyArray_LookupSpecial(obj, "__array_function__");
+    array_function = PyArray_LookupSpecial(obj, "__array_function__");
+    if (array_function == NULL && PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
+
+    return array_function;
 }
 
 
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 4e229e321..a5cebfbd8 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -48,7 +48,7 @@ maintainer email:  oliphant.travis@ieee.org
 #include "mapping.h"
 #include "getset.h"
 #include "sequence.h"
-#include "buffer.h"
+#include "npy_buffer.h"
 #include "array_assign.h"
 #include "alloc.h"
 #include "mem_overlap.h"
@@ -557,7 +557,7 @@ PyArray_DebugPrint(PyArrayObject *obj)
     printf(" ndim   : %d\n", fobj->nd);
     printf(" shape  :");
     for (i = 0; i < fobj->nd; ++i) {
-        printf(" %d", (int)fobj->dimensions[i]);
+        printf(" %" NPY_INTP_FMT, fobj->dimensions[i]);
     }
     printf("\n");
 
@@ -567,7 +567,7 @@ PyArray_DebugPrint(PyArrayObject *obj)
     printf(" data   : %p\n", fobj->data);
     printf(" strides:");
     for (i = 0; i < fobj->nd; ++i) {
-        printf(" %d", (int)fobj->strides[i]);
+        printf(" %" NPY_INTP_FMT, fobj->strides[i]);
     }
     printf("\n");
 
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 5d9e990e8..e36b95c00 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -36,7 +36,7 @@
 
 #include "cblasfuncs.h"
 #include "npy_cblas.h"
-#include "buffer.h"
+#include "npy_buffer.h"
 
 /* check for sequences, but ignore the types numpy considers scalars */
 static NPY_INLINE npy_bool
@@ -1081,6 +1081,7 @@ TIMEDELTA_setitem(PyObject *op, void *ov, void *vap)
  *           npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *           npy_float, npy_double, npy_longdouble,
  *           npy_datetime, npy_timedelta#
+ * #supports_nat = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1#
  */
 
 /**begin repeat1
@@ -1092,6 +1093,7 @@ TIMEDELTA_setitem(PyObject *op, void *ov, void *vap)
  *             npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *             npy_float, npy_double, npy_longdouble,
  *             npy_datetime, npy_timedelta#
+ * #floatingpoint = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0#
  */
 static void
 @FROMTYPE@_to_@TOTYPE@(void *input, void *output, npy_intp n,
@@ -1101,7 +1103,15 @@ static void
     @totype@ *op = output;
 
     while (n--) {
-        *op++ = (@totype@)*ip++;
+        @fromtype@ f = *ip++;
+        @totype@ t = (@totype@)f;
+#if @supports_nat@ && @floatingpoint@
+        /* Avoid undefined behaviour for NaN -> NaT */
+        if (npy_isnan(f)) {
+            t = (@totype@)NPY_DATETIME_NAT;
+        }
+#endif
+        *op++ = t;
     }
 }
 /**end repeat1**/
@@ -1119,7 +1129,15 @@ static void
     @totype@ *op = output;
 
     while (n--) {
-        *op++ = (@totype@)*ip;
+        @fromtype@ f = *ip;
+        @totype@ t = (@totype@)f;
+#if @supports_nat@
+        /* Avoid undefined behaviour for NaN -> NaT */
+        if (npy_isnan(f)) {
+            t = (@totype@)NPY_DATETIME_NAT;
+        }
+#endif
+        *op++ = t;
         ip += 2;
     }
 }
@@ -1757,7 +1775,58 @@ BOOL_scan(FILE *fp, npy_bool *ip, void *NPY_UNUSED(ignore),
 }
 
 /**begin repeat
- * #fname = CFLOAT, CDOUBLE, CLONGDOUBLE,
+ * #fname = CFLOAT, CDOUBLE#
+ * #type = npy_cfloat, npy_cdouble#
+ */
+static int
+@fname@_scan(FILE *fp, @type@ *ip, void *NPY_UNUSED(ignore),
+             PyArray_Descr *NPY_UNUSED(ignored))
+{
+    double result;
+    int ret_real, ret_imag;
+    // Parse one complex value ("a", "bj" or "a+bj") from fp into *ip
+    ret_real = NumPyOS_ascii_ftolf(fp, &result);
+    @type@ output;
+    // Peek next character; int (not char) so EOF stays distinct from 0xFF
+    int next = getc(fp);
+    if ((next == '+') || (next == '-')) {
+        // Imaginary component specified
+        output.real = result;
+        // Revert peek and read imaginary component
+        ungetc(next, fp);
+        ret_imag = NumPyOS_ascii_ftolf(fp, &result);
+        // Peek next character
+        next = getc(fp);
+        if ((ret_imag == 1) && (next == 'j')) {
+            // If read is successful and the immediate following char is j
+            output.imag = result;
+        }
+        else {
+            output.imag = 0;
+            // Push an invalid char to trigger the not everything is read error
+            ungetc('a', fp);
+        }
+    }
+    else if (next == 'j') {
+        // Real component not specified
+        output.real = 0;
+        output.imag = result;
+    }
+    else {
+        // Imaginary component not specified
+        output.real = result;
+        output.imag = 0.;
+        // Next character is not + / - / j. Revert peek.
+        ungetc(next, fp);
+    }
+    *(@type@ *)ip = output;
+    return ret_real;
+}
+/**end repeat**/
+
+
+/**begin repeat
+ * #fname = CLONGDOUBLE,
  *          OBJECT, STRING, UNICODE, VOID,
  *          DATETIME, TIMEDELTA#
  */
@@ -1849,7 +1918,60 @@ BOOL_fromstr(char *str, void *ip, char **endptr,
 }
 
 /**begin repeat
- * #fname = CFLOAT, CDOUBLE, CLONGDOUBLE,
+ * #fname = CFLOAT, CDOUBLE#
+ * #type = npy_cfloat, npy_cdouble#
+ */
+static int
+@fname@_fromstr(char *str, void *ip, char **endptr,
+        PyArray_Descr *NPY_UNUSED(ignore))
+{
+    double result;
+    // Parse one complex value ("a", "bj" or "a+bj") from str into *ip
+    result = NumPyOS_ascii_strtod(str, endptr);
+    @type@ output;
+
+    if (endptr && ((*endptr[0] == '+') || (*endptr[0] == '-'))) {
+        // Imaginary component specified
+        output.real = result;
+        // Remember where the sign is, then read the imaginary component
+        char *prev = *endptr;
+        str = *endptr;
+        result = NumPyOS_ascii_strtod(str, endptr);
+        if (endptr && *endptr[0] == 'j') {
+            // Read is successful if the immediate following char is j
+            output.imag = result;
+            // Skip j
+            ++*endptr;
+        }
+        else {
+            /*
+             * Rewind *endptr to the sign to trigger the not everything is
+             * read error (assigning the local endptr would have no effect)
+             */
+            *endptr = prev;
+            output.imag = 0;
+        }
+    }
+    else if (endptr && *endptr[0] == 'j') {
+        // Real component not specified
+        output.real = 0;
+        output.imag = result;
+        // Skip j
+        ++*endptr;
+    }
+    else {
+        // Imaginary component not specified
+        output.real = result;
+        output.imag = 0.;
+    }
+    *(@type@ *)ip = output;
+    return 0;
+}
+/**end repeat**/
+
+
+/**begin repeat
+ * #fname = CLONGDOUBLE,
  *          OBJECT, STRING, UNICODE, VOID#
  */
 
@@ -3078,6 +3200,7 @@ BOOL_argmax(npy_bool *ip, npy_intp n, npy_intp *max_ind,
  * #le = _LESS_THAN_OR_EQUAL*10, npy_half_le, _LESS_THAN_OR_EQUAL*8#
  * #iscomplex = 0*14, 1*3, 0*2#
  * #incr = ip++*14, ip+=2*3, ip++*2#
+ * #isdatetime = 0*17, 1*2#
  */
 static int
 @fname@_argmax(@type@ *ip, npy_intp n, npy_intp *max_ind,
@@ -3103,6 +3226,12 @@ static int
         return 0;
     }
 #endif
+#if @isdatetime@
+    if (mp == NPY_DATETIME_NAT) {
+        /* NaT encountered, it's maximal */
+        return 0;
+    }
+#endif
 
     for (i = 1; i < n; i++) {
         @incr@;
@@ -3122,6 +3251,13 @@ static int
             }
         }
 #else
+#if @isdatetime@
+        if (*ip == NPY_DATETIME_NAT) {
+            /* NaT encountered, it's maximal */
+            *max_ind = i;
+            break;
+        }
+#endif
         if (!@le@(*ip, mp)) {  /* negated, for correct nan handling */
             mp = *ip;
             *max_ind = i;
@@ -3158,16 +3294,19 @@ BOOL_argmin(npy_bool *ip, npy_intp n, npy_intp *min_ind,
  * #fname = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
  *          LONG, ULONG, LONGLONG, ULONGLONG,
  *          HALF, FLOAT, DOUBLE, LONGDOUBLE,
- *          CFLOAT, CDOUBLE, CLONGDOUBLE#
+ *          CFLOAT, CDOUBLE, CLONGDOUBLE,
+ *          DATETIME, TIMEDELTA#
  * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *         npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *         npy_half, npy_float, npy_double, npy_longdouble,
- *         npy_float, npy_double, npy_longdouble#
- * #isfloat = 0*10, 1*7#
- * #isnan = nop*10, npy_half_isnan, npy_isnan*6#
- * #le = _LESS_THAN_OR_EQUAL*10, npy_half_le, _LESS_THAN_OR_EQUAL*6#
- * #iscomplex = 0*14, 1*3#
- * #incr = ip++*14, ip+=2*3#
+ *         npy_float, npy_double, npy_longdouble,
+ *         npy_datetime, npy_timedelta#
+ * #isfloat = 0*10, 1*7, 0*2#
+ * #isnan = nop*10, npy_half_isnan, npy_isnan*6, nop*2#
+ * #le = _LESS_THAN_OR_EQUAL*10, npy_half_le, _LESS_THAN_OR_EQUAL*8#
+ * #iscomplex = 0*14, 1*3, 0*2#
+ * #incr = ip++*14, ip+=2*3, ip++*2#
+ * #isdatetime = 0*17, 1*2#
  */
 static int
 @fname@_argmin(@type@ *ip, npy_intp n, npy_intp *min_ind,
@@ -3193,6 +3332,12 @@ static int
         return 0;
     }
 #endif
+#if @isdatetime@
+    if (mp == NPY_DATETIME_NAT) {
+        /* NaT encountered, it's minimal */
+        return 0;
+    }
+#endif
 
     for (i = 1; i < n; i++) {
         @incr@;
@@ -3212,6 +3357,13 @@ static int
             }
         }
 #else
+#if @isdatetime@
+        if (*ip == NPY_DATETIME_NAT) {
+            /* NaT encountered, it's minimal */
+            *min_ind = i;
+            break;
+        }
+#endif 
         if (!@le@(mp, *ip)) {  /* negated, for correct nan handling */
             mp = *ip;
             *min_ind = i;
@@ -3231,43 +3383,6 @@ static int
 
 #undef _LESS_THAN_OR_EQUAL
 
-/**begin repeat
- *
- * #fname = DATETIME, TIMEDELTA#
- * #type = npy_datetime, npy_timedelta#
- */
-static int
-@fname@_argmin(@type@ *ip, npy_intp n, npy_intp *min_ind,
-        PyArrayObject *NPY_UNUSED(aip))
-{
-    /* NPY_DATETIME_NAT is smaller than every other value, we skip
-     * it for consistency with min().
-     */
-    npy_intp i;
-    @type@ mp = NPY_DATETIME_NAT;
-
-    i = 0;
-    while (i < n && mp == NPY_DATETIME_NAT) {
-        mp = ip[i];
-        i++;
-    }
-    if (i == n) {
-        /* All NaTs: return 0 */
-        *min_ind = 0;
-        return 0;
-    }
-    *min_ind = i - 1;
-    for (; i < n; i++) {
-        if (mp > ip[i] && ip[i] != NPY_DATETIME_NAT) {
-            mp = ip[i];
-            *min_ind = i;
-        }
-    }
-    return 0;
-}
-
-/**end repeat**/
-
 static int
 OBJECT_argmax(PyObject **ip, npy_intp n, npy_intp *max_ind,
               PyArrayObject *NPY_UNUSED(aip))
diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c
index b729027ad..0edadee98 100644
--- a/numpy/core/src/multiarray/buffer.c
+++ b/numpy/core/src/multiarray/buffer.c
@@ -11,7 +11,7 @@
 
 #include "npy_pycompat.h"
 
-#include "buffer.h"
+#include "npy_buffer.h"
 #include "common.h"
 #include "numpyos.h"
 #include "arrayobject.h"
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 3270bc20d..c991f7428 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -12,7 +12,7 @@
 #include "usertypes.h"
 
 #include "common.h"
-#include "buffer.h"
+#include "npy_buffer.h"
 
 #include "get_attr_string.h"
 #include "mem_overlap.h"
@@ -367,6 +367,10 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
         }
         Py_DECREF(ip);
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
+
 
     /* The array struct interface */
     ip = PyArray_LookupSpecial_OnInstance(obj, "__array_struct__");
@@ -389,6 +393,9 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
         }
         Py_DECREF(ip);
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
 
     /* The old buffer interface */
 #if !defined(NPY_PY3K)
@@ -419,6 +426,9 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
             goto fail;
         }
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
 
     /*
      * If we reached the maximum recursion depth without hitting one
diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index 4baa02052..ca126b4b1 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -16,7 +16,7 @@
 
 #include "conversion_utils.h"
 #include "alloc.h"
-#include "buffer.h"
+#include "npy_buffer.h"
 
 static int
 PyArray_PyIntAsInt_ErrMsg(PyObject *o, const char * msg) NPY_GCC_NONNULL(2);
@@ -667,8 +667,8 @@ PyArray_ConvertClipmodeSequence(PyObject *object, NPY_CLIPMODE *modes, int n)
     if (object && (PyTuple_Check(object) || PyList_Check(object))) {
         if (PySequence_Size(object) != n) {
             PyErr_Format(PyExc_ValueError,
-                    "list of clipmodes has wrong length (%d instead of %d)",
-                    (int)PySequence_Size(object), n);
+                    "list of clipmodes has wrong length (%zd instead of %d)",
+                    PySequence_Size(object), n);
             return NPY_FAIL;
         }
 
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 5174bd889..64933ae1b 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -19,7 +19,7 @@
 #include "ctors.h"
 #include "convert_datatype.h"
 #include "shape.h"
-#include "buffer.h"
+#include "npy_buffer.h"
 #include "lowlevel_strided_loops.h"
 #include "methods.h"
 #include "_datetime.h"
@@ -544,8 +544,8 @@ setArrayFromSequence(PyArrayObject *a, PyObject *s,
      */
     if (slen != PyArray_DIMS(a)[dim] && slen != 1) {
         PyErr_Format(PyExc_ValueError,
-                 "cannot copy sequence with size %d to array axis "
-                 "with dimension %d", (int)slen, (int)PyArray_DIMS(a)[dim]);
+                 "cannot copy sequence with size %zd to array axis "
+                 "with dimension %" NPY_INTP_FMT, slen, PyArray_DIMS(a)[dim]);
         goto fail;
     }
 
@@ -852,6 +852,10 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
             return 0;
         }
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
+
 
     /* obj has the __array_interface__ interface */
     e = PyArray_LookupSpecial_OnInstance(obj, "__array_interface__");
@@ -881,6 +885,9 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
             return 0;
         }
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
 
     seq = PySequence_Fast(obj, "Could not convert object to sequence");
     if (seq == NULL) {
@@ -2351,7 +2358,11 @@ PyArray_FromStructInterface(PyObject *input)
 
     attr = PyArray_LookupSpecial_OnInstance(input, "__array_struct__");
     if (attr == NULL) {
-        return Py_NotImplemented;
+        if (PyErr_Occurred()) {
+            return NULL;
+        } else {
+            return Py_NotImplemented;
+        }
     }
     if (!NpyCapsule_Check(attr)) {
         goto fail;
@@ -2463,6 +2474,9 @@ PyArray_FromInterface(PyObject *origin)
     iface = PyArray_LookupSpecial_OnInstance(origin,
                                                     "__array_interface__");
     if (iface == NULL) {
+        if (PyErr_Occurred()) {
+            PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+        }
         return Py_NotImplemented;
     }
     if (!PyDict_Check(iface)) {
@@ -2716,6 +2730,9 @@ PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context)
 
     array_meth = PyArray_LookupSpecial_OnInstance(op, "__array__");
     if (array_meth == NULL) {
+        if (PyErr_Occurred()) {
+            PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+        }
         return Py_NotImplemented;
     }
     if (context == NULL) {
@@ -2894,8 +2911,8 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
     src_size = PyArray_SIZE(src);
     if (dst_size != src_size) {
         PyErr_Format(PyExc_ValueError,
-                "cannot copy from array of size %d into an array "
-                "of size %d", (int)src_size, (int)dst_size);
+                "cannot copy from array of size %" NPY_INTP_FMT " into an array "
+                "of size %" NPY_INTP_FMT, src_size, dst_size);
         return -1;
     }
 
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index d21bb9776..72a3df89c 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -758,8 +758,8 @@ parse_datetime_extended_unit_from_string(char *str, Py_ssize_t len,
 bad_input:
     if (metastr != NULL) {
         PyErr_Format(PyExc_TypeError,
-                "Invalid datetime metadata string \"%s\" at position %d",
-                metastr, (int)(substr-metastr));
+                "Invalid datetime metadata string \"%s\" at position %zd",
+                metastr, substr-metastr);
     }
     else {
         PyErr_Format(PyExc_TypeError,
@@ -820,8 +820,8 @@ parse_datetime_metadata_from_metastr(char *metastr, Py_ssize_t len,
 bad_input:
     if (substr != metastr) {
         PyErr_Format(PyExc_TypeError,
-                "Invalid datetime metadata string \"%s\" at position %d",
-                metastr, (int)(substr-metastr));
+                "Invalid datetime metadata string \"%s\" at position %zd",
+                metastr, substr - metastr);
     }
     else {
         PyErr_Format(PyExc_TypeError,
@@ -2273,15 +2273,15 @@ convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out,
 
 invalid_date:
     PyErr_Format(PyExc_ValueError,
-            "Invalid date (%d,%d,%d) when converting to NumPy datetime",
-            (int)out->year, (int)out->month, (int)out->day);
+            "Invalid date (%" NPY_INT64_FMT ",%" NPY_INT32_FMT ",%" NPY_INT32_FMT ") when converting to NumPy datetime",
+            out->year, out->month, out->day);
     return -1;
 
 invalid_time:
     PyErr_Format(PyExc_ValueError,
-            "Invalid time (%d,%d,%d,%d) when converting "
+            "Invalid time (%" NPY_INT32_FMT ",%" NPY_INT32_FMT ",%" NPY_INT32_FMT ",%" NPY_INT32_FMT ") when converting "
             "to NumPy datetime",
-            (int)out->hour, (int)out->min, (int)out->sec, (int)out->us);
+            out->hour, out->min, out->sec, out->us);
     return -1;
 }
 
@@ -3221,18 +3221,6 @@ NPY_NO_EXPORT PyArrayObject *
 datetime_arange(PyObject *start, PyObject *stop, PyObject *step,
                 PyArray_Descr *dtype)
 {
-    PyArray_DatetimeMetaData meta;
-    /*
-     * Both datetime and timedelta are stored as int64, so they can
-     * share value variables.
-     */
-    npy_int64 values[3];
-    PyObject *objs[3];
-    int type_nums[3];
-
-    npy_intp i, length;
-    PyArrayObject *ret;
-    npy_int64 *ret_data;
 
     /*
      * First normalize the input parameters so there is no Py_None,
@@ -3265,6 +3253,8 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step,
     /* Check if the units of the given dtype are generic, in which
      * case we use the code path that detects the units
      */
+    int type_nums[3];
+    PyArray_DatetimeMetaData meta;
     if (dtype != NULL) {
         PyArray_DatetimeMetaData *meta_tmp;
 
@@ -3313,6 +3303,7 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step,
     }
 
     /* Set up to convert the objects to a common datetime unit metadata */
+    PyObject *objs[3];
     objs[0] = start;
     objs[1] = stop;
     objs[2] = step;
@@ -3333,11 +3324,22 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step,
         type_nums[2] = NPY_TIMEDELTA;
     }
 
-    /* Convert all the arguments */
+    /* Convert all the arguments
+     *
+     * Both datetime and timedelta are stored as int64, so they can
+     * share value variables.
+     */
+    npy_int64 values[3];
     if (convert_pyobjects_to_datetimes(3, objs, type_nums,
                                 NPY_SAME_KIND_CASTING, values, &meta) < 0) {
         return NULL;
     }
+    /* If no start was provided, default to 0 */
+    if (start == NULL) {
+        /* enforced above */
+        assert(type_nums[0] == NPY_TIMEDELTA);
+        values[0] = 0;
+    }
 
     /* If no step was provided, default to 1 */
     if (step == NULL) {
@@ -3362,6 +3364,7 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step,
     }
 
     /* Calculate the array length */
+    npy_intp length;
     if (values[2] > 0 && values[1] > values[0]) {
         length = (values[1] - values[0] + (values[2] - 1)) / values[2];
     }
@@ -3389,19 +3392,20 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step,
     }
 
     /* Create the result array */
-    ret = (PyArrayObject *)PyArray_NewFromDescr(
-                            &PyArray_Type, dtype, 1, &length, NULL,
-                            NULL, 0, NULL);
+    PyArrayObject *ret = (PyArrayObject *)PyArray_NewFromDescr(
+            &PyArray_Type, dtype, 1, &length, NULL,
+            NULL, 0, NULL);
+
     if (ret == NULL) {
         return NULL;
     }
 
     if (length > 0) {
         /* Extract the data pointer */
-        ret_data = (npy_int64 *)PyArray_DATA(ret);
+        npy_int64 *ret_data = (npy_int64 *)PyArray_DATA(ret);
 
         /* Create the timedeltas or datetimes */
-        for (i = 0; i < length; ++i) {
+        for (npy_intp i = 0; i < length; ++i) {
             *ret_data = values[0];
             values[0] += values[2];
             ret_data++;
diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c
index 95b7bb3dc..dfc01494f 100644
--- a/numpy/core/src/multiarray/datetime_strings.c
+++ b/numpy/core/src/multiarray/datetime_strings.c
@@ -743,8 +743,8 @@ finish:
 
 parse_error:
     PyErr_Format(PyExc_ValueError,
-            "Error parsing datetime string \"%s\" at position %d",
-            str, (int)(substr-str));
+            "Error parsing datetime string \"%s\" at position %zd",
+            str, substr - str);
     return -1;
 
 error:
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 23d140cf6..d4e18e457 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -19,7 +19,7 @@
 #include "descriptor.h"
 #include "alloc.h"
 #include "assert.h"
-#include "buffer.h"
+#include "npy_buffer.h"
 
 /*
  * offset:    A starting offset.
@@ -1149,8 +1149,8 @@ _convert_from_dict(PyObject *obj, int align)
             }
             Py_DECREF(off);
             if (offset < 0) {
-                PyErr_Format(PyExc_ValueError, "offset %d cannot be negative",
-                             (int)offset);
+                PyErr_Format(PyExc_ValueError, "offset %ld cannot be negative",
+                             offset);
                 Py_DECREF(tup);
                 Py_DECREF(ind);
                 goto fail;
@@ -1164,10 +1164,10 @@ _convert_from_dict(PyObject *obj, int align)
             /* If align=True, enforce field alignment */
             if (align && offset % newdescr->alignment != 0) {
                 PyErr_Format(PyExc_ValueError,
-                        "offset %d for NumPy dtype with fields is "
+                        "offset %ld for NumPy dtype with fields is "
                         "not divisible by the field alignment %d "
                         "with align=True",
-                        (int)offset, (int)newdescr->alignment);
+                        offset, newdescr->alignment);
                 ret = NPY_FAIL;
             }
             else if (offset + newdescr->elsize > totalsize) {
@@ -1286,7 +1286,7 @@ _convert_from_dict(PyObject *obj, int align)
             PyErr_Format(PyExc_ValueError,
                     "NumPy dtype descriptor requires %d bytes, "
                     "cannot override to smaller itemsize of %d",
-                    (int)new->elsize, (int)itemsize);
+                    new->elsize, itemsize);
             Py_DECREF(new);
             goto fail;
         }
@@ -1295,7 +1295,7 @@ _convert_from_dict(PyObject *obj, int align)
             PyErr_Format(PyExc_ValueError,
                     "NumPy dtype descriptor requires alignment of %d bytes, "
                     "which is not divisible into the specified itemsize %d",
-                    (int)new->alignment, (int)itemsize);
+                    new->alignment, itemsize);
             Py_DECREF(new);
             goto fail;
         }
diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src
index e7bbc3d0b..58af44091 100644
--- a/numpy/core/src/multiarray/einsum.c.src
+++ b/numpy/core/src/multiarray/einsum.c.src
@@ -1876,7 +1876,7 @@ parse_operand_subscripts(char *subscripts, int length,
      * later where it matters the char is cast to a signed char.
      */
     for (idim = 0; idim < ndim - 1; ++idim) {
-        int label = op_labels[idim];
+        int label = (signed char)op_labels[idim];
         /* If it is a proper label, find any duplicates of it. */
         if (label > 0) {
             /* Search for the next matching label. */
diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c
index 116e37ce5..6e5d480d0 100644
--- a/numpy/core/src/multiarray/getset.c
+++ b/numpy/core/src/multiarray/getset.c
@@ -20,7 +20,7 @@
 #include "arrayobject.h"
 #include "mem_overlap.h"
 #include "alloc.h"
-#include "buffer.h"
+#include "npy_buffer.h"
 
 /*******************  array attribute get and set routines ******************/
 
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 247864775..8dcd28c84 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1198,9 +1198,9 @@ array_assign_boolean_subscript(PyArrayObject *self,
         if (size != PyArray_DIMS(v)[0]) {
             PyErr_Format(PyExc_ValueError,
                     "NumPy boolean array indexing assignment "
-                    "cannot assign %d input values to "
-                    "the %d output values where the mask is true",
-                    (int)PyArray_DIMS(v)[0], (int)size);
+                    "cannot assign %" NPY_INTP_FMT " input values to "
+                    "the %" NPY_INTP_FMT " output values where the mask is true",
+                    PyArray_DIMS(v)[0], size);
             return -1;
         }
         v_stride = PyArray_STRIDES(v)[0];
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 441567049..9169814c2 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -118,6 +118,9 @@ PyArray_GetPriority(PyObject *obj, double default_)
 
     ret = PyArray_LookupSpecial_OnInstance(obj, "__array_priority__");
     if (ret == NULL) {
+        if (PyErr_Occurred()) {
+            PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+        }
         return default_;
     }
 
@@ -1112,6 +1115,14 @@ _pyarray_correlate(PyArrayObject *ap1, PyArrayObject *ap2, int typenum,
 
     n1 = PyArray_DIMS(ap1)[0];
     n2 = PyArray_DIMS(ap2)[0];
+    if (n1 == 0) {
+        PyErr_SetString(PyExc_ValueError, "first array argument cannot be empty");
+        return NULL;
+    }
+    if (n2 == 0) {
+        PyErr_SetString(PyExc_ValueError, "second array argument cannot be empty");
+        return NULL;
+    }
     if (n1 < n2) {
         ret = ap1;
         ap1 = ap2;
@@ -1562,8 +1573,7 @@ _array_fromobject(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kws)
     PyArrayObject *oparr = NULL, *ret = NULL;
     npy_bool subok = NPY_FALSE;
     npy_bool copy = NPY_TRUE;
-    int nd;
-    npy_intp ndmin = 0;
+    int ndmin = 0, nd;
     PyArray_Descr *type = NULL;
     PyArray_Descr *oldtype = NULL;
     NPY_ORDER order = NPY_KEEPORDER;
@@ -1625,13 +1635,14 @@ _array_fromobject(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kws)
 
             ndmin_obj = PyDict_GetItem(kws, npy_ma_str_ndmin);
             if (ndmin_obj) {
-                ndmin = PyLong_AsLong(ndmin_obj);
-                if (error_converting(ndmin)) {
+                long t = PyLong_AsLong(ndmin_obj);
+                if (error_converting(t)) {
                     goto clean_type;
                 }
-                else if (ndmin > NPY_MAXDIMS) {
+                else if (t > NPY_MAXDIMS) {
                     goto full_path;
                 }
+                ndmin = t;
             }
 
             /* copy=False with default dtype, order (any is OK) and ndim */
@@ -2063,7 +2074,7 @@ array_fromfile(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds)
     if (file == NULL) {
         return NULL;
     }
-    
+
     if (offset != 0 && strcmp(sep, "") != 0) {
         PyErr_SetString(PyExc_TypeError, "'offset' argument only permitted for binary files");
         Py_XDECREF(type);
@@ -3265,7 +3276,7 @@ array_datetime_data(PyObject *NPY_UNUSED(dummy), PyObject *args)
     }
 
     meta = get_datetime_metadata_from_dtype(dtype);
-    Py_DECREF(dtype);    
+    Py_DECREF(dtype);
     if (meta == NULL) {
         return NULL;
     }
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index db0bfcece..e7fe0fa50 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -371,8 +371,8 @@ NpyIter_ResetToIterIndexRange(NpyIter *iter,
         }
         if (errmsg == NULL) {
             PyErr_Format(PyExc_ValueError,
-                    "Out-of-bounds range [%d, %d) passed to "
-                    "ResetToIterIndexRange", (int)istart, (int)iend);
+                    "Out-of-bounds range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to "
+                    "ResetToIterIndexRange", istart, iend);
         }
         else {
             *errmsg = "Out-of-bounds range passed to ResetToIterIndexRange";
@@ -382,8 +382,8 @@ NpyIter_ResetToIterIndexRange(NpyIter *iter,
     else if (iend < istart) {
         if (errmsg == NULL) {
             PyErr_Format(PyExc_ValueError,
-                    "Invalid range [%d, %d) passed to ResetToIterIndexRange",
-                    (int)istart, (int)iend);
+                    "Invalid range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to ResetToIterIndexRange",
+                    istart, iend);
         }
         else {
             *errmsg = "Invalid range passed to ResetToIterIndexRange";
@@ -1429,8 +1429,8 @@ NpyIter_DebugPrint(NpyIter *iter)
         printf("REUSE_REDUCE_LOOPS ");
 
     printf("\n");
-    printf("| NDim: %d\n", (int)ndim);
-    printf("| NOp: %d\n", (int)nop);
+    printf("| NDim: %d\n", ndim);
+    printf("| NOp: %d\n", nop);
     if (NIT_MASKOP(iter) >= 0) {
         printf("| MaskOp: %d\n", (int)NIT_MASKOP(iter));
     }
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index d40836dc2..5e770338d 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -154,7 +154,7 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
     if (nop > NPY_MAXARGS) {
         PyErr_Format(PyExc_ValueError,
             "Cannot construct an iterator with more than %d operands "
-            "(%d were requested)", (int)NPY_MAXARGS, (int)nop);
+            "(%d were requested)", NPY_MAXARGS, nop);
         return NULL;
     }
 
@@ -810,7 +810,7 @@ npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
         PyErr_Format(PyExc_ValueError,
                 "Cannot construct an iterator with more than %d dimensions "
                 "(%d were requested for op_axes)",
-                (int)NPY_MAXDIMS, oa_ndim);
+                NPY_MAXDIMS, oa_ndim);
         return 0;
     }
     if (op_axes == NULL) {
@@ -826,14 +826,14 @@ npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
         if (axes != NULL) {
             memset(axes_dupcheck, 0, NPY_MAXDIMS);
             for (idim = 0; idim < oa_ndim; ++idim) {
-                npy_intp i = axes[idim];
+                int i = axes[idim];
                 if (i >= 0) {
                     if (i >= NPY_MAXDIMS) {
                         PyErr_Format(PyExc_ValueError,
                                 "The 'op_axes' provided to the iterator "
                                 "constructor for operand %d "
                                 "contained invalid "
-                                "values %d", (int)iop, (int)i);
+                                "values %d", iop, i);
                         return 0;
                     }
                     else if (axes_dupcheck[i] == 1) {
@@ -841,7 +841,7 @@ npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
                                 "The 'op_axes' provided to the iterator "
                                 "constructor for operand %d "
                                 "contained duplicate "
-                                "value %d", (int)iop, (int)i);
+                                "value %d", iop, i);
                         return 0;
                     }
                     else {
@@ -1311,7 +1311,7 @@ npyiter_check_casting(int nop, PyArrayObject **op,
                 PyObject *errmsg;
                 errmsg = PyUString_FromFormat(
                         "Iterator operand %d dtype could not be cast from ",
-                        (int)iop);
+                        iop);
                 PyUString_ConcatAndDel(&errmsg,
                         PyObject_Repr((PyObject *)PyArray_DESCR(op[iop])));
                 PyUString_ConcatAndDel(&errmsg,
@@ -1342,7 +1342,7 @@ npyiter_check_casting(int nop, PyArrayObject **op,
                 PyUString_ConcatAndDel(&errmsg,
                         PyUString_FromFormat(", the operand %d dtype, "
                                 "according to the rule %s",
-                                (int)iop,
+                                iop,
                                 npyiter_casting_to_string(casting)));
                 PyErr_SetObject(PyExc_TypeError, errmsg);
                 Py_DECREF(errmsg);
@@ -1500,8 +1500,8 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
                                     "Iterator input op_axes[%d][%d] (==%d) "
                                     "is not a valid axis of op[%d], which "
                                     "has %d dimensions ",
-                                    (int)iop, (int)(ndim-idim-1), (int)i,
-                                    (int)iop, (int)ondim);
+                                    iop, (ndim-idim-1), i,
+                                    iop, ondim);
                             return 0;
                         }
                     }
diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c
index 4b9d41aa4..246f9d382 100644
--- a/numpy/core/src/multiarray/nditer_pywrap.c
+++ b/numpy/core/src/multiarray/nditer_pywrap.c
@@ -2016,7 +2016,7 @@ npyiter_seq_item(NewNpyArrayIterObject *self, Py_ssize_t i)
 
     if (i < 0 || i >= nop) {
         PyErr_Format(PyExc_IndexError,
-                "Iterator operand index %d is out of bounds", (int)i_orig);
+                "Iterator operand index %zd is out of bounds", i_orig);
         return NULL;
     }
 
@@ -2030,7 +2030,7 @@ npyiter_seq_item(NewNpyArrayIterObject *self, Py_ssize_t i)
      */
     if (!self->readflags[i]) {
         PyErr_Format(PyExc_RuntimeError,
-                "Iterator operand %d is write-only", (int)i);
+                "Iterator operand %zd is write-only", i);
         return NULL;
     }
 #endif
@@ -2147,12 +2147,12 @@ npyiter_seq_ass_item(NewNpyArrayIterObject *self, Py_ssize_t i, PyObject *v)
 
     if (i < 0 || i >= nop) {
         PyErr_Format(PyExc_IndexError,
-                "Iterator operand index %d is out of bounds", (int)i_orig);
+                "Iterator operand index %zd is out of bounds", i_orig);
         return -1;
     }
     if (!self->writeflags[i]) {
         PyErr_Format(PyExc_RuntimeError,
-                "Iterator operand %d is not writeable", (int)i_orig);
+                "Iterator operand %zd is not writeable", i_orig);
         return -1;
     }
 
diff --git a/numpy/core/src/multiarray/buffer.h b/numpy/core/src/multiarray/npy_buffer.h
index fae413c85..fae413c85 100644
--- a/numpy/core/src/multiarray/buffer.h
+++ b/numpy/core/src/multiarray/npy_buffer.h
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 9adca6773..32d712e0c 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -28,7 +28,7 @@
 #include "npy_import.h"
 #include "dragon4.h"
 #include "npy_longdouble.h"
-#include "buffer.h"
+#include "npy_buffer.h"
 
 #include <stdlib.h>
 
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index d948e25bb..32aac3ff7 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -54,210 +54,123 @@
  **                          GENERIC FLOAT LOOPS                             **
  *****************************************************************************/
 
+/* direct loops using a suitable callback */
 
-typedef float halfUnaryFunc(npy_half x);
-typedef float floatUnaryFunc(float x);
-typedef double doubleUnaryFunc(double x);
-typedef npy_longdouble longdoubleUnaryFunc(npy_longdouble x);
-typedef npy_half halfBinaryFunc(npy_half x, npy_half y);
-typedef float floatBinaryFunc(float x, float y);
-typedef double doubleBinaryFunc(double x, double y);
-typedef npy_longdouble longdoubleBinaryFunc(npy_longdouble x, npy_longdouble y);
-
+/**begin repeat
+ * #c = e, f, d, g#
+ * #type = npy_half, npy_float, npy_double, npy_longdouble#
+ **/
 
 /*UFUNC_API*/
 NPY_NO_EXPORT void
-PyUFunc_e_e(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
+PyUFunc_@c@_@c@(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
 {
-    halfUnaryFunc *f = (halfUnaryFunc *)func;
+    typedef @type@ func_type(@type@);
+    func_type *f = (func_type *)func;
     UNARY_LOOP {
-        const npy_half in1 = *(npy_half *)ip1;
-        *(npy_half *)op1 = f(in1);
+        const @type@ in1 = *(@type@ *)ip1;
+        *(@type@ *)op1 = f(in1);
     }
 }
 
 /*UFUNC_API*/
 NPY_NO_EXPORT void
-PyUFunc_e_e_As_f_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
+PyUFunc_@c@@c@_@c@(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
 {
-    floatUnaryFunc *f = (floatUnaryFunc *)func;
-    UNARY_LOOP {
-        const float in1 = npy_half_to_float(*(npy_half *)ip1);
-        *(npy_half *)op1 = npy_float_to_half(f(in1));
+    typedef @type@ func_type(@type@, @type@);
+    func_type *f = (func_type *)func;
+    BINARY_LOOP {
+        @type@ in1 = *(@type@ *)ip1;
+        @type@ in2 = *(@type@ *)ip2;
+        *(@type@ *)op1 = f(in1, in2);
     }
 }
 
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_e_e_As_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    doubleUnaryFunc *f = (doubleUnaryFunc *)func;
-    UNARY_LOOP {
-        const double in1 = npy_half_to_double(*(npy_half *)ip1);
-        *(npy_half *)op1 = npy_double_to_half(f(in1));
-    }
-}
+/**end repeat**/
 
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_f_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    floatUnaryFunc *f = (floatUnaryFunc *)func;
-    UNARY_LOOP {
-        const float in1 = *(float *)ip1;
-        *(float *)op1 = f(in1);
-    }
-}
+/* indirect loops with casting */
+/**begin repeat
+ * #c1    = e,         e,          f#
+ * #type1 = npy_half,  npy_half,   npy_float#
+ * #c2    = f,         d,          d#
+ * #type2 = npy_float, npy_double, npy_double#
+ *
+ * #conv12  = npy_half_to_float, npy_half_to_double, (double)#
+ * #conv21  = npy_float_to_half, npy_double_to_half, (float)#
+ **/
 
 /*UFUNC_API*/
 NPY_NO_EXPORT void
-PyUFunc_f_f_As_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
+PyUFunc_@c1@_@c1@_As_@c2@_@c2@(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
 {
-    doubleUnaryFunc *f = (doubleUnaryFunc *)func;
+    typedef @type2@ func_type(@type2@);
+    func_type *f = (func_type *)func;
     UNARY_LOOP {
-        const float in1 = *(float *)ip1;
-        *(float *)op1 = (float)f((double)in1);
-    }
-}
-
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_ee_e(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    halfBinaryFunc *f = (halfBinaryFunc *)func;
-    BINARY_LOOP {
-        npy_half in1 = *(npy_half *)ip1;
-        npy_half in2 = *(npy_half *)ip2;
-        *(npy_half *)op1 = f(in1, in2);
-    }
-}
-
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_ee_e_As_ff_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    floatBinaryFunc *f = (floatBinaryFunc *)func;
-    BINARY_LOOP {
-        float in1 = npy_half_to_float(*(npy_half *)ip1);
-        float in2 = npy_half_to_float(*(npy_half *)ip2);
-        *(npy_half *)op1 = npy_float_to_half(f(in1, in2));
+        const @type2@ in1 = @conv12@(*(@type1@ *)ip1);
+        *(@type1@ *)op1 = @conv21@(f(in1));
     }
 }
-
 /*UFUNC_API*/
 NPY_NO_EXPORT void
-PyUFunc_ee_e_As_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
+PyUFunc_@c1@@c1@_@c1@_As_@c2@@c2@_@c2@(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
 {
-    doubleBinaryFunc *f = (doubleBinaryFunc *)func;
+    typedef @type2@ func_type(@type2@, @type2@);
+    func_type *f = (func_type *)func;
     BINARY_LOOP {
-        double in1 = npy_half_to_double(*(npy_half *)ip1);
-        double in2 = npy_half_to_double(*(npy_half *)ip2);
-        *(npy_half *)op1 = npy_double_to_half(f(in1, in2));
+        const @type2@ in1 = @conv12@(*(@type1@ *)ip1);
+        const @type2@ in2 = @conv12@(*(@type1@ *)ip2);
+        *(@type1@ *)op1 = @conv21@(f(in1, in2));
     }
 }
 
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_ff_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    floatBinaryFunc *f = (floatBinaryFunc *)func;
-    BINARY_LOOP {
-        float in1 = *(float *)ip1;
-        float in2 = *(float *)ip2;
-        *(float *)op1 = f(in1, in2);
-    }
-}
-
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_ff_f_As_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    doubleBinaryFunc *f = (doubleBinaryFunc *)func;
-    BINARY_LOOP {
-        float in1 = *(float *)ip1;
-        float in2 = *(float *)ip2;
-        *(float *)op1 = (double)f((double)in1, (double)in2);
-    }
-}
+/**end repeat**/
 
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    doubleUnaryFunc *f = (doubleUnaryFunc *)func;
-    UNARY_LOOP {
-        double in1 = *(double *)ip1;
-        *(double *)op1 = f(in1);
-    }
-}
+/******************************************************************************
+ **                          GENERIC COMPLEX LOOPS                           **
+ *****************************************************************************/
 
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    doubleBinaryFunc *f = (doubleBinaryFunc *)func;
-    BINARY_LOOP {
-        double in1 = *(double *)ip1;
-        double in2 = *(double *)ip2;
-        *(double *)op1 = f(in1, in2);
-    }
-}
+/* direct loops using a suitable callback */
+/**begin repeat
+ * #c = F, D, G#
+ * #type = npy_cfloat, npy_cdouble, npy_clongdouble#
+ **/
 
 /*UFUNC_API*/
 NPY_NO_EXPORT void
-PyUFunc_g_g(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
+PyUFunc_@c@_@c@(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
 {
-    longdoubleUnaryFunc *f = (longdoubleUnaryFunc *)func;
+    typedef void func_type(@type@ *, @type@ *);
+    func_type *f = (func_type *)func;
     UNARY_LOOP {
-        npy_longdouble in1 = *(npy_longdouble *)ip1;
-        *(npy_longdouble *)op1 = f(in1);
+        @type@ in1 = *(@type@ *)ip1;
+        @type@ *out = (@type@ *)op1;
+        f(&in1, out);
     }
 }
 
 /*UFUNC_API*/
 NPY_NO_EXPORT void
-PyUFunc_gg_g(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
+PyUFunc_@c@@c@_@c@(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
 {
-    longdoubleBinaryFunc *f = (longdoubleBinaryFunc *)func;
+    typedef void func_type(@type@ *, @type@ *, @type@ *);
+    func_type *f = (func_type *)func;
     BINARY_LOOP {
-        npy_longdouble in1 = *(npy_longdouble *)ip1;
-        npy_longdouble in2 = *(npy_longdouble *)ip2;
-        *(npy_longdouble *)op1 = f(in1, in2);
+        @type@ in1 = *(@type@ *)ip1;
+        @type@ in2 = *(@type@ *)ip2;
+        @type@ *out = (@type@ *)op1;
+        f(&in1, &in2, out);
     }
 }
+/**end repeat**/
 
 
-
-/******************************************************************************
- **                          GENERIC COMPLEX LOOPS                           **
- *****************************************************************************/
-
-
-typedef void cdoubleUnaryFunc(npy_cdouble *x, npy_cdouble *r);
-typedef void cfloatUnaryFunc(npy_cfloat *x, npy_cfloat *r);
-typedef void clongdoubleUnaryFunc(npy_clongdouble *x, npy_clongdouble *r);
-typedef void cdoubleBinaryFunc(npy_cdouble *x, npy_cdouble *y, npy_cdouble *r);
-typedef void cfloatBinaryFunc(npy_cfloat *x, npy_cfloat *y, npy_cfloat *r);
-typedef void clongdoubleBinaryFunc(npy_clongdouble *x, npy_clongdouble *y,
-                                   npy_clongdouble *r);
-
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_F_F(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    cfloatUnaryFunc *f = (cfloatUnaryFunc *)func;
-    UNARY_LOOP {
-        npy_cfloat in1 = *(npy_cfloat *)ip1;
-        npy_cfloat *out = (npy_cfloat *)op1;
-        f(&in1, out);
-    }
-}
-
+/* indirect loops with casting */
 /*UFUNC_API*/
 NPY_NO_EXPORT void
 PyUFunc_F_F_As_D_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
 {
-    cdoubleUnaryFunc *f = (cdoubleUnaryFunc *)func;
+    typedef void func_type(npy_cdouble *, npy_cdouble *);
+    func_type *f = (func_type *)func;
     UNARY_LOOP {
         npy_cdouble tmp, out;
         tmp.real = (double)((float *)ip1)[0];
@@ -270,22 +183,10 @@ PyUFunc_F_F_As_D_D(char **args, npy_intp *dimensions, npy_intp *steps, void *fun
 
 /*UFUNC_API*/
 NPY_NO_EXPORT void
-PyUFunc_FF_F(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    cfloatBinaryFunc *f = (cfloatBinaryFunc *)func;
-    BINARY_LOOP {
-        npy_cfloat in1 = *(npy_cfloat *)ip1;
-        npy_cfloat in2 = *(npy_cfloat *)ip2;
-        npy_cfloat *out = (npy_cfloat *)op1;
-        f(&in1, &in2, out);
-    }
-}
-
-/*UFUNC_API*/
-NPY_NO_EXPORT void
 PyUFunc_FF_F_As_DD_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
 {
-    cdoubleBinaryFunc *f = (cdoubleBinaryFunc *)func;
+    typedef void func_type(npy_cdouble *, npy_cdouble *, npy_cdouble *);
+    func_type *f = (func_type *)func;
     BINARY_LOOP {
         npy_cdouble tmp1, tmp2, out;
         tmp1.real = (double)((float *)ip1)[0];
@@ -298,56 +199,6 @@ PyUFunc_FF_F_As_DD_D(char **args, npy_intp *dimensions, npy_intp *steps, void *f
     }
 }
 
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_D_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    cdoubleUnaryFunc *f = (cdoubleUnaryFunc *)func;
-    UNARY_LOOP {
-        npy_cdouble in1 = *(npy_cdouble *)ip1;
-        npy_cdouble *out = (npy_cdouble *)op1;
-        f(&in1, out);
-    }
-}
-
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_DD_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    cdoubleBinaryFunc *f = (cdoubleBinaryFunc *)func;
-    BINARY_LOOP {
-        npy_cdouble in1 = *(npy_cdouble *)ip1;
-        npy_cdouble in2 = *(npy_cdouble *)ip2;
-        npy_cdouble *out = (npy_cdouble *)op1;
-        f(&in1, &in2, out);
-    }
-}
-
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_G_G(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    clongdoubleUnaryFunc *f = (clongdoubleUnaryFunc *)func;
-    UNARY_LOOP {
-        npy_clongdouble in1 = *(npy_clongdouble *)ip1;
-        npy_clongdouble *out = (npy_clongdouble *)op1;
-        f(&in1, out);
-    }
-}
-
-/*UFUNC_API*/
-NPY_NO_EXPORT void
-PyUFunc_GG_G(char **args, npy_intp *dimensions, npy_intp *steps, void *func)
-{
-    clongdoubleBinaryFunc *f = (clongdoubleBinaryFunc *)func;
-    BINARY_LOOP {
-        npy_clongdouble in1 = *(npy_clongdouble *)ip1;
-        npy_clongdouble in2 = *(npy_clongdouble *)ip2;
-        npy_clongdouble *out = (npy_clongdouble *)op1;
-        f(&in1, &in2, out);
-    }
-}
-
 
 /******************************************************************************
  **                         GENERIC OBJECT lOOPS                             **
@@ -1246,6 +1097,12 @@ NPY_NO_EXPORT void
 }
 
 NPY_NO_EXPORT void
+@TYPE@_isinf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    UNARY_LOOP_FAST(npy_bool, npy_bool, (void)in; *out = NPY_FALSE);
+}
+
+NPY_NO_EXPORT void
 @TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
 {
     OUTPUT_LOOP {
@@ -1294,13 +1151,36 @@ NPY_NO_EXPORT void
         const @type@ in1 = *(@type@ *)ip1;
         const @type@ in2 = *(@type@ *)ip2;
         if (in1 == NPY_DATETIME_NAT) {
+            *((@type@ *)op1) = in1;
+        }
+        else if (in2 == NPY_DATETIME_NAT) {
+            *((@type@ *)op1) = in2;
+        }
+        else {
+            *((@type@ *)op1) = (in1 @OP@ in2) ? in1 : in2;
+        }
+    }
+}
+/**end repeat1**/
+
+/**begin repeat1
+ * #kind = fmax, fmin#
+ * #OP =  >=, <=#
+ **/
+NPY_NO_EXPORT void
+@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    BINARY_LOOP {
+        const @type@ in1 = *(@type@ *)ip1;
+        const @type@ in2 = *(@type@ *)ip2;
+        if (in1 == NPY_DATETIME_NAT) {
             *((@type@ *)op1) = in2;
         }
         else if (in2 == NPY_DATETIME_NAT) {
             *((@type@ *)op1) = in1;
         }
         else {
-            *((@type@ *)op1) = (in1 @OP@ in2) ? in1 : in2;
+            *((@type@ *)op1) = in1 @OP@ in2 ? in1 : in2;
         }
     }
 }
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index e98a1ac3c..7558de0bb 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -7,14 +7,12 @@
 #define _NPY_UMATH_LOOPS_H_
 
 #define BOOL_invert BOOL_logical_not
-#define BOOL_negative BOOL_logical_not
 #define BOOL_add BOOL_logical_or
 #define BOOL_bitwise_and BOOL_logical_and
 #define BOOL_bitwise_or BOOL_logical_or
 #define BOOL_logical_xor BOOL_not_equal
 #define BOOL_bitwise_xor BOOL_logical_xor
 #define BOOL_multiply BOOL_logical_and
-#define BOOL_subtract BOOL_logical_xor
 #define BOOL_maximum BOOL_logical_or
 #define BOOL_minimum BOOL_logical_and
 #define BOOL_fmax BOOL_maximum
@@ -480,6 +478,11 @@ NPY_NO_EXPORT void
 @TYPE@_isfinite(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 
 NPY_NO_EXPORT void
+@TYPE@_isinf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
+#define @TYPE@_isnan @TYPE@_isnat
+
+NPY_NO_EXPORT void
 @TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data));
 
 /**begin repeat1
@@ -491,8 +494,7 @@ NPY_NO_EXPORT void
 /**end repeat1**/
 
 /**begin repeat1
- * #kind = maximum, minimum#
- * #OP =  >, <#
+ * #kind = maximum, minimum, fmin, fmax#
  **/
 NPY_NO_EXPORT void
 @TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
@@ -556,10 +558,6 @@ TIMEDELTA_mm_qm_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void
 #define TIMEDELTA_mq_m_floor_divide TIMEDELTA_mq_m_divide
 #define TIMEDELTA_md_m_floor_divide TIMEDELTA_md_m_divide
 /* #define TIMEDELTA_mm_d_floor_divide TIMEDELTA_mm_d_divide */
-#define TIMEDELTA_fmin TIMEDELTA_minimum
-#define TIMEDELTA_fmax TIMEDELTA_maximum
-#define DATETIME_fmin DATETIME_minimum
-#define DATETIME_fmax DATETIME_maximum
 
 /*
  *****************************************************************************
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index e4ad3dc84..1dc581977 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -4058,8 +4058,8 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
     for (i = 0; i < ind_size; ++i) {
         if (reduceat_ind[i] < 0 || reduceat_ind[i] >= red_axis_size) {
             PyErr_Format(PyExc_IndexError,
-                "index %d out-of-bounds in %s.%s [0, %d)",
-                (int)reduceat_ind[i], ufunc_name, opname, (int)red_axis_size);
+                "index %" NPY_INTP_FMT " out-of-bounds in %s.%s [0, %" NPY_INTP_FMT ")",
+                reduceat_ind[i], ufunc_name, opname, red_axis_size);
             return NULL;
         }
     }
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 9be7b63a0..f93d8229e 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -883,7 +883,7 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
         /* The type resolver would have upcast already */
         if (out_dtypes[0]->type_num == NPY_BOOL) {
             PyErr_Format(PyExc_TypeError,
-                "numpy boolean subtract, the `-` operator, is deprecated, "
+                "numpy boolean subtract, the `-` operator, is not supported, "
                 "use the bitwise_xor, the `^` operator, or the logical_xor "
                 "function instead.");
             return -1;
diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py
index 32e2ea537..89fc2b0b9 100644
--- a/numpy/core/tests/test_api.py
+++ b/numpy/core/tests/test_api.py
@@ -296,7 +296,7 @@ def test_array_astype():
 )
 def test_array_astype_warning(t):
     # test ComplexWarning when casting from complex to float or int
-    a = np.array(10, dtype=np.complex)
+    a = np.array(10, dtype=np.complex_)
     assert_warns(np.ComplexWarning, a.astype, t)
 
 def test_copyto_fromscalar():
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index f99c0f72b..e8ffbbb9d 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -483,6 +483,30 @@ class TestDateTime(object):
         assert_equal(np.datetime64(a, '[Y]'), np.datetime64('NaT', '[Y]'))
         assert_equal(np.datetime64(a, '[W]'), np.datetime64('NaT', '[W]'))
 
+        # NaN -> NaT
+        nan = np.array([np.nan] * 8)
+        fnan = nan.astype('f')
+        lnan = nan.astype('g')
+        cnan = nan.astype('D')
+        cfnan = nan.astype('F')
+        clnan = nan.astype('G')
+
+        nat = np.array([np.datetime64('NaT')] * 8)
+        assert_equal(nan.astype('M8[ns]'), nat)
+        assert_equal(fnan.astype('M8[ns]'), nat)
+        assert_equal(lnan.astype('M8[ns]'), nat)
+        assert_equal(cnan.astype('M8[ns]'), nat)
+        assert_equal(cfnan.astype('M8[ns]'), nat)
+        assert_equal(clnan.astype('M8[ns]'), nat)
+
+        nat = np.array([np.timedelta64('NaT')] * 8)
+        assert_equal(nan.astype('timedelta64[ns]'), nat)
+        assert_equal(fnan.astype('timedelta64[ns]'), nat)
+        assert_equal(lnan.astype('timedelta64[ns]'), nat)
+        assert_equal(cnan.astype('timedelta64[ns]'), nat)
+        assert_equal(cfnan.astype('timedelta64[ns]'), nat)
+        assert_equal(clnan.astype('timedelta64[ns]'), nat)
+
     def test_days_creation(self):
         assert_equal(np.array('1599', dtype='M8[D]').astype('i8'),
                 (1600-1970)*365 - (1972-1600)/4 + 3 - 365)
@@ -1333,10 +1357,14 @@ class TestDateTime(object):
         # Interaction with NaT
         a = np.array('1999-03-12T13', dtype='M8[2m]')
         dtnat = np.array('NaT', dtype='M8[h]')
-        assert_equal(np.minimum(a, dtnat), a)
-        assert_equal(np.minimum(dtnat, a), a)
-        assert_equal(np.maximum(a, dtnat), a)
-        assert_equal(np.maximum(dtnat, a), a)
+        assert_equal(np.minimum(a, dtnat), dtnat)
+        assert_equal(np.minimum(dtnat, a), dtnat)
+        assert_equal(np.maximum(a, dtnat), dtnat)
+        assert_equal(np.maximum(dtnat, a), dtnat)
+        assert_equal(np.fmin(dtnat, a), a)
+        assert_equal(np.fmin(a, dtnat), a)
+        assert_equal(np.fmax(dtnat, a), a)
+        assert_equal(np.fmax(a, dtnat), a)
 
         # Also do timedelta
         a = np.array(3, dtype='m8[h]')
@@ -1831,7 +1859,7 @@ class TestDateTime(object):
     def test_timedelta_arange_no_dtype(self):
         d = np.array(5, dtype="m8[D]")
         assert_equal(np.arange(d, d + 1), d)
-        assert_raises(ValueError, np.arange, d)
+        assert_equal(np.arange(d), np.arange(0, d))
 
     def test_datetime_maximum_reduce(self):
         a = np.array(['2010-01-02', '1999-03-14', '1833-03'], dtype='M8[D]')
@@ -2208,7 +2236,7 @@ class TestDateTime(object):
                 continue
             assert_raises(TypeError, np.isnat, np.zeros(10, t))
 
-    def test_isfinite(self):
+    def test_isfinite_scalar(self):
         assert_(not np.isfinite(np.datetime64('NaT', 'ms')))
         assert_(not np.isfinite(np.datetime64('NaT', 'ns')))
         assert_(np.isfinite(np.datetime64('2038-01-19T03:14:07')))
@@ -2216,18 +2244,25 @@ class TestDateTime(object):
         assert_(not np.isfinite(np.timedelta64('NaT', "ms")))
         assert_(np.isfinite(np.timedelta64(34, "ms")))
 
-        res = np.array([True, True,  False])
-        for unit in ['Y', 'M', 'W', 'D',
-                     'h', 'm', 's', 'ms', 'us',
-                     'ns', 'ps', 'fs', 'as']:
-            arr = np.array([123, -321, "NaT"], dtype='<datetime64[%s]' % unit)
-            assert_equal(np.isfinite(arr), res)
-            arr = np.array([123, -321, "NaT"], dtype='>datetime64[%s]' % unit)
-            assert_equal(np.isfinite(arr), res)
-            arr = np.array([123, -321, "NaT"], dtype='<timedelta64[%s]' % unit)
-            assert_equal(np.isfinite(arr), res)
-            arr = np.array([123, -321, "NaT"], dtype='>timedelta64[%s]' % unit)
-            assert_equal(np.isfinite(arr), res)
+    @pytest.mark.parametrize('unit', ['Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
+                                      'us', 'ns', 'ps', 'fs', 'as'])
+    @pytest.mark.parametrize('dstr', ['<datetime64[%s]', '>datetime64[%s]',
+                                      '<timedelta64[%s]', '>timedelta64[%s]'])
+    def test_isfinite_isinf_isnan_units(self, unit, dstr):
+        '''check isfinite, isinf, isnan for all units of <M, >M, <m, >m dtypes
+        '''
+        arr_val = [123, -321, "NaT"]
+        arr = np.array(arr_val,  dtype= dstr % unit)
+        pos = np.array([True, True,  False])
+        neg = np.array([False, False,  True])
+        false = np.array([False, False,  False])
+        assert_equal(np.isfinite(arr), pos)
+        assert_equal(np.isinf(arr), false)
+        assert_equal(np.isnan(arr), neg)
+
+    def test_assert_equal(self):
+        assert_raises(AssertionError, assert_equal,
+                np.datetime64('nat'), np.timedelta64('nat'))
 
     def test_corecursive_input(self):
         # construct a co-recursive list
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 8bffaa9af..363ff26db 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -172,7 +172,7 @@ class TestComparisonDeprecations(_DeprecationTestCase):
             # (warning is issued a couple of times here)
             self.assert_deprecated(op, args=(a, a[:-1]), num=None)
 
-            # Element comparison error (numpy array can't be compared).
+            # ragged array comparison returns True/False
             a = np.array([1, np.array([1,2,3])], dtype=object)
             b = np.array([1, np.array([1,2,3])], dtype=object)
             self.assert_deprecated(op, args=(a, b), num=None)
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index d2fbbae5b..e18e66c64 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -25,7 +25,7 @@ def assert_dtype_not_equal(a, b):
 
 class TestBuiltin(object):
     @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object,
-                                   np.unicode])
+                                   np.compat.unicode])
     def test_run(self, t):
         """Only test hash runs at all."""
         dt = np.dtype(t)
@@ -986,7 +986,7 @@ class TestPickling(object):
             assert_equal(x[0], y[0])
 
     @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object,
-                                   np.unicode, bool])
+                                   np.compat.unicode, bool])
     def test_builtin(self, t):
         self.check_pickling(np.dtype(t))
 
diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py
index cfeeb8a90..1b5b4cb26 100644
--- a/numpy/core/tests/test_einsum.py
+++ b/numpy/core/tests/test_einsum.py
@@ -5,7 +5,7 @@ import itertools
 import numpy as np
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_almost_equal,
-    assert_raises, suppress_warnings, assert_raises_regex
+    assert_raises, suppress_warnings, assert_raises_regex, assert_allclose
     )
 
 # Setup for optimize einsum
@@ -700,6 +700,14 @@ class TestEinsum(object):
         y2 = x[idx[:, None], idx[:, None], idx, idx]
         assert_equal(y1, y2)
 
+    def test_einsum_failed_on_p9_and_s390x(self):
+        # Issues gh-14692 and gh-12689
+        # Bug with signed vs unsigned char errored on power9 and s390x Linux
+        tensor = np.random.random_sample((10, 10, 10, 10))
+        x = np.einsum('ijij->', tensor)
+        y = tensor.trace(axis1=0, axis2=2).trace()
+        assert_allclose(x, y)
+
     def test_einsum_all_contig_non_contig_output(self):
         # Issue gh-5907, tests that the all contiguous special case
         # actually checks the contiguity of the output
diff --git a/numpy/core/tests/test_function_base.py b/numpy/core/tests/test_function_base.py
index 84b60b19c..c8a7cb6ce 100644
--- a/numpy/core/tests/test_function_base.py
+++ b/numpy/core/tests/test_function_base.py
@@ -351,14 +351,20 @@ class TestLinspace(object):
                          arange(j+1, dtype=int))
 
     def test_retstep(self):
-        y = linspace(0, 1, 2, retstep=True)
-        assert_(isinstance(y, tuple) and len(y) == 2)
-        for num in (0, 1):
-            for ept in (False, True):
+        for num in [0, 1, 2]:
+            for ept in [False, True]:
                 y = linspace(0, 1, num, endpoint=ept, retstep=True)
-                assert_(isinstance(y, tuple) and len(y) == 2 and
-                        len(y[0]) == num and isnan(y[1]),
-                        'num={0}, endpoint={1}'.format(num, ept))
+                assert isinstance(y, tuple) and len(y) == 2
+                if num == 2:
+                    y0_expect = [0.0, 1.0] if ept else [0.0, 0.5]
+                    assert_array_equal(y[0], y0_expect)
+                    assert_equal(y[1], y0_expect[1])
+                elif num == 1 and not ept:
+                    assert_array_equal(y[0], [0.0])
+                    assert_equal(y[1], 1.0)
+                else:
+                    assert_array_equal(y[0], [0.0][:num])
+                    assert isnan(y[1])
 
     def test_object(self):
         start = array(1, dtype='O')
diff --git a/numpy/core/tests/test_issue14735.py b/numpy/core/tests/test_issue14735.py
new file mode 100644
index 000000000..6105c8e6a
--- /dev/null
+++ b/numpy/core/tests/test_issue14735.py
@@ -0,0 +1,29 @@
+import pytest
+import warnings
+import numpy as np
+
+
+class Wrapper:
+    def __init__(self, array):
+        self.array = array
+
+    def __len__(self):
+        return len(self.array)
+
+    def __getitem__(self, item):
+        return type(self)(self.array[item])
+
+    def __getattr__(self, name):
+        if name.startswith("__array_"):
+            warnings.warn("object got converted", UserWarning, stacklevel=1)
+
+        return getattr(self.array, name)
+
+    def __repr__(self):
+        return "<Wrapper({self.array})>".format(self=self)
+
+@pytest.mark.filterwarnings("error")
+def test_getattr_warning():
+    array = Wrapper(np.arange(10))
+    with pytest.raises(UserWarning, match="object got converted"):
+        np.asarray(array)
diff --git a/numpy/core/tests/test_longdouble.py b/numpy/core/tests/test_longdouble.py
index 59ac5923c..2b6e1c5a2 100644
--- a/numpy/core/tests/test_longdouble.py
+++ b/numpy/core/tests/test_longdouble.py
@@ -71,6 +71,38 @@ def test_fromstring():
                  err_msg="reading '%s'" % s)
 
 
+def test_fromstring_complex():
+    for ctype in ["complex", "cdouble", "cfloat"]:
+        # Check spacing around the separator
+        assert_equal(np.fromstring("1, 2 ,  3  ,4", sep=",", dtype=ctype),
+                     np.array([1., 2., 3., 4.]))
+        # Real component not specified
+        assert_equal(np.fromstring("1j, -2j,  3j, 4e1j", sep=",", dtype=ctype),
+                     np.array([1.j, -2.j, 3.j, 40.j]))
+        # Both components specified
+        assert_equal(np.fromstring("1+1j,2-2j, -3+3j,  -4e1+4j", sep=",", dtype=ctype),
+                     np.array([1. + 1.j, 2. - 2.j, - 3. + 3.j, - 40. + 4j]))
+        # Misplaced spaces, or incomplete/misordered components
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1+2 j,3", dtype=ctype, sep=","),
+                         np.array([1.]))
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1+ 2j,3", dtype=ctype, sep=","),
+                         np.array([1.]))
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1 +2j,3", dtype=ctype, sep=","),
+                         np.array([1.]))
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1+j", dtype=ctype, sep=","),
+                         np.array([1.]))
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1+", dtype=ctype, sep=","),
+                         np.array([1.]))
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1j+1", dtype=ctype, sep=","),
+                         np.array([1j]))
+
+
 def test_fromstring_bogus():
     with assert_warns(DeprecationWarning):
         assert_equal(np.fromstring("1. 2. 3. flop 4.", dtype=float, sep=" "),
@@ -104,6 +136,88 @@ class TestFileBased(object):
                 res = np.fromfile(path, dtype=float, sep=" ")
         assert_equal(res, np.array([1., 2., 3.]))
 
+    def test_fromfile_complex(self):
+        for ctype in ["complex", "cdouble", "cfloat"]:
+            # Check spacing around the separator; only real component specified
+            with temppath() as path:
+                with open(path, 'wt') as f:
+                    f.write("1, 2 ,  3  ,4\n")
+
+                res = np.fromfile(path, dtype=ctype, sep=",")
+            assert_equal(res, np.array([1., 2., 3., 4.]))
+
+            # Real component not specified
+            with temppath() as path:
+                with open(path, 'wt') as f:
+                    f.write("1j, -2j,  3j, 4e1j\n")
+
+                res = np.fromfile(path, dtype=ctype, sep=",")
+            assert_equal(res, np.array([1.j, -2.j, 3.j, 40.j]))
+
+            # Both components specified
+            with temppath() as path:
+                with open(path, 'wt') as f:
+                    f.write("1+1j,2-2j, -3+3j,  -4e1+4j\n")
+
+                res = np.fromfile(path, dtype=ctype, sep=",")
+            assert_equal(res, np.array([1. + 1.j, 2. - 2.j, - 3. + 3.j, - 40. + 4j]))
+
+            # Spaces at wrong places
+            with temppath() as path:
+                with open(path, 'wt') as f:
+                    f.write("1+2 j,3\n")
+
+                with assert_warns(DeprecationWarning):
+                    res = np.fromfile(path, dtype=ctype, sep=",")
+            assert_equal(res, np.array([1.]))
+
+            # Spaces at wrong places
+            with temppath() as path:
+                with open(path, 'wt') as f:
+                    f.write("1+ 2j,3\n")
+
+                with assert_warns(DeprecationWarning):
+                    res = np.fromfile(path, dtype=ctype, sep=",")
+            assert_equal(res, np.array([1.]))
+
+            # Spaces at wrong places
+            with temppath() as path:
+                with open(path, 'wt') as f:
+                    f.write("1 +2j,3\n")
+
+                with assert_warns(DeprecationWarning):
+                    res = np.fromfile(path, dtype=ctype, sep=",")
+            assert_equal(res, np.array([1.]))
+
+            # Imaginary unit without a numeric coefficient
+            with temppath() as path:
+                with open(path, 'wt') as f:
+                    f.write("1+j\n")
+
+                with assert_warns(DeprecationWarning):
+                    res = np.fromfile(path, dtype=ctype, sep=",")
+            assert_equal(res, np.array([1.]))
+
+            # Trailing sign with no imaginary part
+            with temppath() as path:
+                with open(path, 'wt') as f:
+                    f.write("1+\n")
+
+                with assert_warns(DeprecationWarning):
+                    res = np.fromfile(path, dtype=ctype, sep=",")
+            assert_equal(res, np.array([1.]))
+
+            # Imaginary part given before the real part
+            with temppath() as path:
+                with open(path, 'wt') as f:
+                    f.write("1j+1\n")
+
+                with assert_warns(DeprecationWarning):
+                    res = np.fromfile(path, dtype=ctype, sep=",")
+            assert_equal(res, np.array([1.j]))
+
+
+
     @pytest.mark.skipif(string_to_longdouble_inaccurate,
                         reason="Need strtold_l")
     def test_fromfile(self):
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 66e3e3c60..218106a63 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -20,6 +20,7 @@ import gc
 import weakref
 import pytest
 from contextlib import contextmanager
+from test.support import no_tracing
 
 from numpy.compat import pickle
 
@@ -964,7 +965,7 @@ class TestCreation(object):
 
     @pytest.mark.skipif(sys.version_info[0] >= 3, reason="Not Python 2")
     def test_sequence_long(self):
-        assert_equal(np.array([long(4), long(4)]).dtype, np.long)
+        assert_equal(np.array([long(4), long(4)]).dtype, long)
         assert_equal(np.array([long(4), 2**80]).dtype, object)
         assert_equal(np.array([long(4), 2**80, long(4)]).dtype, object)
         assert_equal(np.array([2**80, long(4)]).dtype, object)
@@ -1807,7 +1808,7 @@ class TestMethods(object):
 
         # test unicode sorts.
         s = 'aaaaaaaa'
-        a = np.array([s + chr(i) for i in range(101)], dtype=np.unicode)
+        a = np.array([s + chr(i) for i in range(101)], dtype=np.unicode_)
         b = a[::-1].copy()
         for kind in self.sort_kinds:
             msg = "unicode sort, kind=%s" % kind
@@ -2059,7 +2060,7 @@ class TestMethods(object):
 
         # test unicode argsorts.
         s = 'aaaaaaaa'
-        a = np.array([s + chr(i) for i in range(101)], dtype=np.unicode)
+        a = np.array([s + chr(i) for i in range(101)], dtype=np.unicode_)
         b = a[::-1]
         r = np.arange(101)
         rr = r[::-1]
@@ -2142,7 +2143,7 @@ class TestMethods(object):
         a = np.array(['aaaaaaaaa' for i in range(100)])
         assert_equal(a.argsort(kind='m'), r)
         # unicode
-        a = np.array(['aaaaaaaaa' for i in range(100)], dtype=np.unicode)
+        a = np.array(['aaaaaaaaa' for i in range(100)], dtype=np.unicode_)
         assert_equal(a.argsort(kind='m'), r)
 
     def test_sort_unicode_kind(self):
@@ -2271,7 +2272,7 @@ class TestMethods(object):
                       'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100197_1',
                       'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100198_1',
                       'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100199_1'],
-                     dtype=np.unicode)
+                     dtype=np.unicode_)
         ind = np.arange(len(a))
         assert_equal([a.searchsorted(v, 'left') for v in a], ind)
         assert_equal([a.searchsorted(v, 'right') for v in a], ind + 1)
@@ -4105,17 +4106,17 @@ class TestArgmax(object):
           np.datetime64('2010-01-03T05:14:12'),
           np.datetime64('NaT'),
           np.datetime64('2015-09-23T10:10:13'),
-          np.datetime64('1932-10-10T03:50:30')], 4),
+          np.datetime64('1932-10-10T03:50:30')], 0),
         ([np.datetime64('2059-03-14T12:43:12'),
           np.datetime64('1996-09-21T14:43:15'),
           np.datetime64('NaT'),
           np.datetime64('2022-12-25T16:02:16'),
           np.datetime64('1963-10-04T03:14:12'),
-          np.datetime64('2013-05-08T18:15:23')], 0),
+          np.datetime64('2013-05-08T18:15:23')], 2),
         ([np.timedelta64(2, 's'),
           np.timedelta64(1, 's'),
           np.timedelta64('NaT', 's'),
-          np.timedelta64(3, 's')], 3),
+          np.timedelta64(3, 's')], 2),
         ([np.timedelta64('NaT', 's')] * 3, 0),
 
         ([timedelta(days=5, seconds=14), timedelta(days=2, seconds=35),
@@ -4240,17 +4241,17 @@ class TestArgmin(object):
           np.datetime64('2010-01-03T05:14:12'),
           np.datetime64('NaT'),
           np.datetime64('2015-09-23T10:10:13'),
-          np.datetime64('1932-10-10T03:50:30')], 5),
+          np.datetime64('1932-10-10T03:50:30')], 0),
         ([np.datetime64('2059-03-14T12:43:12'),
           np.datetime64('1996-09-21T14:43:15'),
           np.datetime64('NaT'),
           np.datetime64('2022-12-25T16:02:16'),
           np.datetime64('1963-10-04T03:14:12'),
-          np.datetime64('2013-05-08T18:15:23')], 4),
+          np.datetime64('2013-05-08T18:15:23')], 2),
         ([np.timedelta64(2, 's'),
           np.timedelta64(1, 's'),
           np.timedelta64('NaT', 's'),
-          np.timedelta64(3, 's')], 1),
+          np.timedelta64(3, 's')], 2),
         ([np.timedelta64('NaT', 's')] * 3, 0),
 
         ([timedelta(days=5, seconds=14), timedelta(days=2, seconds=35),
@@ -4366,18 +4367,14 @@ class TestMinMax(object):
         assert_equal(np.amax([[1, 2, 3]], axis=1), 3)
 
     def test_datetime(self):
-        # NaTs are ignored
+        # Do not ignore NaT
         for dtype in ('m8[s]', 'm8[Y]'):
             a = np.arange(10).astype(dtype)
-            a[3] = 'NaT'
             assert_equal(np.amin(a), a[0])
             assert_equal(np.amax(a), a[9])
-            a[0] = 'NaT'
-            assert_equal(np.amin(a), a[1])
-            assert_equal(np.amax(a), a[9])
-            a.fill('NaT')
-            assert_equal(np.amin(a), a[0])
-            assert_equal(np.amax(a), a[0])
+            a[3] = 'NaT'
+            assert_equal(np.amin(a), a[3])
+            assert_equal(np.amax(a), a[3])
 
 
 class TestNewaxis(object):
@@ -5123,6 +5120,8 @@ class TestFlat(object):
 
 
 class TestResize(object):
+
+    @no_tracing
     def test_basic(self):
         x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
         if IS_PYPY:
@@ -5139,6 +5138,7 @@ class TestResize(object):
         assert_raises(ValueError, x.resize, (5, 1))
         del y  # avoid pyflakes unused variable warning.
 
+    @no_tracing
     def test_int_shape(self):
         x = np.eye(3)
         if IS_PYPY:
@@ -5172,6 +5172,7 @@ class TestResize(object):
         assert_raises(TypeError, np.eye(3).resize, order=1)
         assert_raises(TypeError, np.eye(3).resize, refcheck='hi')
 
+    @no_tracing
     def test_freeform_shape(self):
         x = np.eye(3)
         if IS_PYPY:
@@ -5180,6 +5181,7 @@ class TestResize(object):
             x.resize(3, 2, 1)
         assert_(x.shape == (3, 2, 1))
 
+    @no_tracing
     def test_zeros_appended(self):
         x = np.eye(3)
         if IS_PYPY:
@@ -5189,6 +5191,7 @@ class TestResize(object):
         assert_array_equal(x[0], np.eye(3))
         assert_array_equal(x[1], np.zeros((3, 3)))
 
+    @no_tracing
     def test_obj_obj(self):
         # check memory is initialized on resize, gh-4857
         a = np.ones(10, dtype=[('k', object, 2)])
@@ -7800,6 +7803,7 @@ if not IS_PYPY:
             d = np.ones(100)
             assert_(sys.getsizeof(d) < sys.getsizeof(d.reshape(100, 1, 1).copy()))
 
+        @no_tracing
         def test_resize(self):
             d = np.ones(100)
             old = sys.getsizeof(d)
@@ -7934,20 +7938,20 @@ class TestBytestringArrayNonzero(object):
 class TestUnicodeArrayNonzero(object):
 
     def test_empty_ustring_array_is_falsey(self):
-        assert_(not np.array([''], dtype=np.unicode))
+        assert_(not np.array([''], dtype=np.unicode_))
 
     def test_whitespace_ustring_array_is_falsey(self):
-        a = np.array(['eggs'], dtype=np.unicode)
+        a = np.array(['eggs'], dtype=np.unicode_)
         a[0] = '  \0\0'
         assert_(not a)
 
     def test_all_null_ustring_array_is_falsey(self):
-        a = np.array(['eggs'], dtype=np.unicode)
+        a = np.array(['eggs'], dtype=np.unicode_)
         a[0] = '\0\0\0\0'
         assert_(not a)
 
     def test_null_inside_ustring_array_is_truthy(self):
-        a = np.array(['eggs'], dtype=np.unicode)
+        a = np.array(['eggs'], dtype=np.unicode_)
         a[0] = ' \0 \0'
         assert_(a)
 
diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py
index cf66751f8..daec9ce6d 100644
--- a/numpy/core/tests/test_nditer.py
+++ b/numpy/core/tests/test_nditer.py
@@ -2104,7 +2104,7 @@ def test_iter_buffering_string():
     assert_equal(i[0], b'abc')
     assert_equal(i[0].dtype, np.dtype('S6'))
 
-    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
+    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode_)
     assert_equal(a.dtype, np.dtype('U4'))
     assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                     op_dtypes='U2')
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 1358b45e9..ffebdf648 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -2567,6 +2567,11 @@ class TestCorrelate(object):
         z = np.correlate(y, x, mode='full')
         assert_array_almost_equal(z, r_z)
 
+    def test_zero_size(self):
+        with pytest.raises(ValueError):
+            np.correlate(np.array([]), np.ones(1000), mode='full')
+        with pytest.raises(ValueError):
+            np.correlate(np.ones(1000), np.array([]), mode='full')
 
 class TestConvolve(object):
     def test_object(self):
@@ -2948,7 +2953,7 @@ class TestIndices(object):
         assert_array_equal(x, np.array([[0], [1], [2], [3]]))
         assert_array_equal(y, np.array([[0, 1, 2]]))
 
-    @pytest.mark.parametrize("dtype", [np.int, np.float32, np.float64])
+    @pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64])
     @pytest.mark.parametrize("dims", [(), (0,), (4, 3)])
     def test_return_type(self, dtype, dims):
         inds = np.indices(dims, dtype=dtype)
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index 9dc231deb..d5de0f2b2 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -17,6 +17,7 @@ from numpy.testing import (
         _assert_valid_refcount, HAS_REFCOUNT,
         )
 from numpy.compat import asbytes, asunicode, long, pickle
+from test.support import no_tracing
 
 try:
     RecursionError
@@ -1316,6 +1317,7 @@ class TestRegression(object):
             assert_(pickle.loads(
                 pickle.dumps(test_record, protocol=proto)) == test_record)
 
+    @no_tracing
     def test_blasdot_uninitialized_memory(self):
         # Ticket #950
         for m in [0, 1, 2]:
@@ -1511,7 +1513,7 @@ class TestRegression(object):
             min //= -1
 
         with np.errstate(divide="ignore"):
-            for t in (np.int8, np.int16, np.int32, np.int64, int, np.long):
+            for t in (np.int8, np.int16, np.int32, np.int64, int, np.compat.long):
                 test_type(t)
 
     def test_buffer_hashlib(self):
@@ -2112,7 +2114,7 @@ class TestRegression(object):
         # Ticket #1578, the mismatch only showed up when running
         # python-debug for python versions >= 2.7, and then as
         # a core dump and error message.
-        a = np.array(['abc'], dtype=np.unicode)[0]
+        a = np.array(['abc'], dtype=np.unicode_)[0]
         del a
 
     def test_refcount_error_in_clip(self):
diff --git a/numpy/core/tests/test_scalarinherit.py b/numpy/core/tests/test_scalarinherit.py
index 9e32cf624..6a5c4fde9 100644
--- a/numpy/core/tests/test_scalarinherit.py
+++ b/numpy/core/tests/test_scalarinherit.py
@@ -68,8 +68,7 @@ class TestCharacter(object):
     def test_char_repeat(self):
         np_s = np.string_('abc')
         np_u = np.unicode_('abc')
-        np_i = np.int(5)
         res_s = b'abc' * 5
         res_u = u'abc' * 5
-        assert_(np_s * np_i == res_s)
-        assert_(np_u * np_i == res_u)
+        assert_(np_s * 5 == res_s)
+        assert_(np_u * 5 == res_u)
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 707c690dd..526925ece 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -980,7 +980,7 @@ class TestUfunc(object):
         assert_array_equal(out, mm_row_col_vec.squeeze())
 
     def test_matrix_multiply(self):
-        self.compare_matrix_multiply_results(np.long)
+        self.compare_matrix_multiply_results(np.int64)
         self.compare_matrix_multiply_results(np.double)
 
     def test_matrix_multiply_umath_empty(self):
@@ -1092,7 +1092,6 @@ class TestUfunc(object):
         arr0d = np.array(HasComparisons())
         assert_equal(arr0d == arr0d, True)
         assert_equal(np.equal(arr0d, arr0d), True)  # normal behavior is a cast
-        assert_equal(np.equal(arr0d, arr0d, dtype=object), '==')
 
         arr1d = np.array([HasComparisons()])
         assert_equal(arr1d == arr1d, np.array([True]))
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 9b4ce9e47..e892e81d2 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -170,7 +170,7 @@ class TestOut(object):
 
 class TestComparisons(object):
     def test_ignore_object_identity_in_equal(self):
-        # Check error raised when comparing identical objects whose comparison
+        # Check the behavior when comparing identical objects whose comparison
         # is not a simple boolean, e.g., arrays that are compared elementwise.
         a = np.array([np.array([1, 2, 3]), None], dtype=object)
         assert_raises(ValueError, np.equal, a, a)
@@ -188,7 +188,7 @@ class TestComparisons(object):
         assert_equal(np.equal(a, a), [False])
 
     def test_ignore_object_identity_in_not_equal(self):
-        # Check error raised when comparing identical objects whose comparison
+        # Check the behavior when comparing identical objects whose comparison
         # is not a simple boolean, e.g., arrays that are compared elementwise.
         a = np.array([np.array([1, 2, 3]), None], dtype=object)
         assert_raises(ValueError, np.not_equal, a, a)
@@ -792,7 +792,7 @@ class TestAVXFloat32Transcendental(object):
     def test_sincos_float32(self):
         np.random.seed(42)
         N = 1000000
-        M = np.int(N/20)
+        M = np.int_(N/20)
         index = np.random.randint(low=0, high=N, size=M)
         x_f32 = np.float32(np.random.uniform(low=-100.,high=100.,size=N))
         # test coverage for elements > 117435.992f for which glibc is used
diff --git a/numpy/core/tests/test_umath_accuracy.py b/numpy/core/tests/test_umath_accuracy.py
index 0bab04df2..fec180786 100644
--- a/numpy/core/tests/test_umath_accuracy.py
+++ b/numpy/core/tests/test_umath_accuracy.py
@@ -38,7 +38,7 @@ class TestAccuracy(object):
                 with open(filepath) as fid:
                     file_without_comments = (r for r in fid if not r[0] in ('$', '#'))
                 data = np.genfromtxt(file_without_comments,
-                                     dtype=('|S39','|S39','|S39',np.int),
+                                     dtype=('|S39','|S39','|S39',int),
                                      names=('type','input','output','ulperr'),
                                      delimiter=',',
                                      skip_header=1)
diff --git a/numpy/distutils/ccompiler.py b/numpy/distutils/ccompiler.py
index 643879023..684c7535b 100644
--- a/numpy/distutils/ccompiler.py
+++ b/numpy/distutils/ccompiler.py
@@ -532,6 +532,11 @@ def CCompiler_customize(self, dist, need_cxx=0):
                                       'g++' in self.compiler[0] or
                                       'clang' in self.compiler[0]):
         self._auto_depends = True
+        if 'gcc' in self.compiler[0]:
+            # add std=c99 flag for gcc
+            # TODO: does this need to be more specific?
+            self.compiler.append('-std=c99')
+            self.compiler_so.append('-std=c99')
     elif os.name == 'posix':
         import tempfile
         import shutil
diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py
index 5fd1003ab..c2b3e118b 100644
--- a/numpy/distutils/system_info.py
+++ b/numpy/distutils/system_info.py
@@ -156,7 +156,7 @@ from numpy.distutils.misc_util import (is_sequence, is_string,
                                        get_shared_lib_extension)
 from numpy.distutils.command.config import config as cmd_config
 from numpy.distutils.compat import get_exception
-from numpy.distutils import customized_ccompiler
+from numpy.distutils import customized_ccompiler as _customized_ccompiler
 from numpy.distutils import _shell_utils
 import distutils.ccompiler
 import tempfile
@@ -169,6 +169,15 @@ _bits = {'32bit': 32, '64bit': 64}
 platform_bits = _bits[platform.architecture()[0]]
 
 
+global_compiler = None
+
+def customized_ccompiler():
+    global global_compiler
+    if not global_compiler:
+        global_compiler = _customized_ccompiler()
+    return global_compiler
+
+
 def _c_string_literal(s):
     """
     Convert a python string into a literal suitable for inclusion into C code
@@ -1580,7 +1589,7 @@ def get_atlas_version(**config):
             log.info('Status: %d', s)
             log.info('Output: %s', o)
 
-    if atlas_version == '3.2.1_pre3.3.6':
+    elif atlas_version == '3.2.1_pre3.3.6':
         dict_append(info, define_macros=[('NO_ATLAS_INFO', -2)])
     else:
         dict_append(info, define_macros=[(
diff --git a/numpy/doc/basics.py b/numpy/doc/basics.py
index 1871512bf..c05f347a1 100644
--- a/numpy/doc/basics.py
+++ b/numpy/doc/basics.py
@@ -18,7 +18,7 @@ The primitive types supported are tied closely to those in C:
       - C type
       - Description
 
-    * - `np.bool`
+    * - `np.bool_`
       - ``bool``
       - Boolean (True or False) stored as a byte
 
@@ -283,7 +283,7 @@ NumPy provides `numpy.iinfo` and `numpy.finfo` to verify the
 minimum or maximum values of NumPy integer and floating point values
 respectively ::
 
-    >>> np.iinfo(np.int) # Bounds of the default integer on this system.
+    >>> np.iinfo(int) # Bounds of the default integer on this system.
     iinfo(min=-9223372036854775808, max=9223372036854775807, dtype=int64)
     >>> np.iinfo(np.int32) # Bounds of a 32-bit integer
     iinfo(min=-2147483648, max=2147483647, dtype=int32)
diff --git a/numpy/fft/__init__.py b/numpy/fft/__init__.py
index fe95d8b17..37b3f0da6 100644
--- a/numpy/fft/__init__.py
+++ b/numpy/fft/__init__.py
@@ -118,8 +118,16 @@ The inverse DFT is defined as
 It differs from the forward transform by the sign of the exponential
 argument and the default normalization by :math:`1/n`.
 
+Type Promotion
+--------------
+
+`numpy.fft` promotes ``float32`` and ``complex64`` arrays to ``float64`` and
+``complex128`` arrays respectively. For an FFT implementation that does not
+promote input arrays, see `scipy.fftpack`.
+
 Normalization
 -------------
+
 The default normalization has the direct transforms unscaled and the inverse
 transforms are scaled by :math:`1/n`. It is possible to obtain unitary
 transforms by setting the keyword argument ``norm`` to ``"ortho"`` (default is
diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py
index d72384e99..a011e52a9 100644
--- a/numpy/lib/financial.py
+++ b/numpy/lib/financial.py
@@ -12,6 +12,7 @@ otherwise stated.
 """
 from __future__ import division, absolute_import, print_function
 
+import warnings
 from decimal import Decimal
 import functools
 
@@ -19,6 +20,10 @@ import numpy as np
 from numpy.core import overrides
 
 
+_depmsg = ("numpy.{name} is deprecated and will be removed from NumPy 1.20. "
+           "Use numpy_financial.{name} instead "
+           "(https://pypi.org/project/numpy-financial/).")
+
 array_function_dispatch = functools.partial(
     overrides.array_function_dispatch, module='numpy')
 
@@ -45,6 +50,8 @@ def _convert_when(when):
 
 
 def _fv_dispatcher(rate, nper, pmt, pv, when=None):
+    warnings.warn(_depmsg.format(name='fv'),
+                  DeprecationWarning, stacklevel=3)
     return (rate, nper, pmt, pv)
 
 
@@ -53,6 +60,12 @@ def fv(rate, nper, pmt, pv, when='end'):
     """
     Compute the future value.
 
+    .. deprecated:: 1.18
+
+       `fv` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Given:
      * a present value, `pv`
      * an interest `rate` compounded once per period, of which
@@ -100,7 +113,9 @@ def fv(rate, nper, pmt, pv, when='end'):
 
     References
     ----------
-    .. [WRW] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
        Open Document Format for Office Applications (OpenDocument)v1.2,
        Part 2: Recalculated Formula (OpenFormula) Format - Annotated Version,
        Pre-Draft 12. Organization for the Advancement of Structured Information
@@ -109,6 +124,7 @@ def fv(rate, nper, pmt, pv, when='end'):
        http://www.oasis-open.org/committees/documents.php?wg_abbrev=office-formula
        OpenDocument-formula-20090508.odt
 
+
     Examples
     --------
     What is the future value after 10 years of saving $100 now, with
@@ -139,6 +155,8 @@ def fv(rate, nper, pmt, pv, when='end'):
 
 
 def _pmt_dispatcher(rate, nper, pv, fv=None, when=None):
+    warnings.warn(_depmsg.format(name='pmt'),
+                  DeprecationWarning, stacklevel=3)
     return (rate, nper, pv, fv)
 
 
@@ -147,6 +165,12 @@ def pmt(rate, nper, pv, fv=0, when='end'):
     """
     Compute the payment against loan principal plus interest.
 
+    .. deprecated:: 1.18
+
+       `pmt` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Given:
      * a present value, `pv` (e.g., an amount borrowed)
      * a future value, `fv` (e.g., 0)
@@ -204,7 +228,9 @@ def pmt(rate, nper, pv, fv=0, when='end'):
 
     References
     ----------
-    .. [WRW] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
        Open Document Format for Office Applications (OpenDocument)v1.2,
        Part 2: Recalculated Formula (OpenFormula) Format - Annotated Version,
        Pre-Draft 12. Organization for the Advancement of Structured Information
@@ -237,6 +263,8 @@ def pmt(rate, nper, pv, fv=0, when='end'):
 
 
 def _nper_dispatcher(rate, pmt, pv, fv=None, when=None):
+    warnings.warn(_depmsg.format(name='nper'),
+                  DeprecationWarning, stacklevel=3)
     return (rate, pmt, pv, fv)
 
 
@@ -245,6 +273,12 @@ def nper(rate, pmt, pv, fv=0, when='end'):
     """
     Compute the number of periodic payments.
 
+    .. deprecated:: 1.18
+
+       `nper` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     :class:`decimal.Decimal` type is not supported.
 
     Parameters
@@ -270,6 +304,11 @@ def nper(rate, pmt, pv, fv=0, when='end'):
 
      fv + pv + pmt*nper = 0
 
+    References
+    ----------
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+
     Examples
     --------
     If you only had $150/month to pay towards the loan, how long would it take
@@ -311,6 +350,8 @@ def nper(rate, pmt, pv, fv=0, when='end'):
 
 
 def _ipmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+    warnings.warn(_depmsg.format(name='ipmt'),
+                  DeprecationWarning, stacklevel=3)
     return (rate, per, nper, pv, fv)
 
 
@@ -319,6 +360,12 @@ def ipmt(rate, per, nper, pv, fv=0, when='end'):
     """
     Compute the interest portion of a payment.
 
+    .. deprecated:: 1.18
+
+       `ipmt` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     rate : scalar or array_like of shape(M, )
@@ -354,6 +401,11 @@ def ipmt(rate, per, nper, pv, fv=0, when='end'):
 
     ``pmt = ppmt + ipmt``
 
+    References
+    ----------
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+
     Examples
     --------
     What is the amortization schedule for a 1 year loan of $2500 at
@@ -422,6 +474,8 @@ def _rbl(rate, per, pmt, pv, when):
 
 
 def _ppmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+    warnings.warn(_depmsg.format(name='ppmt'),
+                  DeprecationWarning, stacklevel=3)
     return (rate, per, nper, pv, fv)
 
 
@@ -430,6 +484,12 @@ def ppmt(rate, per, nper, pv, fv=0, when='end'):
     """
     Compute the payment against loan principal.
 
+    .. deprecated:: 1.18
+
+       `ppmt` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     rate : array_like
@@ -450,12 +510,19 @@ def ppmt(rate, per, nper, pv, fv=0, when='end'):
     --------
     pmt, pv, ipmt
 
+    References
+    ----------
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+
     """
     total = pmt(rate, nper, pv, fv, when)
     return total - ipmt(rate, per, nper, pv, fv, when)
 
 
 def _pv_dispatcher(rate, nper, pmt, fv=None, when=None):
+    warnings.warn(_depmsg.format(name='pv'),
+                  DeprecationWarning, stacklevel=3)
     return (rate, nper, nper, pv, fv)
 
 
@@ -464,6 +531,12 @@ def pv(rate, nper, pmt, fv=0, when='end'):
     """
     Compute the present value.
 
+    .. deprecated:: 1.18
+
+       `pv` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Given:
      * a future value, `fv`
      * an interest `rate` compounded once per period, of which
@@ -510,7 +583,9 @@ def pv(rate, nper, pmt, fv=0, when='end'):
 
     References
     ----------
-    .. [WRW] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
        Open Document Format for Office Applications (OpenDocument)v1.2,
        Part 2: Recalculated Formula (OpenFormula) Format - Annotated Version,
        Pre-Draft 12. Organization for the Advancement of Structured Information
@@ -567,6 +642,8 @@ def _g_div_gp(r, n, p, x, y, w):
 
 def _rate_dispatcher(nper, pmt, pv, fv, when=None, guess=None, tol=None,
                      maxiter=None):
+    warnings.warn(_depmsg.format(name='rate'),
+                  DeprecationWarning, stacklevel=3)
     return (nper, pmt, pv, fv)
 
 
@@ -582,6 +659,12 @@ def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
     """
     Compute the rate of interest per period.
 
+    .. deprecated:: 1.18
+
+       `rate` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     nper : array_like
@@ -612,13 +695,16 @@ def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
 
     References
     ----------
-    Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May). Open Document
-    Format for Office Applications (OpenDocument)v1.2, Part 2: Recalculated
-    Formula (OpenFormula) Format - Annotated Version, Pre-Draft 12.
-    Organization for the Advancement of Structured Information Standards
-    (OASIS). Billerica, MA, USA. [ODT Document]. Available:
-    http://www.oasis-open.org/committees/documents.php?wg_abbrev=office-formula
-    OpenDocument-formula-20090508.odt
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
+       Open Document Format for Office Applications (OpenDocument)v1.2,
+       Part 2: Recalculated Formula (OpenFormula) Format - Annotated Version,
+       Pre-Draft 12. Organization for the Advancement of Structured Information
+       Standards (OASIS). Billerica, MA, USA. [ODT Document].
+       Available:
+       http://www.oasis-open.org/committees/documents.php?wg_abbrev=office-formula
+       OpenDocument-formula-20090508.odt
 
     """
     when = _convert_when(when)
@@ -651,6 +737,8 @@ def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
 
 
 def _irr_dispatcher(values):
+    warnings.warn(_depmsg.format(name='irr'),
+                  DeprecationWarning, stacklevel=3)
     return (values,)
 
 
@@ -659,6 +747,12 @@ def irr(values):
     """
     Return the Internal Rate of Return (IRR).
 
+    .. deprecated:: 1.18
+
+       `irr` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     This is the "average" periodically compounded rate of return
     that gives a net present value of 0.0; for a more complete explanation,
     see Notes below.
@@ -693,13 +787,15 @@ def irr(values):
      + \\frac{55}{(1+r)^3} + \\frac{20}{(1+r)^4} = 0
 
     In general, for `values` :math:`= [v_0, v_1, ... v_M]`,
-    irr is the solution of the equation: [G]_
+    irr is the solution of the equation: [2]_
 
     .. math:: \\sum_{t=0}^M{\\frac{v_t}{(1+irr)^{t}}} = 0
 
     References
     ----------
-    .. [G] L. J. Gitman, "Principles of Managerial Finance, Brief," 3rd ed.,
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] L. J. Gitman, "Principles of Managerial Finance, Brief," 3rd ed.,
        Addison-Wesley, 2003, pg. 348.
 
     Examples
@@ -734,6 +830,8 @@ def irr(values):
 
 
 def _npv_dispatcher(rate, values):
+    warnings.warn(_depmsg.format(name='npv'),
+                  DeprecationWarning, stacklevel=3)
     return (values,)
 
 
@@ -742,6 +840,12 @@ def npv(rate, values):
     """
     Returns the NPV (Net Present Value) of a cash flow series.
 
+    .. deprecated:: 1.18
+
+       `npv` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     rate : scalar
@@ -772,13 +876,15 @@ def npv(rate, values):
 
     Notes
     -----
-    Returns the result of: [G]_
+    Returns the result of: [2]_
 
     .. math :: \\sum_{t=0}^{M-1}{\\frac{values_t}{(1+rate)^{t}}}
 
     References
     ----------
-    .. [G] L. J. Gitman, "Principles of Managerial Finance, Brief," 3rd ed.,
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] L. J. Gitman, "Principles of Managerial Finance, Brief," 3rd ed.,
        Addison-Wesley, 2003, pg. 346.
 
     Examples
@@ -808,6 +914,8 @@ def npv(rate, values):
 
 
 def _mirr_dispatcher(values, finance_rate, reinvest_rate):
+    warnings.warn(_depmsg.format(name='mirr'),
+                  DeprecationWarning, stacklevel=3)
     return (values,)
 
 
@@ -816,6 +924,12 @@ def mirr(values, finance_rate, reinvest_rate):
     """
     Modified internal rate of return.
 
+    .. deprecated:: 1.18
+
+       `mirr` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     values : array_like
@@ -832,6 +946,10 @@ def mirr(values, finance_rate, reinvest_rate):
     out : float
         Modified internal rate of return
 
+    References
+    ----------
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
     """
     values = np.asarray(values)
     n = values.size
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index c39c2eea1..3ad630a7d 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1893,7 +1893,7 @@ class vectorize(object):
         typecode characters or a list of data type specifiers. There should
         be one data type specifier for each output.
     doc : str, optional
-        The docstring for the function. If `None`, the docstring will be the
+        The docstring for the function. If None, the docstring will be the
         ``pyfunc.__doc__``.
     excluded : set, optional
         Set of strings or integers representing the positional or keyword
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index 8474bd5d3..03c365ab6 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -22,6 +22,16 @@ array_function_dispatch = functools.partial(
 _range = range
 
 
+def _ptp(x):
+    """Peak-to-peak value of x.
+
+    This implementation avoids the problem of signed integer arrays having a
+    peak-to-peak value that cannot be represented with the array's data type.
+    This function returns an unsigned value for signed integer arrays.
+    """
+    return _unsigned_subtract(x.max(), x.min())
+
+
 def _hist_bin_sqrt(x, range):
     """
     Square root histogram bin estimator.
@@ -40,7 +50,7 @@ def _hist_bin_sqrt(x, range):
     h : An estimate of the optimal bin width for the given data.
     """
     del range  # unused
-    return x.ptp() / np.sqrt(x.size)
+    return _ptp(x) / np.sqrt(x.size)
 
 
 def _hist_bin_sturges(x, range):
@@ -63,7 +73,7 @@ def _hist_bin_sturges(x, range):
     h : An estimate of the optimal bin width for the given data.
     """
     del range  # unused
-    return x.ptp() / (np.log2(x.size) + 1.0)
+    return _ptp(x) / (np.log2(x.size) + 1.0)
 
 
 def _hist_bin_rice(x, range):
@@ -87,7 +97,7 @@ def _hist_bin_rice(x, range):
     h : An estimate of the optimal bin width for the given data.
     """
     del range  # unused
-    return x.ptp() / (2.0 * x.size ** (1.0 / 3))
+    return _ptp(x) / (2.0 * x.size ** (1.0 / 3))
 
 
 def _hist_bin_scott(x, range):
@@ -137,7 +147,7 @@ def _hist_bin_stone(x, range):
     """
 
     n = x.size
-    ptp_x = np.ptp(x)
+    ptp_x = _ptp(x)
     if n <= 1 or ptp_x == 0:
         return 0
 
@@ -184,7 +194,7 @@ def _hist_bin_doane(x, range):
             np.true_divide(temp, sigma, temp)
             np.power(temp, 3, temp)
             g1 = np.mean(temp)
-            return x.ptp() / (1.0 + np.log2(x.size) +
+            return _ptp(x) / (1.0 + np.log2(x.size) +
                                     np.log2(1.0 + np.absolute(g1) / sg1))
     return 0.0
 
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 6cffab6ac..457cca146 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -99,7 +99,7 @@ def _replace_nan(a, val):
 
     if a.dtype == np.object_:
         # object arrays do not support `isnan` (gh-9009), so make a guess
-        mask = a != a
+        mask = np.not_equal(a, a, dtype=bool)
     elif issubclass(a.dtype.type, np.inexact):
         mask = np.isnan(a)
     else:
@@ -244,8 +244,8 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
     out : ndarray, optional
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
-        expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details.
+        expected output, but the type will be cast if necessary. See
+        `ufuncs-output-type` for more details.
 
         .. versionadded:: 1.8.0
     keepdims : bool, optional
@@ -359,8 +359,8 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
     out : ndarray, optional
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
-        expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details.
+        expected output, but the type will be cast if necessary. See
+        `ufuncs-output-type` for more details.
 
         .. versionadded:: 1.8.0
     keepdims : bool, optional
@@ -585,8 +585,8 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``. If provided, it must have the same shape as the
         expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details. The casting of NaN to integer can yield
-        unexpected results.
+        `ufuncs-output-type` for more details. The casting of NaN to integer
+        can yield unexpected results.
 
         .. versionadded:: 1.8.0
     keepdims : bool, optional
@@ -681,9 +681,9 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     out : ndarray, optional
         Alternate output array in which to place the result.  The default
         is ``None``. If provided, it must have the same shape as the
-        expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details. The casting of NaN to integer can yield
-        unexpected results.
+        expected output, but the type will be cast if necessary. See
+        `ufuncs-output-type` for more details. The casting of NaN to integer
+        can yield unexpected results.
     keepdims : bool, optional
         If True, the axes which are reduced are left in the result as
         dimensions with size one. With this option, the result will
@@ -750,8 +750,8 @@ def nancumsum(a, axis=None, dtype=None, out=None):
     out : ndarray, optional
         Alternative output array in which to place the result. It must
         have the same shape and buffer length as the expected output
-        but the type will be cast if necessary. See `doc.ufuncs`
-        (Section "Output arguments") for more details.
+        but the type will be cast if necessary. See `ufuncs-output-type` for
+        more details.
 
     Returns
     -------
@@ -888,8 +888,8 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     out : ndarray, optional
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
-        expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details.
+        expected output, but the type will be cast if necessary. See
+        `ufuncs-output-type` for more details.
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
         in the result as dimensions with size one. With this option,
@@ -1473,7 +1473,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     mean : Average
     var : Variance while not ignoring NaNs
     nanstd, nanmean
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
@@ -1625,7 +1625,7 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     --------
     var, mean, std
     nanvar, nanmean
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 7e1d4db4f..430d44374 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1529,9 +1529,9 @@ def fromregex(file, regexp, dtype, encoding=None):
             dtype = np.dtype(dtype)
 
         content = file.read()
-        if isinstance(content, bytes) and isinstance(regexp, np.unicode):
+        if isinstance(content, bytes) and isinstance(regexp, np.compat.unicode):
             regexp = asbytes(regexp)
-        elif isinstance(content, np.unicode) and isinstance(regexp, bytes):
+        elif isinstance(content, np.compat.unicode) and isinstance(regexp, bytes):
             regexp = asstr(regexp)
 
         if not hasattr(regexp, 'match'):
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 2c72f623c..3d07a0de4 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -479,10 +479,10 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
         coefficients for `k`-th data set are in ``p[:,k]``.
 
     residuals, rank, singular_values, rcond
-        Present only if `full` = True.  Residuals of the least-squares fit,
-        the effective rank of the scaled Vandermonde coefficient matrix,
-        its singular values, and the specified value of `rcond`. For more
-        details, see `linalg.lstsq`.
+        Present only if `full` = True.  The sum of squared residuals of
+        the least-squares fit, the effective rank of the scaled Vandermonde
+        coefficient matrix, its singular values, and the specified value of
+        `rcond`. For more details, see `linalg.lstsq`.
 
     V : ndarray, shape (M,M) or (M,M,K)
         Present only if `full` = False and `cov`=True.  The covariance
diff --git a/numpy/lib/tests/test_financial.py b/numpy/lib/tests/test_financial.py
index 21088765f..cb67f7c0f 100644
--- a/numpy/lib/tests/test_financial.py
+++ b/numpy/lib/tests/test_financial.py
@@ -1,5 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
+import warnings
 from decimal import Decimal
 
 import numpy as np
@@ -8,22 +9,35 @@ from numpy.testing import (
     )
 
 
+def filter_deprecation(func):
+    def newfunc(*args, **kwargs):
+        with warnings.catch_warnings(record=True) as ws:
+            warnings.filterwarnings('always', category=DeprecationWarning)
+            func(*args, **kwargs)
+            assert_(all(w.category is DeprecationWarning for w in ws))
+    return newfunc
+
+
 class TestFinancial(object):
+    @filter_deprecation
     def test_npv_irr_congruence(self):
         # IRR is defined as the rate required for the present value of a
         # a series of cashflows to be zero i.e. NPV(IRR(x), x) = 0
         cashflows = np.array([-40000, 5000, 8000, 12000, 30000])
         assert_allclose(np.npv(np.irr(cashflows), cashflows), 0, atol=1e-10, rtol=0)
 
+    @filter_deprecation
     def test_rate(self):
         assert_almost_equal(
             np.rate(10, 0, -3500, 10000),
             0.1107, 4)
 
+    @filter_deprecation
     def test_rate_decimal(self):
         rate = np.rate(Decimal('10'), Decimal('0'), Decimal('-3500'), Decimal('10000'))
         assert_equal(Decimal('0.1106908537142689284704528100'), rate)
 
+    @filter_deprecation
     def test_irr(self):
         v = [-150000, 15000, 25000, 35000, 45000, 60000]
         assert_almost_equal(np.irr(v), 0.0524, 2)
@@ -43,20 +57,25 @@ class TestFinancial(object):
         v = [-1, -2, -3]
         assert_equal(np.irr(v), np.nan)
 
+    @filter_deprecation
     def test_pv(self):
         assert_almost_equal(np.pv(0.07, 20, 12000, 0), -127128.17, 2)
 
+    @filter_deprecation
     def test_pv_decimal(self):
         assert_equal(np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0')),
                      Decimal('-127128.1709461939327295222005'))
 
+    @filter_deprecation
     def test_fv(self):
         assert_equal(np.fv(0.075, 20, -2000, 0, 0), 86609.362673042924)
 
+    @filter_deprecation
     def test_fv_decimal(self):
         assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), 0, 0),
                      Decimal('86609.36267304300040536731624'))
 
+    @filter_deprecation
     def test_pmt(self):
         res = np.pmt(0.08 / 12, 5 * 12, 15000)
         tgt = -304.145914
@@ -71,6 +90,7 @@ class TestFinancial(object):
         tgt = np.array([[-166.66667, -19311.258], [-626.90814, -19311.258]])
         assert_allclose(res, tgt)
 
+    @filter_deprecation
     def test_pmt_decimal(self):
         res = np.pmt(Decimal('0.08') / Decimal('12'), 5 * 12, 15000)
         tgt = Decimal('-304.1459143262052370338701494')
@@ -94,18 +114,22 @@ class TestFinancial(object):
         assert_equal(res[1][0], tgt[1][0])
         assert_equal(res[1][1], tgt[1][1])
 
+    @filter_deprecation
     def test_ppmt(self):
         assert_equal(np.round(np.ppmt(0.1 / 12, 1, 60, 55000), 2), -710.25)
 
+    @filter_deprecation
     def test_ppmt_decimal(self):
         assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000')),
                      Decimal('-710.2541257864217612489830917'))
 
     # Two tests showing how Decimal is actually getting at a more exact result
     # .23 / 12 does not come out nicely as a float but does as a decimal
+    @filter_deprecation
     def test_ppmt_special_rate(self):
         assert_equal(np.round(np.ppmt(0.23 / 12, 1, 60, 10000000000), 8), -90238044.232277036)
 
+    @filter_deprecation
     def test_ppmt_special_rate_decimal(self):
         # When rounded out to 8 decimal places like the float based test, this should not equal the same value
         # as the float, substituted for the decimal
@@ -118,31 +142,38 @@ class TestFinancial(object):
         assert_equal(np.ppmt(Decimal('0.23') / Decimal('12'), 1, 60, Decimal('10000000000')),
                      Decimal('-90238044.2322778884413969909'))
 
+    @filter_deprecation
     def test_ipmt(self):
         assert_almost_equal(np.round(np.ipmt(0.1 / 12, 1, 24, 2000), 2), -16.67)
 
+    @filter_deprecation
     def test_ipmt_decimal(self):
         result = np.ipmt(Decimal('0.1') / Decimal('12'), 1, 24, 2000)
         assert_equal(result.flat[0], Decimal('-16.66666666666666666666666667'))
 
+    @filter_deprecation
     def test_nper(self):
         assert_almost_equal(np.nper(0.075, -2000, 0, 100000.),
                             21.54, 2)
 
+    @filter_deprecation
     def test_nper2(self):
         assert_almost_equal(np.nper(0.0, -2000, 0, 100000.),
                             50.0, 1)
 
+    @filter_deprecation
     def test_npv(self):
         assert_almost_equal(
             np.npv(0.05, [-15000, 1500, 2500, 3500, 4500, 6000]),
             122.89, 2)
 
+    @filter_deprecation
     def test_npv_decimal(self):
         assert_equal(
             np.npv(Decimal('0.05'), [-15000, 1500, 2500, 3500, 4500, 6000]),
             Decimal('122.894854950942692161628715'))
 
+    @filter_deprecation
     def test_mirr(self):
         val = [-4500, -800, 800, 800, 600, 600, 800, 800, 700, 3000]
         assert_almost_equal(np.mirr(val, 0.08, 0.055), 0.0666, 4)
@@ -156,6 +187,7 @@ class TestFinancial(object):
         val = [39000, 30000, 21000, 37000, 46000]
         assert_(np.isnan(np.mirr(val, 0.10, 0.12)))
 
+    @filter_deprecation
     def test_mirr_decimal(self):
         val = [Decimal('-4500'), Decimal('-800'), Decimal('800'), Decimal('800'),
                Decimal('600'), Decimal('600'), Decimal('800'), Decimal('800'),
@@ -174,6 +206,7 @@ class TestFinancial(object):
         val = [Decimal('39000'), Decimal('30000'), Decimal('21000'), Decimal('37000'), Decimal('46000')]
         assert_(np.isnan(np.mirr(val, Decimal('0.10'), Decimal('0.12'))))
 
+    @filter_deprecation
     def test_when(self):
         # begin
         assert_equal(np.rate(10, 20, -3500, 10000, 1),
@@ -238,6 +271,7 @@ class TestFinancial(object):
         assert_equal(np.nper(0.075, -2000, 0, 100000., 0),
                      np.nper(0.075, -2000, 0, 100000., 'end'))
 
+    @filter_deprecation
     def test_decimal_with_when(self):
         """Test that decimals are still supported if the when argument is passed"""
         # begin
@@ -312,6 +346,7 @@ class TestFinancial(object):
                      np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'),
                              Decimal('0'), 'end').flat[0])
 
+    @filter_deprecation
     def test_broadcast(self):
         assert_almost_equal(np.nper(0.075, -2000, 0, 100000., [0, 1]),
                             [21.5449442, 20.76156441], 4)
@@ -329,6 +364,7 @@ class TestFinancial(object):
                             [-74.998201, -75.62318601, -75.62318601,
                              -76.88882405, -76.88882405], 4)
 
+    @filter_deprecation
     def test_broadcast_decimal(self):
         # Use almost equal because precision is tested in the explicit tests, this test is to ensure
         # broadcast with Decimal is not broken.
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 1eae8ccfb..9075ff538 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -2523,7 +2523,7 @@ class TestPercentile(object):
         assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan)
 
     def test_fraction(self):
-        x = [Fraction(i, 2) for i in np.arange(8)]
+        x = [Fraction(i, 2) for i in range(8)]
 
         p = np.percentile(x, Fraction(0))
         assert_equal(p, Fraction(0))
@@ -2943,7 +2943,7 @@ class TestQuantile(object):
 
     def test_fraction(self):
         # fractional input, integral quantile
-        x = [Fraction(i, 2) for i in np.arange(8)]
+        x = [Fraction(i, 2) for i in range(8)]
 
         q = np.quantile(x, 0)
         assert_equal(q, 0)
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index 4895a722c..dbf189f3e 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -8,6 +8,7 @@ from numpy.testing import (
     assert_array_almost_equal, assert_raises, assert_allclose,
     assert_array_max_ulp, assert_raises_regex, suppress_warnings,
     )
+import pytest
 
 
 class TestHistogram(object):
@@ -591,6 +592,16 @@ class TestHistogramOptimBinNums(object):
                 msg += " with datasize of {0}".format(testlen)
                 assert_equal(len(a), numbins, err_msg=msg)
 
+    @pytest.mark.parametrize("bins", ['auto', 'fd', 'doane', 'scott',
+                                      'stone', 'rice', 'sturges'])
+    def test_signed_integer_data(self, bins):
+        # Regression test for gh-14379.
+        a = np.array([-2, 0, 127], dtype=np.int8)
+        hist, edges = np.histogram(a, bins=bins)
+        hist32, edges32 = np.histogram(a.astype(np.int32), bins=bins)
+        assert_array_equal(hist, hist32)
+        assert_array_equal(edges, edges32)
+
     def test_simple_weighted(self):
         """
         Check that weighted data raises a TypeError
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 1181fe986..6e2291ca3 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -518,7 +518,7 @@ class TestSaveTxt(object):
 
     def test_unicode(self):
         utf8 = b'\xcf\x96'.decode('UTF-8')
-        a = np.array([utf8], dtype=np.unicode)
+        a = np.array([utf8], dtype=np.unicode_)
         with tempdir() as tmpdir:
             # set encoding as on windows it may not be unicode even on py3
             np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'],
@@ -526,7 +526,7 @@ class TestSaveTxt(object):
 
     def test_unicode_roundtrip(self):
         utf8 = b'\xcf\x96'.decode('UTF-8')
-        a = np.array([utf8], dtype=np.unicode)
+        a = np.array([utf8], dtype=np.unicode_)
         # our gz wrapper support encoding
         suffixes = ['', '.gz']
         # stdlib 2 versions do not support encoding
@@ -540,12 +540,12 @@ class TestSaveTxt(object):
                 np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
                            fmt=['%s'], encoding='UTF-16-LE')
                 b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
-                               encoding='UTF-16-LE', dtype=np.unicode)
+                               encoding='UTF-16-LE', dtype=np.unicode_)
                 assert_array_equal(a, b)
 
     def test_unicode_bytestream(self):
         utf8 = b'\xcf\x96'.decode('UTF-8')
-        a = np.array([utf8], dtype=np.unicode)
+        a = np.array([utf8], dtype=np.unicode_)
         s = BytesIO()
         np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
         s.seek(0)
@@ -553,7 +553,7 @@ class TestSaveTxt(object):
 
     def test_unicode_stringstream(self):
         utf8 = b'\xcf\x96'.decode('UTF-8')
-        a = np.array([utf8], dtype=np.unicode)
+        a = np.array([utf8], dtype=np.unicode_)
         s = StringIO()
         np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
         s.seek(0)
@@ -632,12 +632,12 @@ class LoadTxtBase(object):
         with temppath() as path:
             with open(path, "wb") as f:
                 f.write(nonascii.encode("UTF-16"))
-            x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode)
+            x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode_)
             assert_array_equal(x, nonascii)
 
     def test_binary_decode(self):
         utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
-        v = self.loadfunc(BytesIO(utf16), dtype=np.unicode, encoding='UTF-16')
+        v = self.loadfunc(BytesIO(utf16), dtype=np.unicode_, encoding='UTF-16')
         assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
 
     def test_converters_decode(self):
@@ -645,7 +645,7 @@ class LoadTxtBase(object):
         c = TextIO()
         c.write(b'\xcf\x96')
         c.seek(0)
-        x = self.loadfunc(c, dtype=np.unicode,
+        x = self.loadfunc(c, dtype=np.unicode_,
                           converters={0: lambda x: x.decode('UTF-8')})
         a = np.array([b'\xcf\x96'.decode('UTF-8')])
         assert_array_equal(x, a)
@@ -656,7 +656,7 @@ class LoadTxtBase(object):
         with temppath() as path:
             with io.open(path, 'wt', encoding='UTF-8') as f:
                 f.write(utf8)
-            x = self.loadfunc(path, dtype=np.unicode,
+            x = self.loadfunc(path, dtype=np.unicode_,
                               converters={0: lambda x: x + 't'},
                               encoding='UTF-8')
             a = np.array([utf8 + 't'])
@@ -1104,7 +1104,7 @@ class TestLoadTxt(LoadTxtBase):
             with open(path, "wb") as f:
                 f.write(butf8)
             with open(path, "rb") as f:
-                x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode)
+                x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode_)
             assert_array_equal(x, sutf8)
             # test broken latin1 conversion people now rely on
             with open(path, "rb") as f:
@@ -1587,7 +1587,7 @@ M   33  21.99
             with open(path, 'wb') as f:
                 f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
             test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
-                                 usecols=(2, 3), converters={2: np.unicode},
+                                 usecols=(2, 3), converters={2: np.compat.unicode},
                                  encoding='UTF-8')
         control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
                            dtype=[('', '|U11'), ('', float)])
@@ -2126,7 +2126,7 @@ M   33  21.99
             ctl = np.array([
                      ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                      ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
-                     dtype=np.unicode)
+                     dtype=np.unicode_)
             assert_array_equal(test, ctl)
 
             # test a mixed dtype
@@ -2169,7 +2169,7 @@ M   33  21.99
                      ["norm1", "norm2", "norm3"],
                      ["norm1", latin1, "norm3"],
                      ["test1", "testNonethe" + utf8, "test3"]],
-                     dtype=np.unicode)
+                     dtype=np.unicode_)
             assert_array_equal(test, ctl)
 
     def test_recfromtxt(self):
diff --git a/numpy/lib/tests/test_ufunclike.py b/numpy/lib/tests/test_ufunclike.py
index 0f06876a1..64280616f 100644
--- a/numpy/lib/tests/test_ufunclike.py
+++ b/numpy/lib/tests/test_ufunclike.py
@@ -21,7 +21,7 @@ class TestUfunclike(object):
         assert_equal(res, tgt)
         assert_equal(out, tgt)
 
-        a = a.astype(np.complex)
+        a = a.astype(np.complex_)
         with assert_raises(TypeError):
             ufl.isposinf(a)
 
@@ -36,7 +36,7 @@ class TestUfunclike(object):
         assert_equal(res, tgt)
         assert_equal(out, tgt)
 
-        a = a.astype(np.complex)
+        a = a.astype(np.complex_)
         with assert_raises(TypeError):
             ufl.isneginf(a)
 
diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
index 816a200eb..665b9fbec 100644
--- a/numpy/linalg/linalg.py
+++ b/numpy/linalg/linalg.py
@@ -194,37 +194,33 @@ def _fastCopyAndTranspose(type, *arrays):
     else:
         return cast_arrays
 
-def _assertRank2(*arrays):
+def _assert_2d(*arrays):
     for a in arrays:
         if a.ndim != 2:
             raise LinAlgError('%d-dimensional array given. Array must be '
                     'two-dimensional' % a.ndim)
 
-def _assertRankAtLeast2(*arrays):
+def _assert_stacked_2d(*arrays):
     for a in arrays:
         if a.ndim < 2:
             raise LinAlgError('%d-dimensional array given. Array must be '
                     'at least two-dimensional' % a.ndim)
 
-def _assertNdSquareness(*arrays):
+def _assert_stacked_square(*arrays):
     for a in arrays:
         m, n = a.shape[-2:]
         if m != n:
             raise LinAlgError('Last 2 dimensions of the array must be square')
 
-def _assertFinite(*arrays):
+def _assert_finite(*arrays):
     for a in arrays:
-        if not (isfinite(a).all()):
+        if not isfinite(a).all():
             raise LinAlgError("Array must not contain infs or NaNs")
 
-def _isEmpty2d(arr):
+def _is_empty_2d(arr):
     # check size first for efficiency
     return arr.size == 0 and product(arr.shape[-2:]) == 0
 
-def _assertNoEmpty2d(*arrays):
-    for a in arrays:
-        if _isEmpty2d(a):
-            raise LinAlgError("Arrays cannot be empty")
 
 def transpose(a):
     """
@@ -386,8 +382,8 @@ def solve(a, b):
 
     """
     a, _ = _makearray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
     b, wrap = _makearray(b)
     t, result_t = _commonType(a, b)
 
@@ -542,8 +538,8 @@ def inv(a):
 
     """
     a, wrap = _makearray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
     t, result_t = _commonType(a)
 
     signature = 'D->D' if isComplexType(t) else 'd->d'
@@ -622,8 +618,8 @@ def matrix_power(a, n):
 
     """
     a = asanyarray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
 
     try:
         n = operator.index(n)
@@ -752,8 +748,8 @@ def cholesky(a):
     extobj = get_linalg_error_extobj(_raise_linalgerror_nonposdef)
     gufunc = _umath_linalg.cholesky_lo
     a, wrap = _makearray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
     t, result_t = _commonType(a)
     signature = 'D->D' if isComplexType(t) else 'd->d'
     r = gufunc(a, signature=signature, extobj=extobj)
@@ -895,7 +891,7 @@ def qr(a, mode='reduced'):
             raise ValueError("Unrecognized mode '%s'" % mode)
 
     a, wrap = _makearray(a)
-    _assertRank2(a)
+    _assert_2d(a)
     m, n = a.shape
     t, result_t = _commonType(a)
     a = _fastCopyAndTranspose(t, a)
@@ -1047,9 +1043,9 @@ def eigvals(a):
 
     """
     a, wrap = _makearray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
-    _assertFinite(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
+    _assert_finite(a)
     t, result_t = _commonType(a)
 
     extobj = get_linalg_error_extobj(
@@ -1157,8 +1153,8 @@ def eigvalsh(a, UPLO='L'):
         gufunc = _umath_linalg.eigvalsh_up
 
     a, wrap = _makearray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
     t, result_t = _commonType(a)
     signature = 'D->d' if isComplexType(t) else 'd->d'
     w = gufunc(a, signature=signature, extobj=extobj)
@@ -1294,9 +1290,9 @@ def eig(a):
 
     """
     a, wrap = _makearray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
-    _assertFinite(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
+    _assert_finite(a)
     t, result_t = _commonType(a)
 
     extobj = get_linalg_error_extobj(
@@ -1435,8 +1431,8 @@ def eigh(a, UPLO='L'):
         raise ValueError("UPLO argument must be 'L' or 'U'")
 
     a, wrap = _makearray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
     t, result_t = _commonType(a)
 
     extobj = get_linalg_error_extobj(
@@ -1608,7 +1604,7 @@ def svd(a, full_matrices=True, compute_uv=True, hermitian=False):
             s = abs(s)
             return s
 
-    _assertRankAtLeast2(a)
+    _assert_stacked_2d(a)
     t, result_t = _commonType(a)
 
     extobj = get_linalg_error_extobj(_raise_linalgerror_svd_nonconvergence)
@@ -1729,7 +1725,8 @@ def cond(x, p=None):
 
     """
     x = asarray(x)  # in case we have a matrix
-    _assertNoEmpty2d(x)
+    if _is_empty_2d(x):
+        raise LinAlgError("cond is not defined on empty arrays")
     if p is None or p == 2 or p == -2:
         s = svd(x, compute_uv=False)
         with errstate(all='ignore'):
@@ -1740,8 +1737,8 @@ def cond(x, p=None):
     else:
         # Call inv(x) ignoring errors. The result array will
         # contain nans in the entries where inversion failed.
-        _assertRankAtLeast2(x)
-        _assertNdSquareness(x)
+        _assert_stacked_2d(x)
+        _assert_stacked_square(x)
         t, result_t = _commonType(x)
         signature = 'D->D' if isComplexType(t) else 'd->d'
         with errstate(all='ignore'):
@@ -1956,7 +1953,7 @@ def pinv(a, rcond=1e-15, hermitian=False):
     """
     a, wrap = _makearray(a)
     rcond = asarray(rcond)
-    if _isEmpty2d(a):
+    if _is_empty_2d(a):
         m, n = a.shape[-2:]
         res = empty(a.shape[:-2] + (n, m), dtype=a.dtype)
         return wrap(res)
@@ -2052,8 +2049,8 @@ def slogdet(a):
 
     """
     a = asarray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
     t, result_t = _commonType(a)
     real_t = _realType(result_t)
     signature = 'D->Dd' if isComplexType(t) else 'd->dd'
@@ -2112,8 +2109,8 @@ def det(a):
 
     """
     a = asarray(a)
-    _assertRankAtLeast2(a)
-    _assertNdSquareness(a)
+    _assert_stacked_2d(a)
+    _assert_stacked_square(a)
     t, result_t = _commonType(a)
     signature = 'D->D' if isComplexType(t) else 'd->d'
     r = _umath_linalg.det(a, signature=signature)
@@ -2224,7 +2221,7 @@ def lstsq(a, b, rcond="warn"):
     is_1d = b.ndim == 1
     if is_1d:
         b = b[:, newaxis]
-    _assertRank2(a, b)
+    _assert_2d(a, b)
     m, n = a.shape[-2:]
     m2, n_rhs = b.shape[-2:]
     if m != m2:
@@ -2657,7 +2654,7 @@ def multi_dot(arrays):
         arrays[0] = atleast_2d(arrays[0])
     if arrays[-1].ndim == 1:
         arrays[-1] = atleast_2d(arrays[-1]).T
-    _assertRank2(*arrays)
+    _assert_2d(*arrays)
 
     # _multi_dot_three is much faster than _multi_dot_matrix_chain_order
     if n == 3:
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index bb3788c9a..bb0d8d412 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -4394,7 +4394,7 @@ class MaskedArray(ndarray):
         ----------
         axis : None or int or tuple of ints, optional
             Axis or axes along which the count is performed.
-            The default (`axis` = `None`) performs the count over all
+            The default, None, performs the count over all
             the dimensions of the input array. `axis` may be negative, in
             which case it counts from the last to the first axis.
 
@@ -4774,7 +4774,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.all : corresponding function for ndarrays
+        numpy.ndarray.all : corresponding function for ndarrays
         numpy.all : equivalent function
 
         Examples
@@ -4812,7 +4812,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.any : corresponding function for ndarrays
+        numpy.ndarray.any : corresponding function for ndarrays
         numpy.any : equivalent function
 
         """
@@ -4866,7 +4866,7 @@ class MaskedArray(ndarray):
         flatnonzero :
             Return indices that are non-zero in the flattened version of the input
             array.
-        ndarray.nonzero :
+        numpy.ndarray.nonzero :
             Equivalent ndarray method.
         count_nonzero :
             Counts the number of non-zero elements in the input array.
@@ -4994,7 +4994,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.sum : corresponding function for ndarrays
+        numpy.ndarray.sum : corresponding function for ndarrays
         numpy.sum : equivalent function
 
         Examples
@@ -5065,7 +5065,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.cumsum : corresponding function for ndarrays
+        numpy.ndarray.cumsum : corresponding function for ndarrays
         numpy.cumsum : equivalent function
 
         Examples
@@ -5102,7 +5102,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.prod : corresponding function for ndarrays
+        numpy.ndarray.prod : corresponding function for ndarrays
         numpy.prod : equivalent function
         """
         kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
@@ -5148,7 +5148,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.cumprod : corresponding function for ndarrays
+        numpy.ndarray.cumprod : corresponding function for ndarrays
         numpy.cumprod : equivalent function
         """
         result = self.filled(1).cumprod(axis=axis, dtype=dtype, out=out)
@@ -5171,7 +5171,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.mean : corresponding function for ndarrays
+        numpy.ndarray.mean : corresponding function for ndarrays
         numpy.mean : Equivalent function
         numpy.ma.average: Weighted average.
 
@@ -5260,7 +5260,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.var : corresponding function for ndarrays
+        numpy.ndarray.var : corresponding function for ndarrays
         numpy.var : Equivalent function
         """
         kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
@@ -5323,7 +5323,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.std : corresponding function for ndarrays
+        numpy.ndarray.std : corresponding function for ndarrays
         numpy.std : Equivalent function
         """
         kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
@@ -5344,7 +5344,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.around : corresponding function for ndarrays
+        numpy.ndarray.around : corresponding function for ndarrays
         numpy.around : equivalent function
         """
         result = self._data.round(decimals=decimals, out=out).view(type(self))
@@ -5406,7 +5406,7 @@ class MaskedArray(ndarray):
         --------
         MaskedArray.sort : Describes sorting algorithms used.
         lexsort : Indirect stable sort with multiple keys.
-        ndarray.sort : Inplace sort.
+        numpy.ndarray.sort : Inplace sort.
 
         Notes
         -----
@@ -5558,7 +5558,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.sort : Method to sort an array in-place.
+        numpy.ndarray.sort : Method to sort an array in-place.
         argsort : Indirect sort.
         lexsort : Indirect stable sort on multiple keys.
         searchsorted : Find elements in a sorted array.
@@ -5978,7 +5978,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.tobytes
+        numpy.ndarray.tobytes
         tolist, tofile
 
         Notes
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index de1aa3af8..4a83ac781 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -542,7 +542,7 @@ def average(a, axis=None, weights=None, returned=False):
         Data to be averaged.
         Masked entries are not taken into account in the computation.
     axis : int, optional
-        Axis along which to average `a`. If `None`, averaging is done over
+        Axis along which to average `a`. If None, averaging is done over
         the flattened array.
     weights : array_like, optional
         The importance that each element has in the computation of the average.
diff --git a/numpy/matlib.py b/numpy/matlib.py
index 604ef470b..b1b155586 100644
--- a/numpy/matlib.py
+++ b/numpy/matlib.py
@@ -239,7 +239,7 @@ def rand(*args):
 
     See Also
     --------
-    randn, numpy.random.rand
+    randn, numpy.random.RandomState.rand
 
     Examples
     --------
@@ -285,7 +285,7 @@ def randn(*args):
 
     See Also
     --------
-    rand, random.randn
+    rand, numpy.random.RandomState.randn
 
     Notes
     -----
diff --git a/numpy/matrixlib/defmatrix.py b/numpy/matrixlib/defmatrix.py
index 3c7e8ffc2..cabd41367 100644
--- a/numpy/matrixlib/defmatrix.py
+++ b/numpy/matrixlib/defmatrix.py
@@ -1046,7 +1046,7 @@ def bmat(obj, ldict=None, gdict=None):
         referenced by name.
     ldict : dict, optional
         A dictionary that replaces local operands in current frame.
-        Ignored if `obj` is not a string or `gdict` is `None`.
+        Ignored if `obj` is not a string or `gdict` is None.
     gdict : dict, optional
         A dictionary that replaces global operands in current frame.
         Ignored if `obj` is not a string.
diff --git a/numpy/polynomial/chebyshev.py b/numpy/polynomial/chebyshev.py
index 093eb0048..0cd9c4d23 100644
--- a/numpy/polynomial/chebyshev.py
+++ b/numpy/polynomial/chebyshev.py
@@ -1468,7 +1468,7 @@ def chebvander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(chebvander, x, y, deg)
+    return pu._vander_nd_flat((chebvander, chebvander), (x, y), deg)
 
 
 def chebvander3d(x, y, z, deg):
@@ -1522,7 +1522,7 @@ def chebvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(chebvander, x, y, z, deg)
+    return pu._vander_nd_flat((chebvander, chebvander, chebvander), (x, y, z), deg)
 
 
 def chebfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/hermite.py b/numpy/polynomial/hermite.py
index 0011fa3b7..9b1aea239 100644
--- a/numpy/polynomial/hermite.py
+++ b/numpy/polynomial/hermite.py
@@ -1193,7 +1193,7 @@ def hermvander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(hermvander, x, y, deg)
+    return pu._vander_nd_flat((hermvander, hermvander), (x, y), deg)
 
 
 def hermvander3d(x, y, z, deg):
@@ -1247,7 +1247,7 @@ def hermvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(hermvander, x, y, z, deg)
+    return pu._vander_nd_flat((hermvander, hermvander, hermvander), (x, y, z), deg)
 
 
 def hermfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/hermite_e.py b/numpy/polynomial/hermite_e.py
index b1cc2d3ab..c5a0a05a2 100644
--- a/numpy/polynomial/hermite_e.py
+++ b/numpy/polynomial/hermite_e.py
@@ -1186,7 +1186,7 @@ def hermevander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(hermevander, x, y, deg)
+    return pu._vander_nd_flat((hermevander, hermevander), (x, y), deg)
 
 
 def hermevander3d(x, y, z, deg):
@@ -1240,7 +1240,7 @@ def hermevander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(hermevander, x, y, z, deg)
+    return pu._vander_nd_flat((hermevander, hermevander, hermevander), (x, y, z), deg)
 
 
 def hermefit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/laguerre.py b/numpy/polynomial/laguerre.py
index 7e7e45ca1..538a1d449 100644
--- a/numpy/polynomial/laguerre.py
+++ b/numpy/polynomial/laguerre.py
@@ -1193,7 +1193,7 @@ def lagvander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(lagvander, x, y, deg)
+    return pu._vander_nd_flat((lagvander, lagvander), (x, y), deg)
 
 
 def lagvander3d(x, y, z, deg):
@@ -1247,7 +1247,7 @@ def lagvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(lagvander, x, y, z, deg)
+    return pu._vander_nd_flat((lagvander, lagvander, lagvander), (x, y, z), deg)
 
 
 def lagfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/legendre.py b/numpy/polynomial/legendre.py
index 281982d0b..c11824761 100644
--- a/numpy/polynomial/legendre.py
+++ b/numpy/polynomial/legendre.py
@@ -1229,7 +1229,7 @@ def legvander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(legvander, x, y, deg)
+    return pu._vander_nd_flat((legvander, legvander), (x, y), deg)
 
 
 def legvander3d(x, y, z, deg):
@@ -1283,7 +1283,7 @@ def legvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(legvander, x, y, z, deg)
+    return pu._vander_nd_flat((legvander, legvander, legvander), (x, y, z), deg)
 
 
 def legfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/polynomial.py b/numpy/polynomial/polynomial.py
index 3f0a902cf..315ea1495 100644
--- a/numpy/polynomial/polynomial.py
+++ b/numpy/polynomial/polynomial.py
@@ -1133,7 +1133,7 @@ def polyvander2d(x, y, deg):
     polyvander, polyvander3d, polyval2d, polyval3d
 
     """
-    return pu._vander2d(polyvander, x, y, deg)
+    return pu._vander_nd_flat((polyvander, polyvander), (x, y), deg)
 
 
 def polyvander3d(x, y, z, deg):
@@ -1187,7 +1187,7 @@ def polyvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(polyvander, x, y, z, deg)
+    return pu._vander_nd_flat((polyvander, polyvander, polyvander), (x, y, z), deg)
 
 
 def polyfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/polyutils.py b/numpy/polynomial/polyutils.py
index 35b24d1ab..5dcfa7a7a 100644
--- a/numpy/polynomial/polyutils.py
+++ b/numpy/polynomial/polyutils.py
@@ -46,6 +46,7 @@ Functions
 from __future__ import division, absolute_import, print_function
 
 import operator
+import functools
 import warnings
 
 import numpy as np
@@ -415,45 +416,89 @@ def mapdomain(x, old, new):
     return off + scl*x
 
 
-def _vander2d(vander_f, x, y, deg):
-    """
-    Helper function used to implement the ``<type>vander2d`` functions.
+def _nth_slice(i, ndim):
+    sl = [np.newaxis] * ndim
+    sl[i] = slice(None)
+    return tuple(sl)
+
+
+def _vander_nd(vander_fs, points, degrees):
+    r"""
+    A generalization of the Vandermonde matrix for N dimensions
+
+    The result is built by combining the results of 1d Vandermonde matrices,
+
+    .. math::
+        W[i_0, \ldots, i_M, j_0, \ldots, j_N] = \prod_{k=0}^N{V_k(x_k)[i_0, \ldots, i_M, j_k]}
+
+    where
+
+    .. math::
+        N + 1 &= \texttt{len(points)} = \texttt{len(degrees)} = \texttt{len(vander\_fs)} \\
+        M + 1 &= \texttt{points[k].ndim} \\
+        V_k &= \texttt{vander\_fs[k]} \\
+        x_k &= \texttt{points[k]} \\
+        0 \le j_k &\le \texttt{degrees[k]}
+
+    Expanding the one-dimensional :math:`V_k` functions gives:
+
+    .. math::
+        W[i_0, \ldots, i_M, j_0, \ldots, j_N] = \prod_{k=0}^N{B_{k, j_k}(x_k[i_0, \ldots, i_M])}
+
+    where :math:`B_{k,m}` is the m'th basis of the polynomial construction used along
+    dimension :math:`k`. For a regular polynomial, :math:`B_{k, m}(x) = P_m(x) = x^m`.
 
     Parameters
     ----------
-    vander_f : function(array_like, int) -> ndarray
-        The 1d vander function, such as ``polyvander``
-    x, y, deg :
-        See the ``<type>vander2d`` functions for more detail
+    vander_fs : Sequence[function(array_like, int) -> ndarray]
+        The 1d vander function to use for each axis, such as ``polyvander``
+    points : Sequence[array_like]
+        Arrays of point coordinates, all of the same shape. The dtypes
+        will be converted to either float64 or complex128 depending on
+        whether any of the elements are complex. Scalars are converted to
+        1-D arrays.
+        This must be the same length as `vander_fs`.
+    degrees : Sequence[int]
+        The maximum degree (inclusive) to use for each axis.
+        This must be the same length as `vander_fs`.
+
+    Returns
+    -------
+    vander_nd : ndarray
+        An array of shape ``points[0].shape + tuple(d + 1 for d in degrees)``.
     """
-    degx, degy = deg
-    x, y = np.array((x, y), copy=False) + 0.0
+    n_dims = len(vander_fs)
+    if n_dims != len(points):
+        raise ValueError(
+            "Expected {} dimensions of sample points, got {}".format(n_dims, len(points)))
+    if n_dims != len(degrees):
+        raise ValueError(
+            "Expected {} dimensions of degrees, got {}".format(n_dims, len(degrees)))
+    if n_dims == 0:
+        raise ValueError("Unable to guess a dtype or shape when no points are given")
+
+    # convert to the same shape and type
+    points = tuple(np.array(tuple(points), copy=False) + 0.0)
 
-    vx = vander_f(x, degx)
-    vy = vander_f(y, degy)
-    v = vx[..., None]*vy[..., None,:]
-    return v.reshape(v.shape[:-2] + (-1,))
+    # produce the vandermonde matrix for each dimension, placing the last
+    # axis of each in an independent trailing axis of the output
+    vander_arrays = (
+        vander_fs[i](points[i], degrees[i])[(...,) + _nth_slice(i, n_dims)]
+        for i in range(n_dims)
+    )
 
+    # we checked this wasn't empty already, so no `initial` needed
+    return functools.reduce(operator.mul, vander_arrays)
 
-def _vander3d(vander_f, x, y, z, deg):
+
+def _vander_nd_flat(vander_fs, points, degrees):
     """
-    Helper function used to implement the ``<type>vander3d`` functions.
+    Like `_vander_nd`, but flattens the last ``len(degrees)`` axes into a single axis
 
-    Parameters
-    ----------
-    vander_f : function(array_like, int) -> ndarray
-        The 1d vander function, such as ``polyvander``
-    x, y, z, deg :
-        See the ``<type>vander3d`` functions for more detail
+    Used to implement the public ``<type>vander<n>d`` functions.
     """
-    degx, degy, degz = deg
-    x, y, z = np.array((x, y, z), copy=False) + 0.0
-
-    vx = vander_f(x, degx)
-    vy = vander_f(y, degy)
-    vz = vander_f(z, degz)
-    v = vx[..., None, None]*vy[..., None,:, None]*vz[..., None, None,:]
-    return v.reshape(v.shape[:-3] + (-1,))
+    v = _vander_nd(vander_fs, points, degrees)
+    return v.reshape(v.shape[:-len(degrees)] + (-1,))
 
 
 def _fromroots(line_f, mul_f, roots):
diff --git a/numpy/random/.gitignore b/numpy/random/.gitignore
new file mode 100644
index 000000000..fea3f955a
--- /dev/null
+++ b/numpy/random/.gitignore
@@ -0,0 +1,3 @@
+# generated files
+_bounded_integers.pyx
+_bounded_integers.pxd
diff --git a/numpy/random/__init__.py b/numpy/random/__init__.py
index f7c248451..1ceb5c4dd 100644
--- a/numpy/random/__init__.py
+++ b/numpy/random/__init__.py
@@ -179,20 +179,19 @@ __all__ = [
 
 # add these for module-freeze analysis (like PyInstaller)
 from . import _pickle
-from . import common
-from . import bounded_integers
-
+from . import _common
+from . import _bounded_integers
+
+from ._generator import Generator, default_rng
+from ._bit_generator import SeedSequence, BitGenerator
+from ._mt19937 import MT19937
+from ._pcg64 import PCG64
+from ._philox import Philox
+from ._sfc64 import SFC64
 from .mtrand import *
-from .generator import Generator, default_rng
-from .bit_generator import SeedSequence
-from .mt19937 import MT19937
-from .pcg64 import PCG64
-from .philox import Philox
-from .sfc64 import SFC64
-from .mtrand import RandomState
 
 __all__ += ['Generator', 'RandomState', 'SeedSequence', 'MT19937',
-            'Philox', 'PCG64', 'SFC64', 'default_rng']
+            'Philox', 'PCG64', 'SFC64', 'default_rng', 'BitGenerator']
 
 
 def __RandomState_ctor():
diff --git a/numpy/random/bit_generator.pxd b/numpy/random/_bit_generator.pxd
index 984033f17..bd5e47a20 100644
--- a/numpy/random/bit_generator.pxd
+++ b/numpy/random/_bit_generator.pxd
@@ -1,6 +1,15 @@
-
-from .common cimport bitgen_t, uint32_t
 cimport numpy as np
+from libc.stdint cimport uint32_t, uint64_t
+
+cdef extern from "numpy/random/bitgen.h":
+    struct bitgen:
+        void *state
+        uint64_t (*next_uint64)(void *st) nogil
+        uint32_t (*next_uint32)(void *st) nogil
+        double (*next_double)(void *st) nogil
+        uint64_t (*next_raw)(void *st) nogil
+
+    ctypedef bitgen bitgen_t
 
 cdef class BitGenerator():
     cdef readonly object _seed_seq
diff --git a/numpy/random/bit_generator.pyx b/numpy/random/_bit_generator.pyx
index eb608af6c..21d21e6bb 100644
--- a/numpy/random/bit_generator.pyx
+++ b/numpy/random/_bit_generator.pyx
@@ -53,9 +53,7 @@ from cpython.pycapsule cimport PyCapsule_New
 import numpy as np
 cimport numpy as np
 
-from libc.stdint cimport uint32_t
-from .common cimport (random_raw, benchmark, prepare_ctypes, prepare_cffi)
-from .distributions cimport bitgen_t
+from ._common cimport (random_raw, benchmark, prepare_ctypes, prepare_cffi)
 
 __all__ = ['SeedSequence', 'BitGenerator']
 
@@ -116,7 +114,7 @@ def _coerce_to_uint32_array(x):
     Examples
     --------
     >>> import numpy as np
-    >>> from numpy.random.bit_generator import _coerce_to_uint32_array
+    >>> from numpy.random._bit_generator import _coerce_to_uint32_array
     >>> _coerce_to_uint32_array(12345)
     array([12345], dtype=uint32)
     >>> _coerce_to_uint32_array('12345')
@@ -484,13 +482,12 @@ cdef class BitGenerator():
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
-        `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        `~numpy.random.SeedSequence` to derive the initial `BitGenerator` state.
+        One may also pass in a `SeedSequence` instance.
 
     Attributes
     ----------
diff --git a/numpy/random/bounded_integers.pxd.in b/numpy/random/_bounded_integers.pxd.in
index 7a3f224dc..320d35774 100644
--- a/numpy/random/bounded_integers.pxd.in
+++ b/numpy/random/_bounded_integers.pxd.in
@@ -4,7 +4,7 @@ import numpy as np
 cimport numpy as np
 ctypedef np.npy_bool bool_t
 
-from .common cimport bitgen_t
+from ._bit_generator cimport bitgen_t
 
 cdef inline uint64_t _gen_mask(uint64_t max_val) nogil:
     """Mask generator for use in bounded random numbers"""
diff --git a/numpy/random/bounded_integers.pyx.in b/numpy/random/_bounded_integers.pyx.in
index 411b65a37..7e19471e4 100644
--- a/numpy/random/bounded_integers.pyx.in
+++ b/numpy/random/_bounded_integers.pyx.in
@@ -4,12 +4,54 @@
 import numpy as np
 cimport numpy as np
 
-from .distributions cimport *
-
 __all__ = []
 
 np.import_array()
 
+cdef extern from "numpy/random/distributions.h":
+    # Generate random numbers in closed interval [off, off + rng].
+    uint64_t random_bounded_uint64(bitgen_t *bitgen_state,
+                                   uint64_t off, uint64_t rng,
+                                   uint64_t mask, bint use_masked) nogil
+    uint32_t random_buffered_bounded_uint32(bitgen_t *bitgen_state,
+                                            uint32_t off, uint32_t rng,
+                                            uint32_t mask, bint use_masked,
+                                            int *bcnt, uint32_t *buf) nogil
+    uint16_t random_buffered_bounded_uint16(bitgen_t *bitgen_state,
+                                            uint16_t off, uint16_t rng,
+                                            uint16_t mask, bint use_masked,
+                                            int *bcnt, uint32_t *buf) nogil
+    uint8_t random_buffered_bounded_uint8(bitgen_t *bitgen_state,
+                                          uint8_t off, uint8_t rng,
+                                          uint8_t mask, bint use_masked,
+                                          int *bcnt, uint32_t *buf) nogil
+    np.npy_bool random_buffered_bounded_bool(bitgen_t *bitgen_state,
+                                             np.npy_bool off, np.npy_bool rng,
+                                             np.npy_bool mask, bint use_masked,
+                                             int *bcnt, uint32_t *buf) nogil
+    void random_bounded_uint64_fill(bitgen_t *bitgen_state,
+                                    uint64_t off, uint64_t rng, np.npy_intp cnt,
+                                    bint use_masked,
+                                    uint64_t *out) nogil
+    void random_bounded_uint32_fill(bitgen_t *bitgen_state,
+                                    uint32_t off, uint32_t rng, np.npy_intp cnt,
+                                    bint use_masked,
+                                    uint32_t *out) nogil
+    void random_bounded_uint16_fill(bitgen_t *bitgen_state,
+                                    uint16_t off, uint16_t rng, np.npy_intp cnt,
+                                    bint use_masked,
+                                    uint16_t *out) nogil
+    void random_bounded_uint8_fill(bitgen_t *bitgen_state,
+                                   uint8_t off, uint8_t rng, np.npy_intp cnt,
+                                   bint use_masked,
+                                   uint8_t *out) nogil
+    void random_bounded_bool_fill(bitgen_t *bitgen_state,
+                                  np.npy_bool off, np.npy_bool rng, np.npy_intp cnt,
+                                  bint use_masked,
+                                  np.npy_bool *out) nogil
+
+
+
 _integers_types = {'bool': (0, 2),
                  'int8': (-2**7, 2**7),
                  'int16': (-2**15, 2**15),
@@ -149,7 +191,7 @@ cdef object _rand_{{nptype}}_broadcast(object low, object high, object size,
         highm1_arr = <np.ndarray>np.PyArray_FROM_OTF(high_m1, np.{{npctype}}, np.NPY_ALIGNED | np.NPY_FORCECAST)
     else:
         # If input is object or a floating type
-        highm1_arr = <np.ndarray>np.empty_like(high_arr, dtype=np.{{nptype}})
+        highm1_arr = <np.ndarray>np.empty_like(high_arr, dtype=np.{{otype}})
         highm1_data = <{{nptype}}_t *>np.PyArray_DATA(highm1_arr)
         cnt = np.PyArray_SIZE(high_arr)
         flat = high_arr.flat
@@ -171,10 +213,10 @@ cdef object _rand_{{nptype}}_broadcast(object low, object high, object size,
     low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.{{npctype}}, np.NPY_ALIGNED | np.NPY_FORCECAST)
 
     if size is not None:
-        out_arr = <np.ndarray>np.empty(size, np.{{nptype}})
+        out_arr = <np.ndarray>np.empty(size, np.{{otype}})
     else:
         it = np.PyArray_MultiIterNew2(low_arr, high_arr)
-        out_arr = <np.ndarray>np.empty(it.shape, np.{{nptype}})
+        out_arr = <np.ndarray>np.empty(it.shape, np.{{otype}})
 
     it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
     out_data = <uint64_t *>np.PyArray_DATA(out_arr)
@@ -216,12 +258,12 @@ cdef object _rand_{{nptype}}(object low, object high, object size,
     """
     _rand_{{nptype}}(low, high, size, use_masked, *state, lock)
 
-    Return random np.{{nptype}} integers from `low` (inclusive) to `high` (exclusive).
+    Return random `np.{{otype}}` integers from `low` (inclusive) to `high` (exclusive).
 
     Return random integers from the "discrete uniform" distribution in the
     interval [`low`, `high`).  If `high` is None (the default),
     then results are from [0, `low`). On entry the arguments are presumed
-    to have been validated for size and order for the np.{{nptype}} type.
+    to have been validated for size and order for the `np.{{otype}}` type.
 
     Parameters
     ----------
@@ -247,7 +289,7 @@ cdef object _rand_{{nptype}}(object low, object high, object size,
 
     Returns
     -------
-    out : python scalar or ndarray of np.{{nptype}}
+    out : python scalar or ndarray of np.{{otype}}
           `size`-shaped array of random integers from the appropriate
           distribution, or a single such random int if `size` not provided.
 
@@ -266,7 +308,7 @@ cdef object _rand_{{nptype}}(object low, object high, object size,
 
     if size is not None:
         if (np.prod(size) == 0):
-            return np.empty(size, dtype=np.{{nptype}})
+            return np.empty(size, dtype=np.{{otype}})
 
     low_arr = <np.ndarray>np.array(low, copy=False)
     high_arr = <np.ndarray>np.array(high, copy=False)
@@ -295,7 +337,7 @@ cdef object _rand_{{nptype}}(object low, object high, object size,
                 random_bounded_{{utype}}_fill(state, off, rng, 1, use_masked, &out_val)
             return np.{{otype}}(<{{nptype}}_t>out_val)
         else:
-            out_arr = <np.ndarray>np.empty(size, np.{{nptype}})
+            out_arr = <np.ndarray>np.empty(size, np.{{otype}})
             cnt = np.PyArray_SIZE(out_arr)
             out_data = <{{utype}}_t *>np.PyArray_DATA(out_arr)
             with lock, nogil:
diff --git a/numpy/random/common.pxd b/numpy/random/_common.pxd
index ac0a94bb0..74bebca83 100644
--- a/numpy/random/common.pxd
+++ b/numpy/random/_common.pxd
@@ -1,23 +1,12 @@
 #cython: language_level=3
 
-from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t,
-                          int8_t, int16_t, int32_t, int64_t, intptr_t,
-                          uintptr_t)
-from libc.math cimport sqrt
-
-cdef extern from "src/bitgen.h":
-    struct bitgen:
-        void *state
-        uint64_t (*next_uint64)(void *st) nogil
-        uint32_t (*next_uint32)(void *st) nogil
-        double (*next_double)(void *st) nogil
-        uint64_t (*next_raw)(void *st) nogil
-
-    ctypedef bitgen bitgen_t
+from libc.stdint cimport uint32_t, uint64_t, int32_t, int64_t
 
 import numpy as np
 cimport numpy as np
 
+from ._bit_generator cimport bitgen_t
+
 cdef double POISSON_LAM_MAX
 cdef double LEGACY_POISSON_LAM_MAX
 cdef uint64_t MAXSIZE
@@ -44,7 +33,7 @@ cdef object prepare_ctypes(bitgen_t *bitgen)
 cdef int check_constraint(double val, object name, constraint_type cons) except -1
 cdef int check_array_constraint(np.ndarray val, object name, constraint_type cons) except -1
 
-cdef extern from "src/aligned_malloc/aligned_malloc.h":
+cdef extern from "include/aligned_malloc.h":
     cdef void *PyArray_realloc_aligned(void *p, size_t n)
     cdef void *PyArray_malloc_aligned(size_t n)
     cdef void *PyArray_calloc_aligned(size_t n, size_t s)
@@ -56,6 +45,7 @@ ctypedef double (*random_double_1)(void *state, double a) nogil
 ctypedef double (*random_double_2)(void *state, double a, double b) nogil
 ctypedef double (*random_double_3)(void *state, double a, double b, double c) nogil
 
+ctypedef void (*random_float_fill)(bitgen_t *state, np.npy_intp count, float* out) nogil
 ctypedef float (*random_float_0)(bitgen_t *state) nogil
 ctypedef float (*random_float_1)(bitgen_t *state, float a) nogil
 
diff --git a/numpy/random/common.pyx b/numpy/random/_common.pyx
index 74cd5f033..ef1afac7c 100644
--- a/numpy/random/common.pyx
+++ b/numpy/random/_common.pyx
@@ -6,7 +6,7 @@ import sys
 import numpy as np
 cimport numpy as np
 
-from .common cimport *
+from libc.stdint cimport uintptr_t
 
 __all__ = ['interface']
 
@@ -262,14 +262,16 @@ cdef object double_fill(void *func, bitgen_t *state, object size, object lock, o
     return out_array
 
 cdef object float_fill(void *func, bitgen_t *state, object size, object lock, object out):
-    cdef random_float_0 random_func = (<random_float_0>func)
+    cdef random_float_fill random_func = (<random_float_fill>func)
+    cdef float out_val
     cdef float *out_array_data
     cdef np.ndarray out_array
     cdef np.npy_intp i, n
 
     if size is None and out is None:
         with lock:
-            return random_func(state)
+            random_func(state, 1, &out_val)
+            return out_val
 
     if out is not None:
         check_output(out, np.float32, size)
@@ -280,8 +282,7 @@ cdef object float_fill(void *func, bitgen_t *state, object size, object lock, ob
     n = np.PyArray_SIZE(out_array)
     out_array_data = <float *>np.PyArray_DATA(out_array)
     with lock, nogil:
-        for i in range(n):
-            out_array_data[i] = random_func(state)
+        random_func(state, n, out_array_data)
     return out_array
 
 cdef object float_fill_from_double(void *func, bitgen_t *state, object size, object lock, object out):
diff --git a/numpy/random/examples/cython/extending.pyx b/numpy/random/_examples/cython/extending.pyx
index a6a4ba4bf..d12c0b919 100644
--- a/numpy/random/examples/cython/extending.pyx
+++ b/numpy/random/_examples/cython/extending.pyx
@@ -8,7 +8,7 @@ import numpy as np
 cimport numpy as np
 cimport cython
 
-from numpy.random.common cimport bitgen_t
+from numpy.random._bit_generator cimport bitgen_t
 from numpy.random import PCG64
 
 np.import_array()
@@ -39,7 +39,7 @@ def uniform_mean(Py_ssize_t n):
     return randoms.mean()
 
 
-# This function is declated nogil so it can be used without the GIL below
+# This function is declared nogil so it can be used without the GIL below
 cdef uint32_t bounded_uint(uint32_t lb, uint32_t ub, bitgen_t *rng) nogil:
     cdef uint32_t mask, delta, val
     mask = delta = ub - lb
diff --git a/numpy/random/examples/cython/extending_distributions.pyx b/numpy/random/_examples/cython/extending_distributions.pyx
index 3cefec97e..3f342c475 100644
--- a/numpy/random/examples/cython/extending_distributions.pyx
+++ b/numpy/random/_examples/cython/extending_distributions.pyx
@@ -1,21 +1,26 @@
 #!/usr/bin/env python
 #cython: language_level=3
 """
-This file shows how the distributions that are accessed through
-distributions.pxd can be used Cython code.
+This file shows how to use a BitGenerator to create a distribution.
 """
 import numpy as np
 cimport numpy as np
 cimport cython
 from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
-from numpy.random.common cimport *
-from numpy.random.distributions cimport random_gauss_zig
+from libc.stdint cimport uint16_t, uint64_t
+from numpy.random._bit_generator cimport bitgen_t
+
 from numpy.random import PCG64
 
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def normals_zig(Py_ssize_t n):
+def uniforms(Py_ssize_t n):
+    """
+    Create an array of `n` uniformly distributed doubles.
+    A 'real' distribution would want to process the values into
+    some non-uniform distribution
+    """
     cdef Py_ssize_t i
     cdef bitgen_t *rng
     cdef const char *capsule_name = "BitGenerator"
@@ -23,37 +28,47 @@ def normals_zig(Py_ssize_t n):
 
     x = PCG64()
     capsule = x.capsule
+    # Optional check that the capsule is from a BitGenerator
     if not PyCapsule_IsValid(capsule, capsule_name):
         raise ValueError("Invalid pointer to anon_func_state")
+    # Cast the pointer
     rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name)
-    random_values = np.empty(n)
-    # Best practice is to release GIL and acquire the lock
+    random_values = np.empty(n, dtype='float64')
     with x.lock, nogil:
         for i in range(n):
-            random_values[i] = random_gauss_zig(rng)
+            # Call the function
+            random_values[i] = rng.next_double(rng.state)
     randoms = np.asarray(random_values)
-    return randoms
-
 
+    return randoms
+ 
+# cython example 2
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def uniforms(Py_ssize_t n):
+def uint10_uniforms(Py_ssize_t n):
+    """Uniform 10 bit integers stored as 16-bit unsigned integers"""
     cdef Py_ssize_t i
     cdef bitgen_t *rng
     cdef const char *capsule_name = "BitGenerator"
-    cdef double[::1] random_values
+    cdef uint16_t[::1] random_values
+    cdef int bits_remaining
+    cdef int width = 10
+    cdef uint64_t buff, mask = 0x3FF
 
     x = PCG64()
     capsule = x.capsule
-    # Optional check that the capsule if from a BitGenerator
     if not PyCapsule_IsValid(capsule, capsule_name):
         raise ValueError("Invalid pointer to anon_func_state")
-    # Cast the pointer
     rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name)
-    random_values = np.empty(n)
+    random_values = np.empty(n, dtype='uint16')
+    # Best practice is to release GIL and acquire the lock
+    bits_remaining = 0
     with x.lock, nogil:
         for i in range(n):
-            # Call the function
-            random_values[i] = rng.next_double(rng.state)
+            if bits_remaining < width:
+                buff, bits_remaining = rng.next_uint64(rng.state), 64  # refill
+            random_values[i] = buff & mask
+            buff >>= width
+            bits_remaining -= width
     randoms = np.asarray(random_values)
     return randoms
diff --git a/numpy/random/_examples/cython/setup.py b/numpy/random/_examples/cython/setup.py
new file mode 100644
index 000000000..19f045fc0
--- /dev/null
+++ b/numpy/random/_examples/cython/setup.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+"""
+Build the Cython demonstrations of low-level access to NumPy random
+
+Usage: python setup.py build_ext -i
+"""
+
+import numpy as np
+from distutils.core import setup
+from Cython.Build import cythonize
+from setuptools.extension import Extension
+from os.path import join, abspath, dirname
+
+path = abspath(dirname(__file__))
+
+extending = Extension("extending",
+                      sources=[join(path, 'extending.pyx')],
+                      include_dirs=[
+                            np.get_include(),
+                            join(path, '..', '..')
+                        ],
+                      )
+distributions = Extension("extending_distributions",
+                          sources=[join(path, 'extending_distributions.pyx')],
+                          include_dirs=[np.get_include()])
+
+extensions = [extending, distributions]
+
+setup(
+    ext_modules=cythonize(extensions)
+)
diff --git a/numpy/random/_examples/numba/extending.py b/numpy/random/_examples/numba/extending.py
new file mode 100644
index 000000000..0d240596b
--- /dev/null
+++ b/numpy/random/_examples/numba/extending.py
@@ -0,0 +1,84 @@
+import numpy as np
+import numba as nb
+
+from numpy.random import PCG64
+from timeit import timeit
+
+bit_gen = PCG64()
+next_d = bit_gen.cffi.next_double
+state_addr = bit_gen.cffi.state_address
+
+def normals(n, state):
+    out = np.empty(n)
+    for i in range((n + 1) // 2):
+        x1 = 2.0 * next_d(state) - 1.0
+        x2 = 2.0 * next_d(state) - 1.0
+        r2 = x1 * x1 + x2 * x2
+        while r2 >= 1.0 or r2 == 0.0:
+            x1 = 2.0 * next_d(state) - 1.0
+            x2 = 2.0 * next_d(state) - 1.0
+            r2 = x1 * x1 + x2 * x2
+        f = np.sqrt(-2.0 * np.log(r2) / r2)
+        out[2 * i] = f * x1
+        if 2 * i + 1 < n:
+            out[2 * i + 1] = f * x2
+    return out
+
+# Compile using Numba
+normalsj = nb.jit(normals, nopython=True)
+# Must use state address not state with numba
+n = 10000
+
+def numbacall():
+    return normalsj(n, state_addr)
+
+rg = np.random.Generator(PCG64())
+
+def numpycall():
+    return rg.normal(size=n)
+
+# Check that the functions work
+r1 = numbacall()
+r2 = numpycall()
+assert r1.shape == (n,)
+assert r1.shape == r2.shape
+
+t1 = timeit(numbacall, number=1000)
+print('{:.2f} secs for {} PCG64 (Numba/PCG64) gaussian randoms'.format(t1, n))
+t2 = timeit(numpycall, number=1000)
+print('{:.2f} secs for {} PCG64 (NumPy/PCG64) gaussian randoms'.format(t2, n))
+
+# example 2
+
+next_u32 = bit_gen.ctypes.next_uint32
+ctypes_state = bit_gen.ctypes.state
+
+@nb.jit(nopython=True)
+def bounded_uint(lb, ub, state):
+    mask = delta = ub - lb
+    mask |= mask >> 1
+    mask |= mask >> 2
+    mask |= mask >> 4
+    mask |= mask >> 8
+    mask |= mask >> 16
+
+    val = next_u32(state) & mask
+    while val > delta:
+        val = next_u32(state) & mask
+
+    return lb + val
+
+
+print(bounded_uint(323, 2394691, ctypes_state.value))
+
+
+@nb.jit(nopython=True)
+def bounded_uints(lb, ub, n, state):
+    out = np.empty(n, dtype=np.uint32)
+    for i in range(n):
+        out[i] = bounded_uint(lb, ub, state)
+    return out
+
+
+bounded_uints(323, 2394691, 10000000, ctypes_state.value)
+
diff --git a/numpy/random/examples/numba/extending_distributions.py b/numpy/random/_examples/numba/extending_distributions.py
index 9233ccced..7cf8bf0b0 100644
--- a/numpy/random/examples/numba/extending_distributions.py
+++ b/numpy/random/_examples/numba/extending_distributions.py
@@ -1,22 +1,28 @@
 r"""
-On *nix, execute in randomgen/src/distributions
+Building the required library in this example requires a source distribution
+of NumPy or clone of the NumPy git repository since distributions.c is not
+included in binary distributions.
 
+On *nix, execute in numpy/random/src/distributions
+
+export PYTHON_VERSION=3.8 # Python version
 export PYTHON_INCLUDE=#path to Python's include folder, usually \
     ${PYTHON_HOME}/include/python${PYTHON_VERSION}m
 export NUMPY_INCLUDE=#path to numpy's include folder, usually \
     ${PYTHON_HOME}/lib/python${PYTHON_VERSION}/site-packages/numpy/core/include
 gcc -shared -o libdistributions.so -fPIC distributions.c \
     -I${NUMPY_INCLUDE} -I${PYTHON_INCLUDE}
-mv libdistributions.so ../../examples/numba/
+mv libdistributions.so ../../_examples/numba/
 
 On Windows
 
-rem PYTHON_HOME is setup dependent, this is an example
+rem PYTHON_HOME and PYTHON_VERSION are setup dependent, this is an example
 set PYTHON_HOME=c:\Anaconda
+set PYTHON_VERSION=38
 cl.exe /LD .\distributions.c -DDLL_EXPORT \
     -I%PYTHON_HOME%\lib\site-packages\numpy\core\include \
-    -I%PYTHON_HOME%\include %PYTHON_HOME%\libs\python36.lib
-move distributions.dll ../../examples/numba/
+    -I%PYTHON_HOME%\include %PYTHON_HOME%\libs\python%PYTHON_VERSION%.lib
+move distributions.dll ../../_examples/numba/
 """
 import os
 
@@ -35,19 +41,19 @@ else:
     raise RuntimeError('Required DLL/so file was not found.')
 
 ffi.cdef("""
-double random_gauss_zig(void *bitgen_state);
+double random_standard_normal(void *bitgen_state);
 """)
 x = PCG64()
 xffi = x.cffi
 bit_generator = xffi.bit_generator
 
-random_gauss_zig = lib.random_gauss_zig
+random_standard_normal = lib.random_standard_normal
 
 
 def normals(n, bit_generator):
     out = np.empty(n)
     for i in range(n):
-        out[i] = random_gauss_zig(bit_generator)
+        out[i] = random_standard_normal(bit_generator)
     return out
 
 
diff --git a/numpy/random/generator.pyx b/numpy/random/_generator.pyx
index df7485a97..2d8455982 100644
--- a/numpy/random/generator.pyx
+++ b/numpy/random/_generator.pyx
@@ -3,36 +3,159 @@
 import operator
 import warnings
 
-import numpy as np
-from numpy.core.multiarray import normalize_axis_index
-
-from .bounded_integers import _integers_types
-from .pcg64 import PCG64
-
 from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
 from cpython cimport (Py_INCREF, PyFloat_AsDouble)
-from libc cimport string
 
 cimport cython
+import numpy as np
 cimport numpy as np
+from numpy.core.multiarray import normalize_axis_index
 
-from .bounded_integers cimport *
-from .common cimport *
-from .distributions cimport *
+from libc cimport string
+from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t,
+                          int32_t, int64_t, INT64_MAX, SIZE_MAX)
+from ._bounded_integers cimport (_rand_bool, _rand_int32, _rand_int64,
+         _rand_int16, _rand_int8, _rand_uint64, _rand_uint32, _rand_uint16,
+         _rand_uint8, _gen_mask)
+from ._bounded_integers import _integers_types
+from ._pcg64 import PCG64
+from ._bit_generator cimport bitgen_t
+from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE,
+            CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1,
+            CONS_GT_1, CONS_POSITIVE_NOT_NAN, CONS_POISSON,
+            double_fill, cont, kahan_sum, cont_broadcast_3, float_fill, cont_f,
+            check_array_constraint, check_constraint, disc, discrete_broadcast_iii,
+        )
+
+
+cdef extern from "numpy/random/distributions.h":
+
+    struct s_binomial_t:
+        int has_binomial
+        double psave
+        int64_t nsave
+        double r
+        double q
+        double fm
+        int64_t m
+        double p1
+        double xm
+        double xl
+        double xr
+        double c
+        double laml
+        double lamr
+        double p2
+        double p3
+        double p4
+
+    ctypedef s_binomial_t binomial_t
+
+    double random_standard_uniform(bitgen_t *bitgen_state) nogil
+    void random_standard_uniform_fill(bitgen_t* bitgen_state, np.npy_intp cnt, double *out) nogil
+    double random_standard_exponential(bitgen_t *bitgen_state) nogil
+    void random_standard_exponential_fill(bitgen_t *bitgen_state, np.npy_intp cnt, double *out) nogil
+    double random_standard_exponential_zig(bitgen_t *bitgen_state) nogil
+    void random_standard_exponential_zig_fill(bitgen_t *bitgen_state, np.npy_intp cnt, double *out) nogil
+    double random_standard_normal(bitgen_t* bitgen_state) nogil
+    void random_standard_normal_fill(bitgen_t *bitgen_state, np.npy_intp count, double *out) nogil
+    void random_standard_normal_fill_f(bitgen_t *bitgen_state, np.npy_intp count, float *out) nogil
+    double random_standard_gamma(bitgen_t *bitgen_state, double shape) nogil
+
+    float random_standard_uniform_f(bitgen_t *bitgen_state) nogil
+    void random_standard_uniform_fill_f(bitgen_t* bitgen_state, np.npy_intp cnt, float *out) nogil
+    float random_standard_exponential_f(bitgen_t *bitgen_state) nogil
+    float random_standard_exponential_zig_f(bitgen_t *bitgen_state) nogil
+    void random_standard_exponential_fill_f(bitgen_t *bitgen_state, np.npy_intp cnt, float *out) nogil
+    void random_standard_exponential_zig_fill_f(bitgen_t *bitgen_state, np.npy_intp cnt, float *out) nogil
+    float random_standard_normal_f(bitgen_t* bitgen_state) nogil
+    float random_standard_gamma_f(bitgen_t *bitgen_state, float shape) nogil
+
+    int64_t random_positive_int64(bitgen_t *bitgen_state) nogil
+    int32_t random_positive_int32(bitgen_t *bitgen_state) nogil
+    int64_t random_positive_int(bitgen_t *bitgen_state) nogil
+    uint64_t random_uint(bitgen_t *bitgen_state) nogil
+
+    double random_normal(bitgen_t *bitgen_state, double loc, double scale) nogil
+
+    double random_gamma(bitgen_t *bitgen_state, double shape, double scale) nogil
+    float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale) nogil
+
+    double random_exponential(bitgen_t *bitgen_state, double scale) nogil
+    double random_uniform(bitgen_t *bitgen_state, double lower, double range) nogil
+    double random_beta(bitgen_t *bitgen_state, double a, double b) nogil
+    double random_chisquare(bitgen_t *bitgen_state, double df) nogil
+    double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) nogil
+    double random_standard_cauchy(bitgen_t *bitgen_state) nogil
+    double random_pareto(bitgen_t *bitgen_state, double a) nogil
+    double random_weibull(bitgen_t *bitgen_state, double a) nogil
+    double random_power(bitgen_t *bitgen_state, double a) nogil
+    double random_laplace(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_gumbel(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_logistic(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma) nogil
+    double random_rayleigh(bitgen_t *bitgen_state, double mode) nogil
+    double random_standard_t(bitgen_t *bitgen_state, double df) nogil
+    double random_noncentral_chisquare(bitgen_t *bitgen_state, double df,
+                                       double nonc) nogil
+    double random_noncentral_f(bitgen_t *bitgen_state, double dfnum,
+                               double dfden, double nonc) nogil
+    double random_wald(bitgen_t *bitgen_state, double mean, double scale) nogil
+    double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) nogil
+    double random_triangular(bitgen_t *bitgen_state, double left, double mode,
+                             double right) nogil
+
+    int64_t random_poisson(bitgen_t *bitgen_state, double lam) nogil
+    int64_t random_negative_binomial(bitgen_t *bitgen_state, double n, double p) nogil
+    int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n, binomial_t *binomial) nogil
+    int64_t random_logseries(bitgen_t *bitgen_state, double p) nogil
+    int64_t random_geometric_search(bitgen_t *bitgen_state, double p) nogil
+    int64_t random_geometric_inversion(bitgen_t *bitgen_state, double p) nogil
+    int64_t random_geometric(bitgen_t *bitgen_state, double p) nogil
+    int64_t random_zipf(bitgen_t *bitgen_state, double a) nogil
+    int64_t random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad,
+                                    int64_t sample) nogil
+
+    uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max) nogil
+
+    # Generate random uint64 numbers in closed interval [off, off + rng].
+    uint64_t random_bounded_uint64(bitgen_t *bitgen_state,
+                                   uint64_t off, uint64_t rng,
+                                   uint64_t mask, bint use_masked) nogil
+
+    void random_multinomial(bitgen_t *bitgen_state, int64_t n, int64_t *mnix,
+                            double *pix, np.npy_intp d, binomial_t *binomial) nogil
+
+    int random_mvhg_count(bitgen_t *bitgen_state,
+                          int64_t total,
+                          size_t num_colors, int64_t *colors,
+                          int64_t nsample,
+                          size_t num_variates, int64_t *variates) nogil
+    void random_mvhg_marginals(bitgen_t *bitgen_state,
+                               int64_t total,
+                               size_t num_colors, int64_t *colors,
+                               int64_t nsample,
+                               size_t num_variates, int64_t *variates) nogil
 
+np.import_array()
 
-__all__ = ['Generator', 'beta', 'binomial', 'bytes', 'chisquare', 'choice',
-           'dirichlet', 'exponential', 'f', 'gamma',
-           'geometric', 'gumbel', 'hypergeometric', 'integers', 'laplace',
-           'logistic', 'lognormal', 'logseries', 'multinomial',
-           'multivariate_normal', 'negative_binomial', 'noncentral_chisquare',
-           'noncentral_f', 'normal', 'pareto', 'permutation',
-           'poisson', 'power', 'random',  'rayleigh', 'shuffle',
-           'standard_cauchy', 'standard_exponential', 'standard_gamma',
-           'standard_normal', 'standard_t', 'triangular',
-           'uniform', 'vonmises', 'wald', 'weibull', 'zipf']
 
-np.import_array()
+cdef int64_t _safe_sum_nonneg_int64(size_t num_colors, int64_t *colors):
+    """
+    Sum the values in the array `colors`.
+
+    Return -1 if an overflow occurs.
+    The values in *colors are assumed to be nonnegative.
+    """
+    cdef size_t i
+    cdef int64_t sum
+
+    sum = 0
+    for i in range(num_colors):
+        if colors[i] > INT64_MAX - sum:
+            return -1
+        sum += colors[i]
+    return sum
 
 
 cdef bint _check_bit_generator(object bitgen):
@@ -193,9 +316,9 @@ cdef class Generator:
         cdef double temp
         key = np.dtype(dtype).name
         if key == 'float64':
-            return double_fill(&random_double_fill, &self._bitgen, size, self.lock, out)
+            return double_fill(&random_standard_uniform_fill, &self._bitgen, size, self.lock, out)
         elif key == 'float32':
-            return float_fill(&random_float, &self._bitgen, size, self.lock, out)
+            return float_fill(&random_standard_uniform_fill_f, &self._bitgen, size, self.lock, out)
         else:
             raise TypeError('Unsupported dtype "%s" for random' % key)
 
@@ -341,9 +464,9 @@ cdef class Generator:
                 return double_fill(&random_standard_exponential_fill, &self._bitgen, size, self.lock, out)
         elif key == 'float32':
             if method == u'zig':
-                return float_fill(&random_standard_exponential_zig_f, &self._bitgen, size, self.lock, out)
+                return float_fill(&random_standard_exponential_zig_fill_f, &self._bitgen, size, self.lock, out)
             else:
-                return float_fill(&random_standard_exponential_f, &self._bitgen, size, self.lock, out)
+                return float_fill(&random_standard_exponential_fill_f, &self._bitgen, size, self.lock, out)
         else:
             raise TypeError('Unsupported dtype "%s" for standard_exponential'
                             % key)
@@ -379,7 +502,7 @@ cdef class Generator:
             Desired dtype of the result. All dtypes are determined by their
             name, i.e., 'int64', 'int', etc, so byteorder is not available
             and a specific precision may have different C types depending
-            on the platform. The default value is 'np.int'.
+            on the platform. The default value is `np.int_`.
         endpoint : bool, optional
             If true, sample from the interval [low, high] instead of the
             default [low, high)
@@ -472,7 +595,7 @@ cdef class Generator:
         elif key == 'bool':
             ret = _rand_bool(low, high, size, _masked, endpoint, &self._bitgen, self.lock)
 
-        if size is None and dtype in (np.bool, np.int, np.long):
+        if size is None and dtype in (bool, int, np.compat.long):
             if np.array(ret).shape == ():
                 return dtype(ret)
         return ret
@@ -781,7 +904,6 @@ cdef class Generator:
         --------
         integers : Discrete uniform distribution, yielding integers.
         random : Floats uniformly distributed over ``[0, 1)``.
-        random : Alias for `random`.
 
         Notes
         -----
@@ -920,9 +1042,9 @@ cdef class Generator:
         """
         key = np.dtype(dtype).name
         if key == 'float64':
-            return double_fill(&random_gauss_zig_fill, &self._bitgen, size, self.lock, out)
+            return double_fill(&random_standard_normal_fill, &self._bitgen, size, self.lock, out)
         elif key == 'float32':
-            return float_fill(&random_gauss_zig_f, &self._bitgen, size, self.lock, out)
+            return float_fill(&random_standard_normal_fill_f, &self._bitgen, size, self.lock, out)
 
         else:
             raise TypeError('Unsupported dtype "%s" for standard_normal' % key)
@@ -1023,7 +1145,7 @@ cdef class Generator:
                [ 0.39924804,  4.68456316,  4.99394529,  4.84057254]])  # random
 
         """
-        return cont(&random_normal_zig, &self._bitgen, size, self.lock, 2,
+        return cont(&random_normal, &self._bitgen, size, self.lock, 2,
                     loc, '', CONS_NONE,
                     scale, 'scale', CONS_NON_NEGATIVE,
                     0.0, '', CONS_NONE,
@@ -1109,13 +1231,13 @@ cdef class Generator:
         cdef void *func
         key = np.dtype(dtype).name
         if key == 'float64':
-            return cont(&random_standard_gamma_zig, &self._bitgen, size, self.lock, 1,
+            return cont(&random_standard_gamma, &self._bitgen, size, self.lock, 1,
                         shape, 'shape', CONS_NON_NEGATIVE,
                         0.0, '', CONS_NONE,
                         0.0, '', CONS_NONE,
                         out)
         if key == 'float32':
-            return cont_f(&random_standard_gamma_zig_f, &self._bitgen, size, self.lock,
+            return cont_f(&random_standard_gamma_f, &self._bitgen, size, self.lock,
                           shape, 'shape', CONS_NON_NEGATIVE,
                           out)
         else:
@@ -3147,6 +3269,8 @@ cdef class Generator:
 
         See Also
         --------
+        multivariate_hypergeometric : Draw samples from the multivariate
+            hypergeometric distribution.
         scipy.stats.hypergeom : probability density function, distribution or
             cumulative density function, etc.
 
@@ -3332,7 +3456,7 @@ cdef class Generator:
 
     # Multivariate distributions:
     def multivariate_normal(self, mean, cov, size=None, check_valid='warn',
-                            tol=1e-8):
+                            tol=1e-8, *, method='svd'):
         """
         multivariate_normal(mean, cov, size=None, check_valid='warn', tol=1e-8)
 
@@ -3362,6 +3486,15 @@ cdef class Generator:
         tol : float, optional
             Tolerance when checking the singular values in covariance matrix.
             cov is cast to double before the check.
+        method : { 'svd', 'eigh', 'cholesky'}, optional
+            The cov input is used to compute a factor matrix A such that
+            ``A @ A.T = cov``. This argument is used to select the method
+            used to compute the factor matrix A. The default method 'svd' is
+            the slowest, while 'cholesky' is the fastest but less robust than
+            the slowest method. The method `eigh` uses eigen decomposition to
+            compute A and is faster than svd but slower than cholesky.
+
+            .. versionadded:: 1.18.0
 
         Returns
         -------
@@ -3422,10 +3555,16 @@ cdef class Generator:
         --------
         >>> mean = (1, 2)
         >>> cov = [[1, 0], [0, 1]]
-        >>> x = np.random.default_rng().multivariate_normal(mean, cov, (3, 3))
+        >>> rng = np.random.default_rng()
+        >>> x = rng.multivariate_normal(mean, cov, (3, 3))
         >>> x.shape
         (3, 3, 2)
 
+        We can use a different method other than the default to factorize cov:
+        >>> y = rng.multivariate_normal(mean, cov, (3, 3), method='cholesky')
+        >>> y.shape
+        (3, 3, 2)
+
         The following is probably true, given that 0.6 is roughly twice the
         standard deviation:
 
@@ -3433,7 +3572,9 @@ cdef class Generator:
         [True, True] # random
 
         """
-        from numpy.dual import svd
+        if method not in {'eigh', 'svd', 'cholesky'}:
+            raise ValueError(
+                "method must be one of {'eigh', 'svd', 'cholesky'}")
 
         # Check preconditions on arguments
         mean = np.array(mean)
@@ -3476,13 +3617,27 @@ cdef class Generator:
 
         # GH10839, ensure double to make tol meaningful
         cov = cov.astype(np.double)
-        (u, s, v) = svd(cov)
+        if method == 'svd':
+            from numpy.dual import svd
+            (u, s, vh) = svd(cov)
+        elif method == 'eigh':
+            from numpy.dual import eigh
+            # could call linalg.svd(hermitian=True), but that calculates a vh we don't need
+            (s, u)  = eigh(cov)
+        else:
+            from numpy.dual import cholesky
+            l = cholesky(cov)
 
-        if check_valid != 'ignore':
+        # make sure check_valid is ignored when method == 'cholesky'
+        # since the decomposition will have failed if cov is not valid.
+        if check_valid != 'ignore' and method != 'cholesky':
             if check_valid != 'warn' and check_valid != 'raise':
-                raise ValueError("check_valid must equal 'warn', 'raise', or 'ignore'")
-
-            psd = np.allclose(np.dot(v.T * s, v), cov, rtol=tol, atol=tol)
+                raise ValueError(
+                    "check_valid must equal 'warn', 'raise', or 'ignore'")
+            if method == 'svd':
+                psd = np.allclose(np.dot(vh.T * s, vh), cov, rtol=tol, atol=tol)
+            else:
+                psd = not np.any(s < -tol)
             if not psd:
                 if check_valid == 'warn':
                     warnings.warn("covariance is not positive-semidefinite.",
@@ -3490,7 +3645,17 @@ cdef class Generator:
                 else:
                     raise ValueError("covariance is not positive-semidefinite.")
 
-        x = np.dot(x, np.sqrt(s)[:, None] * v)
+        if method == 'cholesky':
+            _factor = l
+        elif method == 'eigh':
+            # if check_valid == 'ignore' we need to ensure that np.sqrt does not
+            # return a NaN if s is a very small negative number that is
+            # approximately zero or when the covariance is not positive-semidefinite
+            _factor = u * np.sqrt(abs(s))
+        else:
+            _factor = np.sqrt(s)[:, None] * vh
+
+        x = np.dot(x, _factor)
         x += mean
         x.shape = tuple(final_shape)
         return x
@@ -3645,6 +3810,222 @@ cdef class Generator:
 
         return multin
 
+    def multivariate_hypergeometric(self, object colors, object nsample,
+                                    size=None, method='marginals'):
+        """
+        multivariate_hypergeometric(colors, nsample, size=None,
+                                    method='marginals')
+
+        Generate variates from a multivariate hypergeometric distribution.
+
+        The multivariate hypergeometric distribution is a generalization
+        of the hypergeometric distribution.
+
+        Choose ``nsample`` items at random without replacement from a
+        collection with ``N`` distinct types.  ``N`` is the length of
+        ``colors``, and the values in ``colors`` are the number of occurrences
+        of that type in the collection.  The total number of items in the
+        collection is ``sum(colors)``.  Each random variate generated by this
+        function is a vector of length ``N`` holding the counts of the
+        different types that occurred in the ``nsample`` items.
+
+        The name ``colors`` comes from a common description of the
+        distribution: it is the probability distribution of the number of
+        marbles of each color selected without replacement from an urn
+        containing marbles of different colors; ``colors[i]`` is the number
+        of marbles in the urn with color ``i``.
+
+        Parameters
+        ----------
+        colors : sequence of integers
+            The number of each type of item in the collection from which
+            a sample is drawn.  The values in ``colors`` must be nonnegative.
+            To avoid loss of precision in the algorithm, ``sum(colors)``
+            must be less than ``10**9`` when `method` is "marginals".
+        nsample : int
+            The number of items selected.  ``nsample`` must not be greater
+            than ``sum(colors)``.
+        size : int or tuple of ints, optional
+            The number of variates to generate, either an integer or a tuple
+            holding the shape of the array of variates.  If the given size is,
+            e.g., ``(k, m)``, then ``k * m`` variates are drawn, where one
+            variate is a vector of length ``len(colors)``, and the return value
+            has shape ``(k, m, len(colors))``.  If `size` is an integer, the
+            output has shape ``(size, len(colors))``.  Default is None, in
+            which case a single variate is returned as an array with shape
+            ``(len(colors),)``.
+        method : string, optional
+            Specify the algorithm that is used to generate the variates.
+            Must be 'count' or 'marginals' (the default).  See the Notes
+            for a description of the methods.
+
+        Returns
+        -------
+        variates : ndarray
+            Array of variates drawn from the multivariate hypergeometric
+            distribution.
+
+        See Also
+        --------
+        hypergeometric : Draw samples from the (univariate) hypergeometric
+            distribution.
+
+        Notes
+        -----
+        The two methods do not return the same sequence of variates.
+
+        The "count" algorithm is roughly equivalent to the following numpy
+        code::
+
+            choices = np.repeat(np.arange(len(colors)), colors)
+            selection = np.random.choice(choices, nsample, replace=False)
+            variate = np.bincount(selection, minlength=len(colors))
+
+        The "count" algorithm uses a temporary array of integers with length
+        ``sum(colors)``.
+
+        The "marginals" algorithm generates a variate by using repeated
+        calls to the univariate hypergeometric sampler.  It is roughly
+        equivalent to::
+
+            variate = np.zeros(len(colors), dtype=np.int64)
+            # `remaining` is the cumulative sum of `colors` from the last
+            # element to the first; e.g. if `colors` is [3, 1, 5], then
+            # `remaining` is [9, 6, 5].
+            remaining = np.cumsum(colors[::-1])[::-1]
+            for i in range(len(colors)-1):
+                if nsample < 1:
+                    break
+                variate[i] = hypergeometric(colors[i], remaining[i+1],
+                                           nsample)
+                nsample -= variate[i]
+            variate[-1] = nsample
+
+        The default method is "marginals".  For some cases (e.g. when
+        `colors` contains relatively small integers), the "count" method
+        can be significantly faster than the "marginals" method.  If
+        performance of the algorithm is important, test the two methods
+        with typical inputs to decide which works best.
+
+        .. versionadded:: 1.18.0
+
+        Examples
+        --------
+        >>> colors = [16, 8, 4]
+        >>> seed = 4861946401452
+        >>> gen = np.random.Generator(np.random.PCG64(seed))
+        >>> gen.multivariate_hypergeometric(colors, 6)
+        array([5, 0, 1])
+        >>> gen.multivariate_hypergeometric(colors, 6, size=3)
+        array([[5, 0, 1],
+               [2, 2, 2],
+               [3, 3, 0]])
+        >>> gen.multivariate_hypergeometric(colors, 6, size=(2, 2))
+        array([[[3, 2, 1],
+                [3, 2, 1]],
+               [[4, 1, 1],
+                [3, 2, 1]]])
+        """
+        cdef int64_t nsamp
+        cdef size_t num_colors
+        cdef int64_t total
+        cdef int64_t *colors_ptr
+        cdef int64_t max_index
+        cdef size_t num_variates
+        cdef int64_t *variates_ptr
+        cdef int result
+
+        if method not in ['count', 'marginals']:
+            raise ValueError('method must be "count" or "marginals".')
+
+        try:
+            operator.index(nsample)
+        except TypeError:
+            raise ValueError('nsample must be an integer')
+
+        if nsample < 0:
+            raise ValueError("nsample must be nonnegative.")
+        if nsample > INT64_MAX:
+            raise ValueError("nsample must not exceed %d" % INT64_MAX)
+        nsamp = nsample
+
+        # Validation of colors, a 1-d sequence of nonnegative integers.
+        invalid_colors = False
+        try:
+            colors = np.asarray(colors)
+            if colors.ndim != 1:
+                invalid_colors = True
+            elif colors.size > 0 and not np.issubdtype(colors.dtype,
+                                                       np.integer):
+                invalid_colors = True
+            elif np.any((colors < 0) | (colors > INT64_MAX)):
+                invalid_colors = True
+        except ValueError:
+            invalid_colors = True
+        if invalid_colors:
+            raise ValueError('colors must be a one-dimensional sequence '
+                             'of nonnegative integers not exceeding %d.' %
+                             INT64_MAX)
+
+        colors = np.ascontiguousarray(colors, dtype=np.int64)
+        num_colors = colors.size
+
+        colors_ptr = <int64_t *> np.PyArray_DATA(colors)
+
+        total = _safe_sum_nonneg_int64(num_colors, colors_ptr)
+        if total == -1:
+            raise ValueError("sum(colors) must not exceed the maximum value "
+                             "of a 64 bit signed integer (%d)" % INT64_MAX)
+
+        if method == 'marginals' and total >= 1000000000:
+            raise ValueError('When method is "marginals", sum(colors) must '
+                             'be less than 1000000000.')
+
+        # The C code that implements the 'count' method will malloc an
+        # array of size total*sizeof(size_t). Here we ensure that that
+        # product does not overflow.
+        if SIZE_MAX > <uint64_t>INT64_MAX:
+            max_index = INT64_MAX // sizeof(size_t)
+        else:
+            max_index = SIZE_MAX // sizeof(size_t)
+        if method == 'count' and total > max_index:
+            raise ValueError("When method is 'count', sum(colors) must not "
+                             "exceed %d" % max_index)
+        if nsamp > total:
+            raise ValueError("nsample > sum(colors)")
+
+        # Figure out the shape of the return array.
+        if size is None:
+            shape = (num_colors,)
+        elif np.isscalar(size):
+            shape = (size, num_colors)
+        else:
+            shape = tuple(size) + (num_colors,)
+        variates = np.zeros(shape, dtype=np.int64)
+
+        if num_colors == 0:
+            return variates
+
+        # One variate is a vector of length num_colors.
+        num_variates = variates.size // num_colors
+        variates_ptr = <int64_t *> np.PyArray_DATA(variates)
+
+        if method == 'count':
+            with self.lock, nogil:
+                result = random_mvhg_count(&self._bitgen, total,
+                                           num_colors, colors_ptr, nsamp,
+                                           num_variates, variates_ptr)
+            if result == -1:
+                raise MemoryError("Insufficient memory for multivariate_"
+                                  "hypergeometric with method='count' and "
+                                  "sum(colors)=%d" % total)
+        else:
+            with self.lock, nogil:
+                random_mvhg_marginals(&self._bitgen, total,
+                                      num_colors, colors_ptr, nsamp,
+                                      num_variates, variates_ptr)
+        return variates
+
     def dirichlet(self, object alpha, size=None):
         """
         dirichlet(alpha, size=None)
@@ -3773,7 +4154,7 @@ cdef class Generator:
             while i < totsize:
                 acc = 0.0
                 for j in range(k):
-                    val_data[i+j] = random_standard_gamma_zig(&self._bitgen,
+                    val_data[i+j] = random_standard_gamma(&self._bitgen,
                                                               alpha_data[j])
                     acc = acc + val_data[i + j]
                 invacc = 1/acc
@@ -4003,21 +4384,24 @@ def default_rng(seed=None):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence, BitGenerator, Generator}, optional
+    seed : {None, int, array_like[ints], SeedSequence, BitGenerator, Generator}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
         Additionally, when passed a `BitGenerator`, it will be wrapped by
         `Generator`. If passed a `Generator`, it will be returned unaltered.
 
+    Returns
+    -------
+    Generator
+        The initialized generator object.
+
     Notes
     -----
-    When `seed` is omitted or ``None``, a new `BitGenerator` and `Generator` will
-    be instantiated each time. This function does not manage a default global
-    instance.
+    If ``seed`` is not a `BitGenerator` or a `Generator`, a new `BitGenerator`
+    is instantiated. This function does not manage a default global instance.
     """
     if _check_bit_generator(seed):
         # We were passed a BitGenerator, so just wrap it up.
diff --git a/numpy/random/mt19937.pyx b/numpy/random/_mt19937.pyx
index 7d0f6cd22..e99652b73 100644
--- a/numpy/random/mt19937.pyx
+++ b/numpy/random/_mt19937.pyx
@@ -3,8 +3,8 @@ import operator
 import numpy as np
 cimport numpy as np
 
-from .common cimport *
-from .bit_generator cimport BitGenerator, SeedSequence
+from libc.stdint cimport uint32_t, uint64_t
+from ._bit_generator cimport BitGenerator, SeedSequence
 
 __all__ = ['MT19937']
 
@@ -48,13 +48,12 @@ cdef class MT19937(BitGenerator):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
 
     Attributes
     ----------
diff --git a/numpy/random/pcg64.pyx b/numpy/random/_pcg64.pyx
index 585520139..1a5d852a2 100644
--- a/numpy/random/pcg64.pyx
+++ b/numpy/random/_pcg64.pyx
@@ -1,8 +1,9 @@
 import numpy as np
 cimport numpy as np
 
-from .common cimport *
-from .bit_generator cimport BitGenerator
+from libc.stdint cimport uint32_t, uint64_t
+from ._common cimport uint64_to_double, wrap_int
+from ._bit_generator cimport BitGenerator
 
 __all__ = ['PCG64']
 
@@ -43,13 +44,12 @@ cdef class PCG64(BitGenerator):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
 
     Notes
     -----
diff --git a/numpy/random/philox.pyx b/numpy/random/_philox.pyx
index 8b7683017..9f136c32f 100644
--- a/numpy/random/philox.pyx
+++ b/numpy/random/_philox.pyx
@@ -6,9 +6,11 @@ except ImportError:
     from dummy_threading import Lock
 
 import numpy as np
+cimport numpy as np
 
-from .common cimport *
-from .bit_generator cimport BitGenerator
+from libc.stdint cimport uint32_t, uint64_t
+from ._common cimport uint64_to_double, int_to_array, wrap_int
+from ._bit_generator cimport BitGenerator
 
 __all__ = ['Philox']
 
@@ -62,21 +64,20 @@ cdef class Philox(BitGenerator):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
     counter : {None, int, array_like}, optional
         Counter to use in the Philox state. Can be either
         a Python int (long in 2.x) in [0, 2**256) or a 4-element uint64 array.
         If not provided, the RNG is initialized at 0.
     key : {None, int, array_like}, optional
-        Key to use in the Philox state.  Unlike seed, the value in key is
+        Key to use in the Philox state.  Unlike ``seed``, the value in key is
         directly set. Can be either a Python int in [0, 2**128) or a 2-element
-        uint64 array. `key` and `seed` cannot both be used.
+        uint64 array. `key` and ``seed`` cannot both be used.
 
     Attributes
     ----------
@@ -108,10 +109,10 @@ cdef class Philox(BitGenerator):
     randoms produced. The second is a key which determined the sequence
     produced. Using different keys produces independent sequences.
 
-    The input seed is processed by `SeedSequence` to generate the key. The
+    The input ``seed`` is processed by `SeedSequence` to generate the key. The
     counter is set to 0.
 
-    Alternately, one can omit the seed parameter and set the ``key`` and
+    Alternately, one can omit the ``seed`` parameter and set the ``key`` and
     ``counter`` directly.
 
     **Parallel Features**
@@ -146,7 +147,7 @@ cdef class Philox(BitGenerator):
 
     **Compatibility Guarantee**
 
-    ``Philox`` makes a guarantee that a fixed seed will always produce
+    ``Philox`` makes a guarantee that a fixed ``seed`` will always produce
     the same random integer stream.
 
     Examples
diff --git a/numpy/random/_pickle.py b/numpy/random/_pickle.py
index 3b58f21e8..29ff69644 100644
--- a/numpy/random/_pickle.py
+++ b/numpy/random/_pickle.py
@@ -1,10 +1,10 @@
 from .mtrand import RandomState
-from .philox import Philox
-from .pcg64 import PCG64
-from .sfc64 import SFC64
+from ._philox import Philox
+from ._pcg64 import PCG64
+from ._sfc64 import SFC64
 
-from .generator import Generator
-from .mt19937 import MT19937
+from ._generator import Generator
+from ._mt19937 import MT19937
 
 BitGenerators = {'MT19937': MT19937,
                  'PCG64': PCG64,
diff --git a/numpy/random/sfc64.pyx b/numpy/random/_sfc64.pyx
index a881096e9..1633669d5 100644
--- a/numpy/random/sfc64.pyx
+++ b/numpy/random/_sfc64.pyx
@@ -1,8 +1,9 @@
 import numpy as np
 cimport numpy as np
 
-from .common cimport *
-from .bit_generator cimport BitGenerator
+from libc.stdint cimport uint32_t, uint64_t
+from ._common cimport uint64_to_double
+from ._bit_generator cimport BitGenerator
 
 __all__ = ['SFC64']
 
@@ -38,13 +39,12 @@ cdef class SFC64(BitGenerator):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
 
     Notes
     -----
diff --git a/numpy/random/distributions.pxd b/numpy/random/distributions.pxd
deleted file mode 100644
index 75edaee9d..000000000
--- a/numpy/random/distributions.pxd
+++ /dev/null
@@ -1,140 +0,0 @@
-#cython: language_level=3
-
-from .common cimport (uint8_t, uint16_t, uint32_t, uint64_t,
-                          int32_t, int64_t, bitgen_t)
-import numpy as np
-cimport numpy as np
-
-cdef extern from "src/distributions/distributions.h":
-
-    struct s_binomial_t:
-        int has_binomial
-        double psave
-        int64_t nsave
-        double r
-        double q
-        double fm
-        int64_t m
-        double p1
-        double xm
-        double xl
-        double xr
-        double c
-        double laml
-        double lamr
-        double p2
-        double p3
-        double p4
-
-    ctypedef s_binomial_t binomial_t
-
-    double random_double(bitgen_t *bitgen_state) nogil
-    void random_double_fill(bitgen_t* bitgen_state, np.npy_intp cnt, double *out) nogil
-    double random_standard_exponential(bitgen_t *bitgen_state) nogil
-    void random_standard_exponential_fill(bitgen_t *bitgen_state, np.npy_intp cnt, double *out) nogil
-    double random_standard_exponential_zig(bitgen_t *bitgen_state) nogil
-    void random_standard_exponential_zig_fill(bitgen_t *bitgen_state, np.npy_intp cnt, double *out) nogil
-    double random_gauss_zig(bitgen_t* bitgen_state) nogil
-    void random_gauss_zig_fill(bitgen_t *bitgen_state, np.npy_intp count, double *out) nogil
-    double random_standard_gamma_zig(bitgen_t *bitgen_state, double shape) nogil
-
-    float random_float(bitgen_t *bitgen_state) nogil
-    float random_standard_exponential_f(bitgen_t *bitgen_state) nogil
-    float random_standard_exponential_zig_f(bitgen_t *bitgen_state) nogil
-    float random_gauss_zig_f(bitgen_t* bitgen_state) nogil
-    float random_standard_gamma_f(bitgen_t *bitgen_state, float shape) nogil
-    float random_standard_gamma_zig_f(bitgen_t *bitgen_state, float shape) nogil
-
-    int64_t random_positive_int64(bitgen_t *bitgen_state) nogil
-    int32_t random_positive_int32(bitgen_t *bitgen_state) nogil
-    int64_t random_positive_int(bitgen_t *bitgen_state) nogil
-    uint64_t random_uint(bitgen_t *bitgen_state) nogil
-
-    double random_normal_zig(bitgen_t *bitgen_state, double loc, double scale) nogil
-
-    double random_gamma(bitgen_t *bitgen_state, double shape, double scale) nogil
-    float random_gamma_float(bitgen_t *bitgen_state, float shape, float scale) nogil
-
-    double random_exponential(bitgen_t *bitgen_state, double scale) nogil
-    double random_uniform(bitgen_t *bitgen_state, double lower, double range) nogil
-    double random_beta(bitgen_t *bitgen_state, double a, double b) nogil
-    double random_chisquare(bitgen_t *bitgen_state, double df) nogil
-    double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) nogil
-    double random_standard_cauchy(bitgen_t *bitgen_state) nogil
-    double random_pareto(bitgen_t *bitgen_state, double a) nogil
-    double random_weibull(bitgen_t *bitgen_state, double a) nogil
-    double random_power(bitgen_t *bitgen_state, double a) nogil
-    double random_laplace(bitgen_t *bitgen_state, double loc, double scale) nogil
-    double random_gumbel(bitgen_t *bitgen_state, double loc, double scale) nogil
-    double random_logistic(bitgen_t *bitgen_state, double loc, double scale) nogil
-    double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma) nogil
-    double random_rayleigh(bitgen_t *bitgen_state, double mode) nogil
-    double random_standard_t(bitgen_t *bitgen_state, double df) nogil
-    double random_noncentral_chisquare(bitgen_t *bitgen_state, double df,
-                                       double nonc) nogil
-    double random_noncentral_f(bitgen_t *bitgen_state, double dfnum,
-                               double dfden, double nonc) nogil
-    double random_wald(bitgen_t *bitgen_state, double mean, double scale) nogil
-    double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) nogil
-    double random_triangular(bitgen_t *bitgen_state, double left, double mode,
-                             double right) nogil
-
-    int64_t random_poisson(bitgen_t *bitgen_state, double lam) nogil
-    int64_t random_negative_binomial(bitgen_t *bitgen_state, double n, double p) nogil
-    int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n, binomial_t *binomial) nogil
-    int64_t random_logseries(bitgen_t *bitgen_state, double p) nogil
-    int64_t random_geometric_search(bitgen_t *bitgen_state, double p) nogil
-    int64_t random_geometric_inversion(bitgen_t *bitgen_state, double p) nogil
-    int64_t random_geometric(bitgen_t *bitgen_state, double p) nogil
-    int64_t random_zipf(bitgen_t *bitgen_state, double a) nogil
-    int64_t random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad,
-                                    int64_t sample) nogil
-
-    uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max) nogil
-
-    # Generate random uint64 numbers in closed interval [off, off + rng].
-    uint64_t random_bounded_uint64(bitgen_t *bitgen_state,
-                                   uint64_t off, uint64_t rng,
-                                   uint64_t mask, bint use_masked) nogil
-
-    # Generate random uint32 numbers in closed interval [off, off + rng].
-    uint32_t random_buffered_bounded_uint32(bitgen_t *bitgen_state,
-                                            uint32_t off, uint32_t rng,
-                                            uint32_t mask, bint use_masked,
-                                            int *bcnt, uint32_t *buf) nogil
-    uint16_t random_buffered_bounded_uint16(bitgen_t *bitgen_state,
-                                            uint16_t off, uint16_t rng,
-                                            uint16_t mask, bint use_masked,
-                                            int *bcnt, uint32_t *buf) nogil
-    uint8_t random_buffered_bounded_uint8(bitgen_t *bitgen_state,
-                                          uint8_t off, uint8_t rng,
-                                          uint8_t mask, bint use_masked,
-                                          int *bcnt, uint32_t *buf) nogil
-    np.npy_bool random_buffered_bounded_bool(bitgen_t *bitgen_state,
-                                             np.npy_bool off, np.npy_bool rng,
-                                             np.npy_bool mask, bint use_masked,
-                                             int *bcnt, uint32_t *buf) nogil
-
-    void random_bounded_uint64_fill(bitgen_t *bitgen_state,
-                                    uint64_t off, uint64_t rng, np.npy_intp cnt,
-                                    bint use_masked,
-                                    uint64_t *out) nogil
-    void random_bounded_uint32_fill(bitgen_t *bitgen_state,
-                                    uint32_t off, uint32_t rng, np.npy_intp cnt,
-                                    bint use_masked,
-                                    uint32_t *out) nogil
-    void random_bounded_uint16_fill(bitgen_t *bitgen_state,
-                                    uint16_t off, uint16_t rng, np.npy_intp cnt,
-                                    bint use_masked,
-                                    uint16_t *out) nogil
-    void random_bounded_uint8_fill(bitgen_t *bitgen_state,
-                                   uint8_t off, uint8_t rng, np.npy_intp cnt,
-                                   bint use_masked,
-                                   uint8_t *out) nogil
-    void random_bounded_bool_fill(bitgen_t *bitgen_state,
-                                  np.npy_bool off, np.npy_bool rng, np.npy_intp cnt,
-                                  bint use_masked,
-                                  np.npy_bool *out) nogil
-
-    void random_multinomial(bitgen_t *bitgen_state, int64_t n, int64_t *mnix,
-                            double *pix, np.npy_intp d, binomial_t *binomial) nogil
diff --git a/numpy/random/examples/cython/setup.py b/numpy/random/examples/cython/setup.py
deleted file mode 100644
index 69f057ed5..000000000
--- a/numpy/random/examples/cython/setup.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python3
-"""
-Build the demos
-
-Usage: python setup.py build_ext -i
-"""
-
-import numpy as np
-from distutils.core import setup
-from Cython.Build import cythonize
-from setuptools.extension import Extension
-from os.path import join
-
-extending = Extension("extending",
-                      sources=['extending.pyx'],
-                      include_dirs=[np.get_include()])
-distributions = Extension("extending_distributions",
-                          sources=['extending_distributions.pyx',
-                                   join('..', '..', 'src',
-                                        'distributions', 'distributions.c')],
-                          include_dirs=[np.get_include()])
-
-extensions = [extending, distributions]
-
-setup(
-    ext_modules=cythonize(extensions)
-)
diff --git a/numpy/random/examples/numba/extending.py b/numpy/random/examples/numba/extending.py
deleted file mode 100644
index d41c2d76f..000000000
--- a/numpy/random/examples/numba/extending.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import datetime as dt
-
-import numpy as np
-import numba as nb
-
-from numpy.random import PCG64
-
-x = PCG64()
-f = x.ctypes.next_uint32
-s = x.ctypes.state
-
-
-@nb.jit(nopython=True)
-def bounded_uint(lb, ub, state):
-    mask = delta = ub - lb
-    mask |= mask >> 1
-    mask |= mask >> 2
-    mask |= mask >> 4
-    mask |= mask >> 8
-    mask |= mask >> 16
-
-    val = f(state) & mask
-    while val > delta:
-        val = f(state) & mask
-
-    return lb + val
-
-
-print(bounded_uint(323, 2394691, s.value))
-
-
-@nb.jit(nopython=True)
-def bounded_uints(lb, ub, n, state):
-    out = np.empty(n, dtype=np.uint32)
-    for i in range(n):
-        out[i] = bounded_uint(lb, ub, state)
-
-
-bounded_uints(323, 2394691, 10000000, s.value)
-
-g = x.cffi.next_double
-cffi_state = x.cffi.state
-state_addr = x.cffi.state_address
-
-
-def normals(n, state):
-    out = np.empty(n)
-    for i in range((n + 1) // 2):
-        x1 = 2.0 * g(state) - 1.0
-        x2 = 2.0 * g(state) - 1.0
-        r2 = x1 * x1 + x2 * x2
-        while r2 >= 1.0 or r2 == 0.0:
-            x1 = 2.0 * g(state) - 1.0
-            x2 = 2.0 * g(state) - 1.0
-            r2 = x1 * x1 + x2 * x2
-        f = np.sqrt(-2.0 * np.log(r2) / r2)
-        out[2 * i] = f * x1
-        if 2 * i + 1 < n:
-            out[2 * i + 1] = f * x2
-    return out
-
-
-print(normals(10, cffi_state).var())
-# Warm up
-normalsj = nb.jit(normals, nopython=True)
-normalsj(1, state_addr)
-
-start = dt.datetime.now()
-normalsj(1000000, state_addr)
-ms = 1000 * (dt.datetime.now() - start).total_seconds()
-print('1,000,000 Polar-transform (numba/PCG64) randoms in '
-      '{ms:0.1f}ms'.format(ms=ms))
-
-start = dt.datetime.now()
-np.random.standard_normal(1000000)
-ms = 1000 * (dt.datetime.now() - start).total_seconds()
-print('1,000,000 Polar-transform (NumPy) randoms in {ms:0.1f}ms'.format(ms=ms))
diff --git a/numpy/random/src/aligned_malloc/aligned_malloc.h b/numpy/random/include/aligned_malloc.h
index ea24f6d23..ea24f6d23 100644
--- a/numpy/random/src/aligned_malloc/aligned_malloc.h
+++ b/numpy/random/include/aligned_malloc.h
diff --git a/numpy/random/src/legacy/legacy-distributions.h b/numpy/random/include/legacy-distributions.h
index 4bc15d58e..b8ba0841c 100644
--- a/numpy/random/src/legacy/legacy-distributions.h
+++ b/numpy/random/include/legacy-distributions.h
@@ -2,7 +2,7 @@
 #define _RANDOMDGEN__DISTRIBUTIONS_LEGACY_H_
 
 
-#include "../distributions/distributions.h"
+#include "numpy/random/distributions.h"
 
 typedef struct aug_bitgen {
   bitgen_t *bit_generator;
diff --git a/numpy/random/legacy_distributions.pxd b/numpy/random/legacy_distributions.pxd
deleted file mode 100644
index c681388db..000000000
--- a/numpy/random/legacy_distributions.pxd
+++ /dev/null
@@ -1,50 +0,0 @@
-#cython: language_level=3
-
-from libc.stdint cimport int64_t
-
-import numpy as np
-cimport numpy as np
-
-from .distributions cimport bitgen_t, binomial_t
-
-cdef extern from "legacy-distributions.h":
-
-    struct aug_bitgen:
-        bitgen_t *bit_generator
-        int has_gauss
-        double gauss
-
-    ctypedef aug_bitgen aug_bitgen_t
-
-    double legacy_gauss(aug_bitgen_t *aug_state) nogil
-    double legacy_pareto(aug_bitgen_t *aug_state, double a) nogil
-    double legacy_weibull(aug_bitgen_t *aug_state, double a) nogil
-    double legacy_standard_gamma(aug_bitgen_t *aug_state, double shape) nogil
-    double legacy_normal(aug_bitgen_t *aug_state, double loc, double scale) nogil
-    double legacy_standard_t(aug_bitgen_t *aug_state, double df) nogil
-
-    double legacy_standard_exponential(aug_bitgen_t *aug_state) nogil
-    double legacy_power(aug_bitgen_t *aug_state, double a) nogil
-    double legacy_gamma(aug_bitgen_t *aug_state, double shape, double scale) nogil
-    double legacy_power(aug_bitgen_t *aug_state, double a) nogil
-    double legacy_chisquare(aug_bitgen_t *aug_state, double df) nogil
-    double legacy_noncentral_chisquare(aug_bitgen_t *aug_state, double df,
-                                    double nonc) nogil
-    double legacy_noncentral_f(aug_bitgen_t *aug_state, double dfnum, double dfden,
-                            double nonc) nogil
-    double legacy_wald(aug_bitgen_t *aug_state, double mean, double scale) nogil
-    double legacy_lognormal(aug_bitgen_t *aug_state, double mean, double sigma) nogil
-    int64_t legacy_random_binomial(bitgen_t *bitgen_state, double p,
-                                   int64_t n, binomial_t *binomial) nogil
-    int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n, double p) nogil
-    int64_t legacy_random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad, int64_t sample) nogil
-    int64_t legacy_random_logseries(bitgen_t *bitgen_state, double p) nogil
-    int64_t legacy_random_poisson(bitgen_t *bitgen_state, double lam) nogil
-    int64_t legacy_random_zipf(bitgen_t *bitgen_state, double a) nogil
-    int64_t legacy_random_geometric(bitgen_t *bitgen_state, double p) nogil
-    void legacy_random_multinomial(bitgen_t *bitgen_state, long n, long *mnix, double *pix, np.npy_intp d, binomial_t *binomial) nogil
-    double legacy_standard_cauchy(aug_bitgen_t *state) nogil
-    double legacy_beta(aug_bitgen_t *aug_state, double a, double b) nogil
-    double legacy_f(aug_bitgen_t *aug_state, double dfnum, double dfden) nogil
-    double legacy_exponential(aug_bitgen_t *aug_state, double scale) nogil
-    double legacy_power(aug_bitgen_t *state, double a) nogil
diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx
index c469a4645..691a6e6e7 100644
--- a/numpy/random/mtrand.pyx
+++ b/numpy/random/mtrand.pyx
@@ -5,19 +5,100 @@ import warnings
 
 import numpy as np
 
-from .bounded_integers import _integers_types
-from .mt19937 import MT19937 as _MT19937
 from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
 from cpython cimport (Py_INCREF, PyFloat_AsDouble)
-from libc cimport string
-
 cimport cython
 cimport numpy as np
 
-from .bounded_integers cimport *
-from .common cimport *
-from .distributions cimport *
-from .legacy_distributions cimport *
+from libc cimport string
+from libc.stdint cimport int64_t, uint64_t
+from ._bounded_integers cimport (_rand_bool, _rand_int32, _rand_int64,
+         _rand_int16, _rand_int8, _rand_uint64, _rand_uint32, _rand_uint16,
+         _rand_uint8,)
+from ._bounded_integers import _integers_types
+from ._mt19937 import MT19937 as _MT19937
+from ._bit_generator cimport bitgen_t
+from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE,
+            CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1, CONS_GTE_1,
+            CONS_GT_1, LEGACY_CONS_POISSON,
+            double_fill, cont, kahan_sum, cont_broadcast_3,
+            check_array_constraint, check_constraint, disc, discrete_broadcast_iii,
+        )
+
+cdef extern from "numpy/random/distributions.h":
+    struct s_binomial_t:
+        int has_binomial
+        double psave
+        int64_t nsave
+        double r
+        double q
+        double fm
+        int64_t m
+        double p1
+        double xm
+        double xl
+        double xr
+        double c
+        double laml
+        double lamr
+        double p2
+        double p3
+        double p4
+
+    ctypedef s_binomial_t binomial_t
+
+    void random_standard_uniform_fill(bitgen_t* bitgen_state, np.npy_intp cnt, double *out) nogil
+    int64_t random_positive_int(bitgen_t *bitgen_state) nogil
+    double random_uniform(bitgen_t *bitgen_state, double lower, double range) nogil
+    double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) nogil
+    double random_laplace(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_gumbel(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_logistic(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_rayleigh(bitgen_t *bitgen_state, double mode) nogil
+    double random_triangular(bitgen_t *bitgen_state, double left, double mode,
+                                 double right) nogil
+    uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max) nogil
+
+cdef extern from "include/legacy-distributions.h":
+    struct aug_bitgen:
+        bitgen_t *bit_generator
+        int has_gauss
+        double gauss
+
+    ctypedef aug_bitgen aug_bitgen_t
+
+    double legacy_gauss(aug_bitgen_t *aug_state) nogil
+    double legacy_pareto(aug_bitgen_t *aug_state, double a) nogil
+    double legacy_weibull(aug_bitgen_t *aug_state, double a) nogil
+    double legacy_standard_gamma(aug_bitgen_t *aug_state, double shape) nogil
+    double legacy_normal(aug_bitgen_t *aug_state, double loc, double scale) nogil
+    double legacy_standard_t(aug_bitgen_t *aug_state, double df) nogil
+
+    double legacy_standard_exponential(aug_bitgen_t *aug_state) nogil
+    double legacy_power(aug_bitgen_t *aug_state, double a) nogil
+    double legacy_gamma(aug_bitgen_t *aug_state, double shape, double scale) nogil
+    double legacy_power(aug_bitgen_t *aug_state, double a) nogil
+    double legacy_chisquare(aug_bitgen_t *aug_state, double df) nogil
+    double legacy_noncentral_chisquare(aug_bitgen_t *aug_state, double df,
+                                    double nonc) nogil
+    double legacy_noncentral_f(aug_bitgen_t *aug_state, double dfnum, double dfden,
+                            double nonc) nogil
+    double legacy_wald(aug_bitgen_t *aug_state, double mean, double scale) nogil
+    double legacy_lognormal(aug_bitgen_t *aug_state, double mean, double sigma) nogil
+    int64_t legacy_random_binomial(bitgen_t *bitgen_state, double p,
+                                   int64_t n, binomial_t *binomial) nogil
+    int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n, double p) nogil
+    int64_t legacy_random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad, int64_t sample) nogil
+    int64_t legacy_random_logseries(bitgen_t *bitgen_state, double p) nogil
+    int64_t legacy_random_poisson(bitgen_t *bitgen_state, double lam) nogil
+    int64_t legacy_random_zipf(bitgen_t *bitgen_state, double a) nogil
+    int64_t legacy_random_geometric(bitgen_t *bitgen_state, double p) nogil
+    void legacy_random_multinomial(bitgen_t *bitgen_state, long n, long *mnix, double *pix, np.npy_intp d, binomial_t *binomial) nogil
+    double legacy_standard_cauchy(aug_bitgen_t *state) nogil
+    double legacy_beta(aug_bitgen_t *aug_state, double a, double b) nogil
+    double legacy_f(aug_bitgen_t *aug_state, double dfnum, double dfden) nogil
+    double legacy_exponential(aug_bitgen_t *aug_state, double scale) nogil
+    double legacy_power(aug_bitgen_t *state, double a) nogil
 
 np.import_array()
 
@@ -84,7 +165,7 @@ cdef class RandomState:
     --------
     Generator
     MT19937
-    :ref:`bit_generator`
+    numpy.random.BitGenerator
 
     """
     cdef public object _bit_generator
@@ -329,7 +410,7 @@ cdef class RandomState:
 
         """
         cdef double temp
-        return double_fill(&random_double_fill, &self._bitgen, size, self.lock, None)
+        return double_fill(&random_standard_uniform_fill, &self._bitgen, size, self.lock, None)
 
     def random(self, size=None):
         """
@@ -474,7 +555,7 @@ cdef class RandomState:
         tomaxint(size=None)
 
         Return a sample of uniformly distributed random integers in the interval
-        [0, ``np.iinfo(np.int).max``]. The np.int type translates to the C long
+        [0, ``np.iinfo(np.int_).max``]. The `np.int_` type translates to the C long
         integer type and its precision is platform dependent.
 
         Parameters
@@ -503,7 +584,7 @@ cdef class RandomState:
                 [ 739731006, 1947757578]],
                [[1871712945,  752307660],
                 [1601631370, 1479324245]]])
-        >>> rs.tomaxint((2,2,2)) < np.iinfo(np.int).max
+        >>> rs.tomaxint((2,2,2)) < np.iinfo(np.int_).max
         array([[[ True,  True],
                 [ True,  True]],
                [[ True,  True],
@@ -555,7 +636,7 @@ cdef class RandomState:
             Desired dtype of the result. All dtypes are determined by their
             name, i.e., 'int64', 'int', etc, so byteorder is not available
             and a specific precision may have different C types depending
-            on the platform. The default value is 'np.int'.
+            on the platform. The default value is `np.int_`.
 
             .. versionadded:: 1.11.0
 
@@ -567,7 +648,7 @@ cdef class RandomState:
 
         See Also
         --------
-        random.random_integers : similar to `randint`, only for the closed
+        random_integers : similar to `randint`, only for the closed
             interval [`low`, `high`], and 1 is the lowest value if `high` is
             omitted.
 
@@ -643,7 +724,7 @@ cdef class RandomState:
         elif key == 'bool':
             ret = _rand_bool(low, high, size, _masked, _endpoint, &self._bitgen, self.lock)
 
-        if size is None and dtype in (np.bool, np.int, np.long):
+        if size is None and dtype in (bool, int, np.compat.long):
             if np.array(ret).shape == ():
                 return dtype(ret)
         return ret
@@ -985,7 +1066,7 @@ cdef class RandomState:
 
         .. note::
             This is a convenience function for users porting code from Matlab,
-            and wraps `numpy.random.random_sample`. That function takes a
+            and wraps `random_sample`. That function takes a
             tuple to specify the size of the output, which is consistent with
             other NumPy functions like `numpy.zeros` and `numpy.ones`.
 
@@ -1029,7 +1110,7 @@ cdef class RandomState:
 
         .. note::
             This is a convenience function for users porting code from Matlab,
-            and wraps `numpy.random.standard_normal`. That function takes a
+            and wraps `standard_normal`. That function takes a
             tuple to specify the size of the output, which is consistent with
             other NumPy functions like `numpy.zeros` and `numpy.ones`.
 
@@ -1084,11 +1165,11 @@ cdef class RandomState:
         """
         random_integers(low, high=None, size=None)
 
-        Random integers of type np.int between `low` and `high`, inclusive.
+        Random integers of type `np.int_` between `low` and `high`, inclusive.
 
-        Return random integers of type np.int from the "discrete uniform"
+        Return random integers of type `np.int_` from the "discrete uniform"
         distribution in the closed interval [`low`, `high`].  If `high` is
-        None (the default), then results are from [1, `low`]. The np.int
+        None (the default), then results are from [1, `low`]. The `np.int_`
         type translates to the C long integer type and its precision
         is platform dependent.
 
@@ -1289,8 +1370,8 @@ cdef class RandomState:
         The function has its peak at the mean, and its "spread" increases with
         the standard deviation (the function reaches 0.607 times its maximum at
         :math:`x + \\sigma` and :math:`x - \\sigma` [2]_).  This implies that
-        `numpy.random.normal` is more likely to return samples lying close to
-        the mean, rather than those far away.
+        normal is more likely to return samples lying close to the mean, rather
+        than those far away.
 
         References
         ----------
diff --git a/numpy/random/setup.py b/numpy/random/setup.py
index ce7f0565f..776a018bc 100644
--- a/numpy/random/setup.py
+++ b/numpy/random/setup.py
@@ -34,6 +34,7 @@ def configuration(parent_package='', top_path=None):
 
     defs.append(('NPY_NO_DEPRECATED_API', 0))
     config.add_data_dir('tests')
+    config.add_data_dir('_examples')
 
     EXTRA_LINK_ARGS = []
     # Math lib
@@ -47,11 +48,6 @@ def configuration(parent_package='', top_path=None):
     elif not is_msvc:
         # Some bit generators require c99
         EXTRA_COMPILE_ARGS += ['-std=c99']
-        INTEL_LIKE = any(arch in platform.machine() 
-                         for arch in ('x86', 'i686', 'i386', 'amd64'))
-        if INTEL_LIKE:
-            # Assumes GCC or GCC-like compiler
-            EXTRA_COMPILE_ARGS += ['-msse2']
 
     # Use legacy integer variable sizes
     LEGACY_DEFS = [('NP_RANDOM_LEGACY', '1')]
@@ -61,32 +57,32 @@ def configuration(parent_package='', top_path=None):
 
     for gen in ['mt19937']:
         # gen.pyx, src/gen/gen.c, src/gen/gen-jump.c
-        config.add_extension(gen,
-                             sources=['{0}.c'.format(gen),
+        config.add_extension('_{0}'.format(gen),
+                             sources=['_{0}.c'.format(gen),
                                       'src/{0}/{0}.c'.format(gen),
                                       'src/{0}/{0}-jump.c'.format(gen)],
                              include_dirs=['.', 'src', join('src', gen)],
                              libraries=EXTRA_LIBRARIES,
                              extra_compile_args=EXTRA_COMPILE_ARGS,
                              extra_link_args=EXTRA_LINK_ARGS,
-                             depends=['%s.pyx' % gen],
+                             depends=['_%s.pyx' % gen],
                              define_macros=defs,
                              )
     for gen in ['philox', 'pcg64', 'sfc64']:
         # gen.pyx, src/gen/gen.c
         _defs = defs + PCG64_DEFS if gen == 'pcg64' else defs
-        config.add_extension(gen,
-                             sources=['{0}.c'.format(gen),
+        config.add_extension('_{0}'.format(gen),
+                             sources=['_{0}.c'.format(gen),
                                       'src/{0}/{0}.c'.format(gen)],
                              include_dirs=['.', 'src', join('src', gen)],
                              libraries=EXTRA_LIBRARIES,
                              extra_compile_args=EXTRA_COMPILE_ARGS,
                              extra_link_args=EXTRA_LINK_ARGS,
-                             depends=['%s.pyx' % gen, 'bit_generator.pyx',
-                                      'bit_generator.pxd'],
+                             depends=['_%s.pyx' % gen, '_bit_generator.pyx',
+                                      '_bit_generator.pxd'],
                              define_macros=_defs,
                              )
-    for gen in ['common', 'bit_generator']:
+    for gen in ['_common', '_bit_generator']:
         # gen.pyx
         config.add_extension(gen,
                              sources=['{0}.c'.format(gen)],
@@ -97,12 +93,15 @@ def configuration(parent_package='', top_path=None):
                              depends=['%s.pyx' % gen, '%s.pxd' % gen,],
                              define_macros=defs,
                              )
+        config.add_data_files('{0}.pxd'.format(gen))
     other_srcs = [
         'src/distributions/logfactorial.c',
         'src/distributions/distributions.c',
+        'src/distributions/random_mvhg_count.c',
+        'src/distributions/random_mvhg_marginals.c',
         'src/distributions/random_hypergeometric.c',
     ]
-    for gen in ['generator', 'bounded_integers']:
+    for gen in ['_generator', '_bounded_integers']:
         # gen.pyx, src/distributions/distributions.c
         config.add_extension(gen,
                              sources=['{0}.c'.format(gen)] + other_srcs,
@@ -113,8 +112,8 @@ def configuration(parent_package='', top_path=None):
                              depends=['%s.pyx' % gen],
                              define_macros=defs,
                              )
+    config.add_data_files('_bounded_integers.pxd')
     config.add_extension('mtrand',
-                         # mtrand does not depend on random_hypergeometric.c.
                          sources=['mtrand.c',
                                   'src/legacy/legacy-distributions.c',
                                   'src/distributions/logfactorial.c',
diff --git a/numpy/random/src/aligned_malloc/aligned_malloc.c b/numpy/random/src/aligned_malloc/aligned_malloc.c
deleted file mode 100644
index 6e8192cfb..000000000
--- a/numpy/random/src/aligned_malloc/aligned_malloc.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "aligned_malloc.h"
-
-static NPY_INLINE void *PyArray_realloc_aligned(void *p, size_t n);
-
-static NPY_INLINE void *PyArray_malloc_aligned(size_t n);
-
-static NPY_INLINE void *PyArray_calloc_aligned(size_t n, size_t s);
-
-static NPY_INLINE void PyArray_free_aligned(void *p);
-\ No newline at end of file
diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c
index 1244ffe65..df3323408 100644
--- a/numpy/random/src/distributions/distributions.c
+++ b/numpy/random/src/distributions/distributions.c
@@ -1,4 +1,4 @@
-#include "distributions.h"
+#include "numpy/random/distributions.h"
 #include "ziggurat_constants.h"
 #include "logfactorial.h"
 
@@ -6,90 +6,52 @@
 #include <intrin.h>
 #endif
 
-/* Random generators for external use */
-float random_float(bitgen_t *bitgen_state) { return next_float(bitgen_state); }
-
-double random_double(bitgen_t *bitgen_state) {
-  return next_double(bitgen_state);
+/* Inline generators for internal use */
+static NPY_INLINE uint32_t next_uint32(bitgen_t *bitgen_state) {
+  return bitgen_state->next_uint32(bitgen_state->state);
 }
-
-static NPY_INLINE double next_standard_exponential(bitgen_t *bitgen_state) {
-  return -log(1.0 - next_double(bitgen_state));
+static NPY_INLINE uint64_t next_uint64(bitgen_t *bitgen_state) {
+  return bitgen_state->next_uint64(bitgen_state->state);
 }
 
-double random_standard_exponential(bitgen_t *bitgen_state) {
-  return next_standard_exponential(bitgen_state);
+static NPY_INLINE float next_float(bitgen_t *bitgen_state) {
+  return (next_uint32(bitgen_state) >> 9) * (1.0f / 8388608.0f);
 }
 
-void random_standard_exponential_fill(bitgen_t *bitgen_state, npy_intp cnt,
-                                      double *out) {
-  npy_intp i;
-  for (i = 0; i < cnt; i++) {
-    out[i] = next_standard_exponential(bitgen_state);
-  }
+/* Random generators for external use */
+float random_standard_uniform_f(bitgen_t *bitgen_state) {
+    return next_float(bitgen_state); 
 }
 
-float random_standard_exponential_f(bitgen_t *bitgen_state) {
-  return -logf(1.0f - next_float(bitgen_state));
+double random_standard_uniform(bitgen_t *bitgen_state) {
+    return next_double(bitgen_state);
 }
 
-void random_double_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
+void random_standard_uniform_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
   npy_intp i;
   for (i = 0; i < cnt; i++) {
     out[i] = next_double(bitgen_state);
   }
 }
-#if 0
-double random_gauss(bitgen_t *bitgen_state) {
-  if (bitgen_state->has_gauss) {
-    const double temp = bitgen_state->gauss;
-    bitgen_state->has_gauss = false;
-    bitgen_state->gauss = 0.0;
-    return temp;
-  } else {
-    double f, x1, x2, r2;
 
-    do {
-      x1 = 2.0 * next_double(bitgen_state) - 1.0;
-      x2 = 2.0 * next_double(bitgen_state) - 1.0;
-      r2 = x1 * x1 + x2 * x2;
-    } while (r2 >= 1.0 || r2 == 0.0);
-
-    /* Polar method, a more efficient version of the Box-Muller approach. */
-    f = sqrt(-2.0 * log(r2) / r2);
-    /* Keep for next call */
-    bitgen_state->gauss = f * x1;
-    bitgen_state->has_gauss = true;
-    return f * x2;
+void random_standard_uniform_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out) {
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = next_float(bitgen_state);
   }
 }
 
-float random_gauss_f(bitgen_t *bitgen_state) {
-  if (bitgen_state->has_gauss_f) {
-    const float temp = bitgen_state->gauss_f;
-    bitgen_state->has_gauss_f = false;
-    bitgen_state->gauss_f = 0.0f;
-    return temp;
-  } else {
-    float f, x1, x2, r2;
-
-    do {
-      x1 = 2.0f * next_float(bitgen_state) - 1.0f;
-      x2 = 2.0f * next_float(bitgen_state) - 1.0f;
-      r2 = x1 * x1 + x2 * x2;
-    } while (r2 >= 1.0 || r2 == 0.0);
+double random_standard_exponential(bitgen_t *bitgen_state) {
+    return -log(1.0 - next_double(bitgen_state));
+}
 
-    /* Polar method, a more efficient version of the Box-Muller approach. */
-    f = sqrtf(-2.0f * logf(r2) / r2);
-    /* Keep for next call */
-    bitgen_state->gauss_f = f * x1;
-    bitgen_state->has_gauss_f = true;
-    return f * x2;
+void random_standard_exponential_fill(bitgen_t * bitgen_state, npy_intp cnt, double * out)
+{
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = random_standard_exponential(bitgen_state);
   }
 }
-#endif
-
-static NPY_INLINE double standard_exponential_zig(bitgen_t *bitgen_state);
 
 static double standard_exponential_zig_unlikely(bitgen_t *bitgen_state,
                                                 uint8_t idx, double x) {
@@ -101,11 +63,11 @@ static double standard_exponential_zig_unlikely(bitgen_t *bitgen_state,
              exp(-x)) {
     return x;
   } else {
-    return standard_exponential_zig(bitgen_state);
+    return random_standard_exponential_zig(bitgen_state);
   }
 }
 
-static NPY_INLINE double standard_exponential_zig(bitgen_t *bitgen_state) {
+double random_standard_exponential_zig(bitgen_t *bitgen_state) {
   uint64_t ri;
   uint8_t idx;
   double x;
@@ -120,20 +82,26 @@ static NPY_INLINE double standard_exponential_zig(bitgen_t *bitgen_state) {
   return standard_exponential_zig_unlikely(bitgen_state, idx, x);
 }
 
-double random_standard_exponential_zig(bitgen_t *bitgen_state) {
-  return standard_exponential_zig(bitgen_state);
+void random_standard_exponential_zig_fill(bitgen_t * bitgen_state, npy_intp cnt, double * out)
+{
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = random_standard_exponential_zig(bitgen_state);
+  }
+}
+
+float random_standard_exponential_f(bitgen_t *bitgen_state) {
+  return -logf(1.0f - next_float(bitgen_state));
 }
 
-void random_standard_exponential_zig_fill(bitgen_t *bitgen_state, npy_intp cnt,
-                                          double *out) {
+void random_standard_exponential_fill_f(bitgen_t * bitgen_state, npy_intp cnt, float * out)
+{
   npy_intp i;
   for (i = 0; i < cnt; i++) {
-    out[i] = standard_exponential_zig(bitgen_state);
+    out[i] = random_standard_exponential_f(bitgen_state);
   }
 }
 
-static NPY_INLINE float standard_exponential_zig_f(bitgen_t *bitgen_state);
-
 static float standard_exponential_zig_unlikely_f(bitgen_t *bitgen_state,
                                                  uint8_t idx, float x) {
   if (idx == 0) {
@@ -144,11 +112,11 @@ static float standard_exponential_zig_unlikely_f(bitgen_t *bitgen_state,
              expf(-x)) {
     return x;
   } else {
-    return standard_exponential_zig_f(bitgen_state);
+    return random_standard_exponential_zig_f(bitgen_state);
   }
 }
 
-static NPY_INLINE float standard_exponential_zig_f(bitgen_t *bitgen_state) {
+float random_standard_exponential_zig_f(bitgen_t *bitgen_state) {
   uint32_t ri;
   uint8_t idx;
   float x;
@@ -163,11 +131,15 @@ static NPY_INLINE float standard_exponential_zig_f(bitgen_t *bitgen_state) {
   return standard_exponential_zig_unlikely_f(bitgen_state, idx, x);
 }
 
-float random_standard_exponential_zig_f(bitgen_t *bitgen_state) {
-  return standard_exponential_zig_f(bitgen_state);
+void random_standard_exponential_zig_fill_f(bitgen_t * bitgen_state, npy_intp cnt, float * out)
+{
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = random_standard_exponential_zig_f(bitgen_state);
+  }
 }
 
-static NPY_INLINE double next_gauss_zig(bitgen_t *bitgen_state) {
+double random_standard_normal(bitgen_t *bitgen_state) {
   uint64_t r;
   int sign;
   uint64_t rabs;
@@ -202,18 +174,14 @@ static NPY_INLINE double next_gauss_zig(bitgen_t *bitgen_state) {
   }
 }
 
-double random_gauss_zig(bitgen_t *bitgen_state) {
-  return next_gauss_zig(bitgen_state);
-}
-
-void random_gauss_zig_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
+void random_standard_normal_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
   npy_intp i;
   for (i = 0; i < cnt; i++) {
-    out[i] = next_gauss_zig(bitgen_state);
+    out[i] = random_standard_normal(bitgen_state);
   }
 }
 
-float random_gauss_zig_f(bitgen_t *bitgen_state) {
+float random_standard_normal_f(bitgen_t *bitgen_state) {
   uint32_t r;
   int sign;
   uint32_t rabs;
@@ -247,101 +215,14 @@ float random_gauss_zig_f(bitgen_t *bitgen_state) {
   }
 }
 
-/*
-static NPY_INLINE double standard_gamma(bitgen_t *bitgen_state, double shape) {
-  double b, c;
-  double U, V, X, Y;
-
-  if (shape == 1.0) {
-    return random_standard_exponential(bitgen_state);
-  } else if (shape < 1.0) {
-    for (;;) {
-      U = next_double(bitgen_state);
-      V = random_standard_exponential(bitgen_state);
-      if (U <= 1.0 - shape) {
-        X = pow(U, 1. / shape);
-        if (X <= V) {
-          return X;
-        }
-      } else {
-        Y = -log((1 - U) / shape);
-        X = pow(1.0 - shape + shape * Y, 1. / shape);
-        if (X <= (V + Y)) {
-          return X;
-        }
-      }
-    }
-  } else {
-    b = shape - 1. / 3.;
-    c = 1. / sqrt(9 * b);
-    for (;;) {
-      do {
-        X = random_gauss(bitgen_state);
-        V = 1.0 + c * X;
-      } while (V <= 0.0);
-
-      V = V * V * V;
-      U = next_double(bitgen_state);
-      if (U < 1.0 - 0.0331 * (X * X) * (X * X))
-        return (b * V);
-      if (log(U) < 0.5 * X * X + b * (1. - V + log(V)))
-        return (b * V);
-    }
-  }
-}
-
-static NPY_INLINE float standard_gamma_float(bitgen_t *bitgen_state, float
-shape) { float b, c; float U, V, X, Y;
-
-  if (shape == 1.0f) {
-    return random_standard_exponential_f(bitgen_state);
-  } else if (shape < 1.0f) {
-    for (;;) {
-      U = next_float(bitgen_state);
-      V = random_standard_exponential_f(bitgen_state);
-      if (U <= 1.0f - shape) {
-        X = powf(U, 1.0f / shape);
-        if (X <= V) {
-          return X;
-        }
-      } else {
-        Y = -logf((1.0f - U) / shape);
-        X = powf(1.0f - shape + shape * Y, 1.0f / shape);
-        if (X <= (V + Y)) {
-          return X;
-        }
-      }
-    }
-  } else {
-    b = shape - 1.0f / 3.0f;
-    c = 1.0f / sqrtf(9.0f * b);
-    for (;;) {
-      do {
-        X = random_gauss_f(bitgen_state);
-        V = 1.0f + c * X;
-      } while (V <= 0.0f);
-
-      V = V * V * V;
-      U = next_float(bitgen_state);
-      if (U < 1.0f - 0.0331f * (X * X) * (X * X))
-        return (b * V);
-      if (logf(U) < 0.5f * X * X + b * (1.0f - V + logf(V)))
-        return (b * V);
-    }
+void random_standard_normal_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out) {
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = random_standard_normal_f(bitgen_state);
   }
 }
 
-
-double random_standard_gamma(bitgen_t *bitgen_state, double shape) {
-  return standard_gamma(bitgen_state, shape);
-}
-
-float random_standard_gamma_f(bitgen_t *bitgen_state, float shape) {
-  return standard_gamma_float(bitgen_state, shape);
-}
-*/
-
-static NPY_INLINE double standard_gamma_zig(bitgen_t *bitgen_state,
+double random_standard_gamma(bitgen_t *bitgen_state,
                                             double shape) {
   double b, c;
   double U, V, X, Y;
@@ -372,7 +253,7 @@ static NPY_INLINE double standard_gamma_zig(bitgen_t *bitgen_state,
     c = 1. / sqrt(9 * b);
     for (;;) {
       do {
-        X = random_gauss_zig(bitgen_state);
+        X = random_standard_normal(bitgen_state);
         V = 1.0 + c * X;
       } while (V <= 0.0);
 
@@ -387,7 +268,7 @@ static NPY_INLINE double standard_gamma_zig(bitgen_t *bitgen_state,
   }
 }
 
-static NPY_INLINE float standard_gamma_zig_f(bitgen_t *bitgen_state,
+float random_standard_gamma_f(bitgen_t *bitgen_state,
                                              float shape) {
   float b, c;
   float U, V, X, Y;
@@ -418,7 +299,7 @@ static NPY_INLINE float standard_gamma_zig_f(bitgen_t *bitgen_state,
     c = 1.0f / sqrtf(9.0f * b);
     for (;;) {
       do {
-        X = random_gauss_zig_f(bitgen_state);
+        X = random_standard_normal_f(bitgen_state);
         V = 1.0f + c * X;
       } while (V <= 0.0f);
 
@@ -433,14 +314,6 @@ static NPY_INLINE float standard_gamma_zig_f(bitgen_t *bitgen_state,
   }
 }
 
-double random_standard_gamma_zig(bitgen_t *bitgen_state, double shape) {
-  return standard_gamma_zig(bitgen_state, shape);
-}
-
-float random_standard_gamma_zig_f(bitgen_t *bitgen_state, float shape) {
-  return standard_gamma_zig_f(bitgen_state, shape);
-}
-
 int64_t random_positive_int64(bitgen_t *bitgen_state) {
   return next_uint64(bitgen_state) >> 1;
 }
@@ -470,10 +343,10 @@ uint64_t random_uint(bitgen_t *bitgen_state) {
  * algorithm comes from SPECFUN by Shanjie Zhang and Jianming Jin and their
  * book "Computation of Special Functions", 1996, John Wiley & Sons, Inc.
  *
- * If loggam(k+1) is being used to compute log(k!) for an integer k, consider
+ * If random_loggam(k+1) is being used to compute log(k!) for an integer k, consider
  * using logfactorial(k) instead.
  */
-double loggam(double x) {
+double random_loggam(double x) {
   double x0, x2, xp, gl, gl0;
   RAND_INT_TYPE k, n;
 
@@ -513,12 +386,12 @@ double random_normal(bitgen_t *bitgen_state, double loc, double scale) {
 }
 */
 
-double random_normal_zig(bitgen_t *bitgen_state, double loc, double scale) {
-  return loc + scale * random_gauss_zig(bitgen_state);
+double random_normal(bitgen_t *bitgen_state, double loc, double scale) {
+  return loc + scale * random_standard_normal(bitgen_state);
 }
 
 double random_exponential(bitgen_t *bitgen_state, double scale) {
-  return scale * standard_exponential_zig(bitgen_state);
+  return scale * random_standard_exponential_zig(bitgen_state);
 }
 
 double random_uniform(bitgen_t *bitgen_state, double lower, double range) {
@@ -526,11 +399,11 @@ double random_uniform(bitgen_t *bitgen_state, double lower, double range) {
 }
 
 double random_gamma(bitgen_t *bitgen_state, double shape, double scale) {
-  return scale * random_standard_gamma_zig(bitgen_state, shape);
+  return scale * random_standard_gamma(bitgen_state, shape);
 }
 
-float random_gamma_float(bitgen_t *bitgen_state, float shape, float scale) {
-  return scale * random_standard_gamma_zig_f(bitgen_state, shape);
+float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale) {
+  return scale * random_standard_gamma_f(bitgen_state, shape);
 }
 
 double random_beta(bitgen_t *bitgen_state, double a, double b) {
@@ -562,14 +435,14 @@ double random_beta(bitgen_t *bitgen_state, double a, double b) {
       }
     }
   } else {
-    Ga = random_standard_gamma_zig(bitgen_state, a);
-    Gb = random_standard_gamma_zig(bitgen_state, b);
+    Ga = random_standard_gamma(bitgen_state, a);
+    Gb = random_standard_gamma(bitgen_state, b);
     return Ga / (Ga + Gb);
   }
 }
 
 double random_chisquare(bitgen_t *bitgen_state, double df) {
-  return 2.0 * random_standard_gamma_zig(bitgen_state, df / 2.0);
+  return 2.0 * random_standard_gamma(bitgen_state, df / 2.0);
 }
 
 double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) {
@@ -578,22 +451,22 @@ double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) {
 }
 
 double random_standard_cauchy(bitgen_t *bitgen_state) {
-  return random_gauss_zig(bitgen_state) / random_gauss_zig(bitgen_state);
+  return random_standard_normal(bitgen_state) / random_standard_normal(bitgen_state);
 }
 
 double random_pareto(bitgen_t *bitgen_state, double a) {
-  return exp(standard_exponential_zig(bitgen_state) / a) - 1;
+  return exp(random_standard_exponential_zig(bitgen_state) / a) - 1;
 }
 
 double random_weibull(bitgen_t *bitgen_state, double a) {
   if (a == 0.0) {
     return 0.0;
   }
-  return pow(standard_exponential_zig(bitgen_state), 1. / a);
+  return pow(random_standard_exponential_zig(bitgen_state), 1. / a);
 }
 
 double random_power(bitgen_t *bitgen_state, double a) {
-  return pow(1 - exp(-standard_exponential_zig(bitgen_state)), 1. / a);
+  return pow(1 - exp(-random_standard_exponential_zig(bitgen_state)), 1. / a);
 }
 
 double random_laplace(bitgen_t *bitgen_state, double loc, double scale) {
@@ -634,7 +507,7 @@ double random_logistic(bitgen_t *bitgen_state, double loc, double scale) {
 }
 
 double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma) {
-  return exp(random_normal_zig(bitgen_state, mean, sigma));
+  return exp(random_normal(bitgen_state, mean, sigma));
 }
 
 double random_rayleigh(bitgen_t *bitgen_state, double mode) {
@@ -644,8 +517,8 @@ double random_rayleigh(bitgen_t *bitgen_state, double mode) {
 double random_standard_t(bitgen_t *bitgen_state, double df) {
   double num, denom;
 
-  num = random_gauss_zig(bitgen_state);
-  denom = random_standard_gamma_zig(bitgen_state, df / 2);
+  num = random_standard_normal(bitgen_state);
+  denom = random_standard_gamma(bitgen_state, df / 2);
   return sqrt(df / 2) * num / sqrt(denom);
 }
 
@@ -699,7 +572,7 @@ static RAND_INT_TYPE random_poisson_ptrs(bitgen_t *bitgen_state, double lam) {
     /* log(V) == log(0.0) ok here */
     /* if U==0.0 so that us==0.0, log is ok since always returns */
     if ((log(V) + log(invalpha) - log(a / (us * us) + b)) <=
-        (-lam + k * loglam - loggam(k + 1))) {
+        (-lam + k * loglam - random_loggam(k + 1))) {
       return k;
     }
   }
@@ -934,7 +807,7 @@ double random_noncentral_chisquare(bitgen_t *bitgen_state, double df,
   }
   if (1 < df) {
     const double Chi2 = random_chisquare(bitgen_state, df - 1);
-    const double n = random_gauss_zig(bitgen_state) + sqrt(nonc);
+    const double n = random_standard_normal(bitgen_state) + sqrt(nonc);
     return Chi2 + n * n;
   } else {
     const RAND_INT_TYPE i = random_poisson(bitgen_state, nonc / 2.0);
@@ -953,7 +826,7 @@ double random_wald(bitgen_t *bitgen_state, double mean, double scale) {
   double mu_2l;
 
   mu_2l = mean / (2 * scale);
-  Y = random_gauss_zig(bitgen_state);
+  Y = random_standard_normal(bitgen_state);
   Y = mean * Y * Y;
   X = mean + mu_2l * (Y - sqrt(4 * scale * Y + Y * Y));
   U = next_double(bitgen_state);
@@ -1092,8 +965,8 @@ RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a) {
   while (1) {
     double T, U, V, X;
 
-    U = 1.0 - random_double(bitgen_state);
-    V = random_double(bitgen_state);
+    U = 1.0 - next_double(bitgen_state);
+    V = next_double(bitgen_state);
     X = floor(pow(U, -1.0 / am1));
     /*
      * The real result may be above what can be represented in a signed
@@ -1297,10 +1170,7 @@ static NPY_INLINE uint64_t bounded_lemire_uint64(bitgen_t *bitgen_state,
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-
-    const uint64_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) %
-     * rng_excl; */
+    const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((__uint128_t)next_uint64(bitgen_state)) * rng_excl;
@@ -1323,10 +1193,7 @@ static NPY_INLINE uint64_t bounded_lemire_uint64(bitgen_t *bitgen_state,
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-
-    const uint64_t threshold = -rng_excl % rng_excl;
-    /* Same as:threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) %
-     * rng_excl; */
+    const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       x = next_uint64(bitgen_state);
@@ -1387,8 +1254,7 @@ static NPY_INLINE uint32_t buffered_bounded_lemire_uint32(
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-    const uint32_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint64_t)(0x100000000ULL - rng_excl)) % rng_excl; */
+    const uint32_t threshold = (UINT32_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((uint64_t)next_uint32(bitgen_state)) * rng_excl;
@@ -1422,8 +1288,7 @@ static NPY_INLINE uint16_t buffered_bounded_lemire_uint16(
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-    const uint16_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint32_t)(0x10000ULL - rng_excl)) % rng_excl; */
+    const uint16_t threshold = (UINT16_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((uint32_t)buffered_uint16(bitgen_state, bcnt, buf)) * rng_excl;
@@ -1458,8 +1323,7 @@ static NPY_INLINE uint8_t buffered_bounded_lemire_uint8(bitgen_t *bitgen_state,
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-    const uint8_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint16_t)(0x100ULL - rng_excl)) % rng_excl; */
+    const uint8_t threshold = (UINT8_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((uint16_t)buffered_uint8(bitgen_state, bcnt, buf)) * rng_excl;
diff --git a/numpy/random/src/distributions/random_hypergeometric.c b/numpy/random/src/distributions/random_hypergeometric.c
index 94dc6380f..0da49bd62 100644
--- a/numpy/random/src/distributions/random_hypergeometric.c
+++ b/numpy/random/src/distributions/random_hypergeometric.c
@@ -1,4 +1,4 @@
-#include "distributions.h"
+#include "numpy/random/distributions.h"
 #include "logfactorial.h"
 #include <stdint.h>
 
@@ -188,8 +188,8 @@ static int64_t hypergeometric_hrua(bitgen_t *bitgen_state,
     while (1) {
         double U, V, X, T;
         double gp;
-        U = random_double(bitgen_state);
-        V = random_double(bitgen_state);  // "U star" in Stadlober (1989)
+        U = next_double(bitgen_state);
+        V = next_double(bitgen_state);  // "U star" in Stadlober (1989)
         X = a + h*(V - 0.5) / U;
 
         // fast rejection:
diff --git a/numpy/random/src/distributions/random_mvhg_count.c b/numpy/random/src/distributions/random_mvhg_count.c
new file mode 100644
index 000000000..0c46ea417
--- /dev/null
+++ b/numpy/random/src/distributions/random_mvhg_count.c
@@ -0,0 +1,131 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "numpy/random/distributions.h"
+
+/*
+ *  random_mvhg_count
+ *
+ *  Draw variates from the multivariate hypergeometric distribution--
+ *  the "count" algorithm.
+ *
+ *  Parameters
+ *  ----------
+ *  bitgen_t *bitgen_state
+ *      Pointer to a `bitgen_t` instance.
+ *  int64_t total
+ *      The sum of the values in the array `colors`.  (This is redundant
+ *      information, but we know the caller has already computed it, so
+ *      we might as well use it.)
+ *  size_t num_colors
+ *      The length of the `colors` array.
+ *  int64_t *colors
+ *      The array of colors (i.e. the number of each type in the collection
+ *      from which the random variate is drawn).
+ *  int64_t nsample
+ *      The number of objects drawn without replacement for each variate.
+ *      `nsample` must not exceed sum(colors).  This condition is not checked;
+ *      it is assumed that the caller has already validated the value.
+ *  size_t num_variates
+ *      The number of variates to be produced and put in the array
+ *      pointed to by `variates`.  One variate is a vector of length
+ *      `num_colors`, so the array pointed to by `variates` must have length
+ *      `num_variates * num_colors`.
+ *  int64_t *variates
+ *      The array that will hold the result.  It must have length
+ *      `num_variates * num_colors`.
+ *      The array is not initialized in the function; it is expected that the
+ *      array has been initialized with zeros when the function is called.
+ *
+ *  Notes
+ *  -----
+ *  The "count" algorithm for drawing one variate is roughly equivalent to the
+ *  following numpy code:
+ *
+ *      choices = np.repeat(np.arange(len(colors)), colors)
+ *      selection = np.random.choice(choices, nsample, replace=False)
+ *      variate = np.bincount(selection, minlength=len(colors))
+ *
+ *  This function uses a temporary array with length sum(colors).
+ *
+ *  Assumptions on the arguments (not checked in the function):
+ *    *  colors[k] >= 0  for k in range(num_colors)
+ *    *  total = sum(colors)
+ *    *  0 <= nsample <= total
+ *    *  the product total * sizeof(size_t) does not exceed SIZE_MAX
+ *    *  the product num_variates * num_colors does not overflow
+ */
+
+int random_mvhg_count(bitgen_t *bitgen_state,
+                      int64_t total,
+                      size_t num_colors, int64_t *colors,
+                      int64_t nsample,
+                      size_t num_variates, int64_t *variates)
+{
+    size_t *choices;
+    bool more_than_half;
+
+    if ((total == 0) || (nsample == 0) || (num_variates == 0)) {
+        // Nothing to do; `variates` is expected to be zero-filled already.
+        return 0;
+    }
+
+    choices = malloc(total * (sizeof *choices));
+    if (choices == NULL) {
+        return -1;  // Allocation failure is reported to the caller as -1.
+    }
+
+    /*
+     *  If colors contains, for example, [3 2 5], then choices
+     *  will contain [0 0 0 1 1 2 2 2 2 2].
+     */
+    for (size_t i = 0, k = 0; i < num_colors; ++i) {
+        for (int64_t j = 0; j < colors[i]; ++j) {
+            choices[k] = i;
+            ++k;
+        }
+    }
+
+    more_than_half = nsample > (total / 2);
+    if (more_than_half) {
+        nsample = total - nsample;  // Draw the complement; inverted per variate below.
+    }
+
+    for (size_t i = 0; i < num_variates * num_colors; i += num_colors) {
+        /*
+         *  Fisher-Yates shuffle, but only loop through the first
+         *  `nsample` entries of `choices`.  After the loop,
+         *  choices[:nsample] contains a random sample from the
+         *  full array.
+         */
+        for (size_t j = 0; j < (size_t) nsample; ++j) {
+            size_t tmp, k;
+            // Note: nsample is not greater than total, so there is no danger
+            // of integer underflow in `(size_t) total - j - 1`.
+            k = j + (size_t) random_interval(bitgen_state,
+                                             (size_t) total - j - 1);
+            tmp = choices[k];
+            choices[k] = choices[j];
+            choices[j] = tmp;
+        }
+        /*
+         *  Count the number of occurrences of each value in choices[:nsample].
+         *  The result, stored in variates[i:i+num_colors], is the sample from
+         *  the multivariate hypergeometric distribution.
+         */
+        for (size_t j = 0; j < (size_t) nsample; ++j) {
+            variates[i + choices[j]] += 1;
+        }
+
+        if (more_than_half) {
+            for (size_t k = 0; k < num_colors; ++k) {
+                variates[i + k] = colors[k] - variates[i + k];
+            }
+        }
+    }
+
+    free(choices);
+
+    return 0;
+}
diff --git a/numpy/random/src/distributions/random_mvhg_marginals.c b/numpy/random/src/distributions/random_mvhg_marginals.c
new file mode 100644
index 000000000..7e4c24988
--- /dev/null
+++ b/numpy/random/src/distributions/random_mvhg_marginals.c
@@ -0,0 +1,138 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <math.h>
+
+#include "numpy/random/distributions.h"
+#include "logfactorial.h"
+
+
+/*
+ *  random_mvhg_marginals
+ *
+ *  Draw samples from the multivariate hypergeometric distribution--
+ *  the "marginals" algorithm.
+ *
+ *  This version generates the sample by iteratively calling
+ *  hypergeometric() (the univariate hypergeometric distribution).
+ *
+ *  Parameters
+ *  ----------
+ *  bitgen_t *bitgen_state
+ *      Pointer to a `bitgen_t` instance.
+ *  int64_t total
+ *      The sum of the values in the array `colors`.  (This is redundant
+ *      information, but we know the caller has already computed it, so
+ *      we might as well use it.)
+ *  size_t num_colors
+ *      The length of the `colors` array.  The function assumes
+ *      num_colors > 0.
+ *  int64_t *colors
+ *      The array of colors (i.e. the number of each type in the collection
+ *      from which the random variate is drawn).
+ *  int64_t nsample
+ *      The number of objects drawn without replacement for each variate.
+ *      `nsample` must not exceed sum(colors).  This condition is not checked;
+ *      it is assumed that the caller has already validated the value.
+ *  size_t num_variates
+ *      The number of variates to be produced and put in the array
+ *      pointed to by `variates`.  One variate is a vector of length
+ *      `num_colors`, so the array pointed to by `variates` must have length
+ *      `num_variates * num_colors`.
+ *  int64_t *variates
+ *      The array that will hold the result.  It must have length
+ *      `num_variates * num_colors`.
+ *      The array is not initialized in the function; it is expected that the
+ *      array has been initialized with zeros when the function is called.
+ *
+ *  Notes
+ *  -----
+ *  Here's an example that demonstrates the idea of this algorithm.
+ *
+ *  Suppose the urn contains red, green, blue and yellow marbles.
+ *  Let nred be the number of red marbles, and define the quantities for
+ *  the other colors similarly.  The total number of marbles is
+ *
+ *      total = nred + ngreen + nblue + nyellow.
+ *
+ *  To generate a sample using hypergeometric():
+ *
+ *     red_sample = hypergeometric(ngood=nred, nbad=total - nred,
+ *                                 nsample=nsample)
+ *
+ *  This gives us the number of red marbles in the sample.  The number of
+ *  marbles in the sample that are *not* red is nsample - red_sample.
+ *  To figure out the distribution of those marbles, we again use
+ *  hypergeometric():
+ *
+ *      green_sample = hypergeometric(ngood=ngreen,
+ *                                    nbad=total - nred - ngreen,
+ *                                    nsample=nsample - red_sample)
+ *
+ *  Similarly,
+ *
+ *      blue_sample = hypergeometric(
+ *                        ngood=nblue,
+ *                        nbad=total - nred - ngreen - nblue,
+ *                        nsample=nsample - red_sample - green_sample)
+ *
+ *  Finally,
+ *
+ *      yellow_sample = nsample - (red_sample + green_sample + blue_sample).
+ *
+ *  The above sequence of steps is implemented as a loop for an arbitrary
+ *  number of colors in the innermost loop in the code below.  `remaining`
+ *  is the value passed to `nbad`; it is `total - colors[0]` in the first
+ *  call to random_hypergeometric(), and then decreases by `colors[j]` in
+ *  each iteration.  `num_to_sample` is the `nsample` argument.  It
+ *  starts at this function's `nsample` input, and is decreased by the
+ *  result of the call to random_hypergeometric() in each iteration.
+ *
+ *  Assumptions on the arguments (not checked in the function):
+ *    *  colors[k] >= 0  for k in range(num_colors)
+ *    *  total = sum(colors)
+ *    *  0 <= nsample <= total
+ *    *  the product num_variates * num_colors does not overflow
+ */
+
+void random_mvhg_marginals(bitgen_t *bitgen_state,
+                           int64_t total,
+                           size_t num_colors, int64_t *colors,
+                           int64_t nsample,
+                           size_t num_variates, int64_t *variates)
+{
+    bool more_than_half;
+
+    if ((total == 0) || (nsample == 0) || (num_variates == 0)) {
+        // Nothing to do; `variates` is expected to be zero-filled already.
+        return;
+    }
+
+    more_than_half = nsample > (total / 2);
+    if (more_than_half) {
+        nsample = total - nsample;  // Draw the complement; inverted per variate below.
+    }
+
+    for (size_t i = 0; i < num_variates * num_colors; i += num_colors) {
+        int64_t num_to_sample = nsample;
+        int64_t remaining = total;
+        for (size_t j = 0; (num_to_sample > 0) && (j + 1 < num_colors); ++j) {
+            int64_t r;
+            remaining -= colors[j];  // Now the item count of the colors after j.
+            r = random_hypergeometric(bitgen_state,
+                                      colors[j], remaining, num_to_sample);
+            variates[i + j] = r;
+            num_to_sample -= r;
+        }
+
+        if (num_to_sample > 0) {
+            variates[i + num_colors - 1] = num_to_sample;  // Leftover goes to the last color.
+        }
+
+        if (more_than_half) {
+            for (size_t k = 0; k < num_colors; ++k) {
+                variates[i + k] = colors[k] - variates[i + k];
+            }
+        }
+    }
+}
diff --git a/numpy/random/src/legacy/legacy-distributions.c b/numpy/random/src/legacy/legacy-distributions.c
index 684b3d762..fd067fe8d 100644
--- a/numpy/random/src/legacy/legacy-distributions.c
+++ b/numpy/random/src/legacy/legacy-distributions.c
@@ -1,4 +1,4 @@
-#include "legacy-distributions.h"
+#include "include/legacy-distributions.h"
 
 
 static NPY_INLINE double legacy_double(aug_bitgen_t *aug_state) {
@@ -294,8 +294,8 @@ static RAND_INT_TYPE random_hypergeometric_hrua(bitgen_t *bitgen_state,
   d7 = sqrt((double)(popsize - m) * sample * d4 * d5 / (popsize - 1) + 0.5);
   d8 = D1 * d7 + D2;
   d9 = (RAND_INT_TYPE)floor((double)(m + 1) * (mingoodbad + 1) / (popsize + 2));
-  d10 = (loggam(d9 + 1) + loggam(mingoodbad - d9 + 1) + loggam(m - d9 + 1) +
-         loggam(maxgoodbad - m + d9 + 1));
+  d10 = (random_loggam(d9 + 1) + random_loggam(mingoodbad - d9 + 1) +
+         random_loggam(m - d9 + 1) + random_loggam(maxgoodbad - m + d9 + 1));
   d11 = MIN(MIN(m, mingoodbad) + 1.0, floor(d6 + 16 * d7));
   /* 16 for 16-decimal-digit precision in D1 and D2 */
 
@@ -309,8 +309,8 @@ static RAND_INT_TYPE random_hypergeometric_hrua(bitgen_t *bitgen_state,
       continue;
 
     Z = (RAND_INT_TYPE)floor(W);
-    T = d10 - (loggam(Z + 1) + loggam(mingoodbad - Z + 1) + loggam(m - Z + 1) +
-               loggam(maxgoodbad - m + Z + 1));
+    T = d10 - (random_loggam(Z + 1) + random_loggam(mingoodbad - Z + 1) +
+               random_loggam(m - Z + 1) + random_loggam(maxgoodbad - m + Z + 1));
 
     /* fast acceptance: */
     if ((X * (4.0 - X) - 3.0) <= T)
diff --git a/numpy/random/tests/test_direct.py b/numpy/random/tests/test_direct.py
index 0f57c4bd4..34d7bd278 100644
--- a/numpy/random/tests/test_direct.py
+++ b/numpy/random/tests/test_direct.py
@@ -10,7 +10,7 @@ from numpy.random import (
     Generator, MT19937, PCG64, Philox, RandomState, SeedSequence, SFC64,
     default_rng
 )
-from numpy.random.common import interface
+from numpy.random._common import interface
 
 try:
     import cffi  # noqa: F401
@@ -120,7 +120,7 @@ def gauss_from_uint(x, n, bits):
     return gauss[:n]
 
 def test_seedsequence():
-    from numpy.random.bit_generator import (ISeedSequence,
+    from numpy.random._bit_generator import (ISeedSequence,
                                             ISpawnableSeedSequence,
                                             SeedlessSeedSequence)
 
diff --git a/numpy/random/tests/test_extending.py b/numpy/random/tests/test_extending.py
new file mode 100644
index 000000000..efd922ff5
--- /dev/null
+++ b/numpy/random/tests/test_extending.py
@@ -0,0 +1,38 @@
+import os, sys
+import pytest
+import warnings
+
+try:
+    with warnings.catch_warnings(record=True) as w:
+        # numba issue gh-4733
+        warnings.filterwarnings('always', '', DeprecationWarning)
+        import numba
+    import cffi
+except ImportError:
+    numba = None
+
+try:
+    import cython
+except ImportError:
+    cython = None
+
+@pytest.mark.skipif(cython is None, reason="requires cython")
+def test_cython():
+    curdir = os.getcwd()  # saved so the finally block can restore it
+    argv = sys.argv  # likewise restored in the finally block
+    examples = (os.path.dirname(__file__), '..', '_examples')
+    try:
+        os.chdir(os.path.join(*examples))
+        sys.argv = argv[:1] + ['build']  # presumably read by setup on import
+        with warnings.catch_warnings(record=True) as w:
+            # setuptools issue gh-1885
+            warnings.filterwarnings('always', '', DeprecationWarning)
+            from numpy.random._examples.cython import setup
+    finally:
+        sys.argv = argv
+        os.chdir(curdir)
+
+@pytest.mark.skipif(numba is None, reason="requires numba")
+def test_numba():
+    # Importing the example module runs it; an exception there fails the test.
+    from numpy.random._examples.numba import extending  # noqa: F401
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
index 391c33c1a..d835f16bd 100644
--- a/numpy/random/tests/test_generator_mt19937.py
+++ b/numpy/random/tests/test_generator_mt19937.py
@@ -3,8 +3,10 @@ import sys
 import pytest
 
 import numpy as np
+from numpy.dual import cholesky, eigh, svd
+from numpy.linalg import LinAlgError
 from numpy.testing import (
-    assert_, assert_raises, assert_equal,
+    assert_, assert_raises, assert_equal, assert_allclose,
     assert_warns, assert_no_warnings, assert_array_equal,
     assert_array_almost_equal, suppress_warnings)
 
@@ -115,6 +117,140 @@ class TestMultinomial(object):
         assert_array_equal(non_contig, contig)
 
 
+class TestMultivariateHypergeometric(object):
+    # Tests for Generator.multivariate_hypergeometric ('count', 'marginals').
+    def setup(self):
+        self.seed = 8675309
+
+    def test_argument_validation(self):
+        # Error cases: every call below must raise ValueError.
+
+        # `colors` must be a 1-d sequence
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      10, 4)
+
+        # Negative nsample
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [2, 3, 4], -1)
+
+        # Negative color
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [-1, 2, 3], 2)
+
+        # nsample exceeds sum(colors)
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [2, 3, 4], 10)
+
+        # nsample exceeds sum(colors) (edge case of empty colors)
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [], 1)
+
+        # Validation errors associated with very large values in colors.
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [999999999, 101], 5, 1, 'marginals')
+
+        int64_info = np.iinfo(np.int64)
+        max_int64 = int64_info.max
+        max_int64_index = max_int64 // int64_info.dtype.itemsize
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [max_int64_index - 100, 101], 5, 1, 'count')
+
+    @pytest.mark.parametrize('method', ['count', 'marginals'])
+    def test_edge_cases(self, method):
+        # Set the seed, but in fact, all the results in this test are
+        # deterministic, so we don't really need this.
+        random = Generator(MT19937(self.seed))
+
+        x = random.multivariate_hypergeometric([0, 0, 0], 0, method=method)
+        assert_array_equal(x, [0, 0, 0])
+
+        x = random.multivariate_hypergeometric([], 0, method=method)
+        assert_array_equal(x, [])
+
+        x = random.multivariate_hypergeometric([], 0, size=1, method=method)
+        assert_array_equal(x, np.empty((1, 0), dtype=np.int64))
+
+        x = random.multivariate_hypergeometric([1, 2, 3], 0, method=method)
+        assert_array_equal(x, [0, 0, 0])
+
+        x = random.multivariate_hypergeometric([9, 0, 0], 3, method=method)
+        assert_array_equal(x, [3, 0, 0])
+
+        colors = [1, 1, 0, 1, 1]
+        x = random.multivariate_hypergeometric(colors, sum(colors),
+                                               method=method)
+        assert_array_equal(x, colors)
+
+        x = random.multivariate_hypergeometric([3, 4, 5], 12, size=3,
+                                               method=method)
+        assert_array_equal(x, [[3, 4, 5]]*3)
+
+    # Cases for nsample:
+    #     nsample < 10
+    #     10 <= nsample < colors.sum()/2
+    #     colors.sum()/2 < nsample < colors.sum() - 10
+    #     colors.sum() - 10 < nsample < colors.sum()
+    @pytest.mark.parametrize('nsample', [8, 25, 45, 55])
+    @pytest.mark.parametrize('method', ['count', 'marginals'])
+    @pytest.mark.parametrize('size', [5, (2, 3), 150000])
+    def test_typical_cases(self, nsample, method, size):
+        random = Generator(MT19937(self.seed))
+
+        colors = np.array([10, 5, 20, 25])
+        sample = random.multivariate_hypergeometric(colors, nsample, size,
+                                                    method=method)
+        if isinstance(size, int):
+            expected_shape = (size,) + colors.shape
+        else:
+            expected_shape = size + colors.shape
+        assert_equal(sample.shape, expected_shape)
+        assert_((sample >= 0).all())
+        assert_((sample <= colors).all())
+        assert_array_equal(sample.sum(axis=-1),
+                           np.full(size, fill_value=nsample, dtype=int))
+        if isinstance(size, int) and size >= 100000:
+            # This sample is large enough to compare its mean to
+            # the expected values.
+            assert_allclose(sample.mean(axis=0),
+                            nsample * colors / colors.sum(),
+                            rtol=1e-3, atol=0.005)
+
+    def test_repeatability1(self):
+        random = Generator(MT19937(self.seed))
+        sample = random.multivariate_hypergeometric([3, 4, 5], 5, size=5,
+                                                    method='count')
+        expected = np.array([[2, 1, 2],
+                             [2, 1, 2],
+                             [1, 1, 3],
+                             [2, 0, 3],
+                             [2, 1, 2]])
+        assert_array_equal(sample, expected)
+
+    def test_repeatability2(self):
+        random = Generator(MT19937(self.seed))
+        sample = random.multivariate_hypergeometric([20, 30, 50], 50,
+                                                    size=5,
+                                                    method='marginals')
+        expected = np.array([[ 9, 17, 24],
+                             [ 7, 13, 30],
+                             [ 9, 15, 26],
+                             [ 9, 17, 24],
+                             [12, 14, 24]])
+        assert_array_equal(sample, expected)
+
+    def test_repeatability3(self):
+        random = Generator(MT19937(self.seed))
+        sample = random.multivariate_hypergeometric([20, 30, 50], 12,
+                                                    size=5,
+                                                    method='marginals')
+        expected = np.array([[2, 3, 7],
+                             [5, 3, 4],
+                             [2, 5, 5],
+                             [5, 3, 4],
+                             [1, 5, 6]])
+        assert_array_equal(sample, expected)
+
+
 class TestSetState(object):
     def setup(self):
         self.seed = 1234567890
@@ -329,11 +465,11 @@ class TestIntegers(object):
                'int16':  '39624ead49ad67e37545744024d2648b',
                'int32':  '5c4810373f979336c6c0c999996e47a1',
                'int64':  'ab126c15edff26f55c50d2b7e37391ac',
-               'int8':   'd1746364b48a020dab9ef0568e6c0cd2',
+               'int8':   'ba71ccaffeeeb9eeb1860f8075020b9c',
                'uint16': '39624ead49ad67e37545744024d2648b',
                'uint32': '5c4810373f979336c6c0c999996e47a1',
                'uint64': 'ab126c15edff26f55c50d2b7e37391ac',
-               'uint8':  'd1746364b48a020dab9ef0568e6c0cd2'}
+               'uint8':  'ba71ccaffeeeb9eeb1860f8075020b9c'}
 
         for dt in self.itype[1:]:
             random = Generator(MT19937(1234))
@@ -358,9 +494,8 @@ class TestIntegers(object):
 
     def test_repeatability_broadcasting(self, endpoint):
         for dt in self.itype:
-            lbnd = 0 if dt in (np.bool, bool, np.bool_) else np.iinfo(dt).min
-            ubnd = 2 if dt in (
-                np.bool, bool, np.bool_) else np.iinfo(dt).max + 1
+            lbnd = 0 if dt in (bool, np.bool_) else np.iinfo(dt).min
+            ubnd = 2 if dt in (bool, np.bool_) else np.iinfo(dt).max + 1
             ubnd = ubnd - 1 if endpoint else ubnd
 
             # view as little endian for hash
@@ -399,8 +534,8 @@ class TestIntegers(object):
                 assert_raises(ValueError, random.integers, low_a, high_a,
                               endpoint=endpoint, dtype=dtype)
 
-                low_o = np.array([[low]*10], dtype=np.object)
-                high_o = np.array([high] * 10, dtype=np.object)
+                low_o = np.array([[low]*10], dtype=object)
+                high_o = np.array([high] * 10, dtype=object)
                 assert_raises(ValueError, random.integers, low_o, high,
                               endpoint=endpoint, dtype=dtype)
                 assert_raises(ValueError, random.integers, low, high_o,
@@ -442,7 +577,7 @@ class TestIntegers(object):
             sample = self.rfunc(lbnd, ubnd, endpoint=endpoint, dtype=dt)
             assert_equal(sample.dtype, dt)
 
-        for dt in (bool, int, np.long):
+        for dt in (bool, int, np.compat.long):
             lbnd = 0 if dt is bool else np.iinfo(dt).min
             ubnd = 2 if dt is bool else np.iinfo(dt).max + 1
             ubnd = ubnd - 1 if endpoint else ubnd
@@ -484,6 +619,24 @@ class TestIntegers(object):
         with pytest.raises(ValueError):
             random.integers(0, 200, size=10, dtype=other_byteord_dt)
 
+    # chi2max is the maximum acceptable chi-squared value.
+    @pytest.mark.slow
+    @pytest.mark.parametrize('sample_size,high,dtype,chi2max',
+        [(5000000, 5, np.int8, 125.0),          # p-value ~4.6e-25
+         (5000000, 7, np.uint8, 150.0),         # p-value ~7.7e-30
+         (10000000, 2500, np.int16, 3300.0),    # p-value ~3.0e-25
+         (50000000, 5000, np.uint16, 6500.0),   # p-value ~3.5e-25
+        ])
+    def test_integers_small_dtype_chisquared(self, sample_size, high,
+                                             dtype, chi2max):
+        # Regression test for gh-14774.
+        samples = random.integers(high, size=sample_size, dtype=dtype)
+        # Pearson chi-squared statistic against equal expected counts.
+        values, counts = np.unique(samples, return_counts=True)
+        expected = sample_size / high
+        chi2 = ((counts - expected)**2 / expected).sum()
+        assert chi2 < chi2max
+
 
 class TestRandomDist(object):
     # Make sure the random distribution returns the correct value for a
@@ -1044,12 +1197,13 @@ class TestRandomDist(object):
                              [5, 5, 3, 1, 2, 4]]])
         assert_array_equal(actual, desired)
 
-    def test_multivariate_normal(self):
+    @pytest.mark.parametrize("method", ["svd", "eigh", "cholesky"])
+    def test_multivariate_normal(self, method):
         random = Generator(MT19937(self.seed))
         mean = (.123456789, 10)
         cov = [[1, 0], [0, 1]]
         size = (3, 2)
-        actual = random.multivariate_normal(mean, cov, size)
+        actual = random.multivariate_normal(mean, cov, size, method=method)
         desired = np.array([[[-1.747478062846581,  11.25613495182354  ],
                              [-0.9967333370066214, 10.342002097029821 ]],
                             [[ 0.7850019631242964, 11.181113712443013 ],
@@ -1060,15 +1214,24 @@ class TestRandomDist(object):
         assert_array_almost_equal(actual, desired, decimal=15)
 
         # Check for default size, was raising deprecation warning
-        actual = random.multivariate_normal(mean, cov)
+        actual = random.multivariate_normal(mean, cov, method=method)
         desired = np.array([0.233278563284287, 9.424140804347195])
         assert_array_almost_equal(actual, desired, decimal=15)
+        # Check that non-symmetric covariance input raises an exception when
+        # check_valid='raise' if using the default svd method.
+        mean = [0, 0]
+        cov = [[1, 2], [1, 2]]
+        assert_raises(ValueError, random.multivariate_normal, mean, cov,
+                      check_valid='raise')
 
         # Check that non positive-semidefinite covariance warns with
         # RuntimeWarning
-        mean = [0, 0]
         cov = [[1, 2], [2, 1]]
         assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov)
+        assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov,
+                     method='eigh')
+        assert_raises(LinAlgError, random.multivariate_normal, mean, cov,
+                      method='cholesky')
 
         # and that it doesn't warn with RuntimeWarning check_valid='ignore'
         assert_no_warnings(random.multivariate_normal, mean, cov,
@@ -1077,10 +1240,12 @@ class TestRandomDist(object):
         # and that it raises with RuntimeWarning check_valid='raises'
         assert_raises(ValueError, random.multivariate_normal, mean, cov,
                       check_valid='raise')
+        assert_raises(ValueError, random.multivariate_normal, mean, cov,
+                      check_valid='raise', method='eigh')
 
         cov = np.array([[1, 0.1], [0.1, 1]], dtype=np.float32)
         with suppress_warnings() as sup:
-            random.multivariate_normal(mean, cov)
+            random.multivariate_normal(mean, cov, method=method)
             w = sup.record(RuntimeWarning)
             assert len(w) == 0
 
@@ -2054,7 +2219,7 @@ class TestSingleEltArrayInput(object):
             assert_equal(out.shape, self.tgtShape)
 
     def test_integers(self, endpoint):
-        itype = [np.bool, np.int8, np.uint8, np.int16, np.uint16,
+        itype = [np.bool_, np.int8, np.uint8, np.int16, np.uint16,
                  np.int32, np.uint32, np.int64, np.uint64]
         func = random.integers
         high = np.array([1])
diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py
index 37bd121f3..2e2ecedf8 100644
--- a/numpy/random/tests/test_random.py
+++ b/numpy/random/tests/test_random.py
@@ -269,7 +269,7 @@ class TestRandint(object):
             sample = self.rfunc(lbnd, ubnd, dtype=dt)
             assert_equal(sample.dtype, np.dtype(dt))
 
-        for dt in (bool, int, np.long):
+        for dt in (bool, int, np.compat.long):
             lbnd = 0 if dt is bool else np.iinfo(dt).min
             ubnd = 2 if dt is bool else np.iinfo(dt).max + 1
 
diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py
index a0edc5c23..c12b685ad 100644
--- a/numpy/random/tests/test_randomstate.py
+++ b/numpy/random/tests/test_randomstate.py
@@ -11,7 +11,8 @@ from numpy.testing import (
         suppress_warnings
         )
 
-from numpy.random import MT19937, PCG64, mtrand as random
+from numpy.random import MT19937, PCG64
+from numpy import random
 
 INT_FUNCS = {'binomial': (100.0, 0.6),
              'geometric': (.5,),
@@ -228,7 +229,7 @@ class TestSetState(object):
         new_state = ('Unknown', ) + state[1:]
         assert_raises(ValueError, self.random_state.set_state, new_state)
         assert_raises(TypeError, self.random_state.set_state,
-                      np.array(new_state, dtype=np.object))
+                      np.array(new_state, dtype=object))
         state = self.random_state.get_state(legacy=False)
         del state['bit_generator']
         assert_raises(ValueError, self.random_state.set_state, state)
@@ -381,7 +382,7 @@ class TestRandint(object):
             sample = self.rfunc(lbnd, ubnd, dtype=dt)
             assert_equal(sample.dtype, np.dtype(dt))
 
-        for dt in (bool, int, np.long):
+        for dt in (bool, int, np.compat.long):
             lbnd = 0 if dt is bool else np.iinfo(dt).min
             ubnd = 2 if dt is bool else np.iinfo(dt).max + 1
 
@@ -454,7 +455,7 @@ class TestRandomDist(object):
         random.seed(self.seed)
         rs = random.RandomState(self.seed)
         actual = rs.tomaxint(size=(3, 2))
-        if np.iinfo(np.int).max == 2147483647:
+        if np.iinfo(int).max == 2147483647:
             desired = np.array([[1328851649,  731237375],
                                 [1270502067,  320041495],
                                 [1908433478,  499156889]], dtype=np.int64)
diff --git a/numpy/random/tests/test_randomstate_regression.py b/numpy/random/tests/test_randomstate_regression.py
index edf32ea97..bdc2214b6 100644
--- a/numpy/random/tests/test_randomstate_regression.py
+++ b/numpy/random/tests/test_randomstate_regression.py
@@ -8,7 +8,7 @@ from numpy.testing import (
 from numpy.compat import long
 import numpy as np
 
-from numpy.random import mtrand as random
+from numpy import random
 
 
 class TestRegression(object):
diff --git a/numpy/random/tests/test_seed_sequence.py b/numpy/random/tests/test_seed_sequence.py
index 8d6d604a2..fe23680ed 100644
--- a/numpy/random/tests/test_seed_sequence.py
+++ b/numpy/random/tests/test_seed_sequence.py
@@ -1,7 +1,7 @@
 import numpy as np
 from numpy.testing import assert_array_equal
 
-from numpy.random.bit_generator import SeedSequence
+from numpy.random import SeedSequence
 
 
 def test_reference_data():
diff --git a/numpy/random/tests/test_smoke.py b/numpy/random/tests/test_smoke.py
index 6e641b5f4..58ef6a09a 100644
--- a/numpy/random/tests/test_smoke.py
+++ b/numpy/random/tests/test_smoke.py
@@ -8,7 +8,7 @@ from numpy.testing import assert_equal, assert_, assert_array_equal
 from numpy.random import (Generator, MT19937, PCG64, Philox, SFC64)
 
 @pytest.fixture(scope='module',
-                params=(np.bool, np.int8, np.int16, np.int32, np.int64,
+                params=(np.bool_, np.int8, np.int16, np.int32, np.int64,
                         np.uint8, np.uint16, np.uint32, np.uint64))
 def dtype(request):
     return request.param
@@ -655,7 +655,7 @@ class RNG(object):
             rg.standard_gamma(1.0, out=existing[::3])
 
     def test_integers_broadcast(self, dtype):
-        if dtype == np.bool:
+        if dtype == np.bool_:
             upper = 2
             lower = 0
         else:
@@ -672,7 +672,7 @@ class RNG(object):
         assert_equal(a, c)
         self._reset_state()
         d = self.rg.integers(np.array(
-            [lower] * 10), np.array([upper], dtype=np.object), size=10,
+            [lower] * 10), np.array([upper], dtype=object), size=10,
             dtype=dtype)
         assert_equal(a, d)
         self._reset_state()
@@ -701,7 +701,7 @@ class RNG(object):
         assert out.shape == (1,)
 
     def test_integers_broadcast_errors(self, dtype):
-        if dtype == np.bool:
+        if dtype == np.bool_:
             upper = 2
             lower = 0
         else:
diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py
index 8a31fcf15..b14c776d9 100644
--- a/numpy/testing/_private/utils.py
+++ b/numpy/testing/_private/utils.py
@@ -284,6 +284,10 @@ def assert_equal(actual, desired, err_msg='', verbose=True):
     check that all elements of these objects are equal. An exception is raised
     at the first conflicting values.
 
+    When one of `actual` and `desired` is a scalar and the other is array_like,
+    the function checks that each element of the array_like object is equal to
+    the scalar.
+
     This function handles NaN comparisons as if NaN was a "normal" number.
     That is, no assertion is raised if both objects have NaNs in the same
     positions.  This is in contrast to the IEEE standard on NaNs, which says
@@ -374,21 +378,6 @@ def assert_equal(actual, desired, err_msg='', verbose=True):
     if isscalar(desired) != isscalar(actual):
         raise AssertionError(msg)
 
-    # Inf/nan/negative zero handling
-    try:
-        isdesnan = gisnan(desired)
-        isactnan = gisnan(actual)
-        if isdesnan and isactnan:
-            return  # both nan, so equal
-
-        # handle signed zero specially for floats
-        if desired == 0 and actual == 0:
-            if not signbit(desired) == signbit(actual):
-                raise AssertionError(msg)
-
-    except (TypeError, ValueError, NotImplementedError):
-        pass
-
     try:
         isdesnat = isnat(desired)
         isactnat = isnat(actual)
@@ -404,6 +393,33 @@ def assert_equal(actual, desired, err_msg='', verbose=True):
     except (TypeError, ValueError, NotImplementedError):
         pass
 
+    # Inf/nan/negative zero handling
+    try:
+        isdesnan = gisnan(desired)
+        isactnan = gisnan(actual)
+        if isdesnan and isactnan:
+            return  # both nan, so equal
+
+        # handle signed zero specially for floats
+        array_actual = array(actual)
+        array_desired = array(desired)
+        if (array_actual.dtype.char in 'Mm' or
+                array_desired.dtype.char in 'Mm'):
+            # Version 1.18:
+            # Until this version, gisnan failed for datetime64 and timedelta64.
+            # Now it succeeds, but comparison to a scalar of a different type
+            # emits a DeprecationWarning.
+            # Avoid that by skipping the next check.
+            raise NotImplementedError('cannot compare to a scalar '
+                                      'with a different type')
+
+        if desired == 0 and actual == 0:
+            if not signbit(desired) == signbit(actual):
+                raise AssertionError(msg)
+
+    except (TypeError, ValueError, NotImplementedError):
+        pass
+
     try:
         # Explicitly use __eq__ for comparison, gh-2552
         if not (desired == actual):
@@ -841,10 +857,11 @@ def assert_array_equal(x, y, err_msg='', verbose=True):
     Raises an AssertionError if two array_like objects are not equal.
 
     Given two array_like objects, check that the shape is equal and all
-    elements of these objects are equal. An exception is raised at
-    shape mismatch or conflicting values. In contrast to the standard usage
-    in numpy, NaNs are compared like numbers, no assertion is raised if
-    both objects have NaNs in the same positions.
+    elements of these objects are equal (but see the Notes for the special
+    handling of a scalar). An exception is raised at shape mismatch or
+    conflicting values. In contrast to the standard usage in numpy, NaNs
+    are compared like numbers, no assertion is raised if both objects have
+    NaNs in the same positions.
 
     The usual caution for verifying equality with floating point numbers is
     advised.
@@ -871,6 +888,12 @@ def assert_array_equal(x, y, err_msg='', verbose=True):
                      relative and/or absolute precision.
     assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal
 
+    Notes
+    -----
+    When one of `x` and `y` is a scalar and the other is array_like, the
+    function checks that each element of the array_like object is equal to
+    the scalar.
+
     Examples
     --------
     The first assert does not raise an exception:
@@ -878,7 +901,7 @@ def assert_array_equal(x, y, err_msg='', verbose=True):
     >>> np.testing.assert_array_equal([1.0,2.33333,np.nan],
     ...                               [np.exp(0),2.33333, np.nan])
 
-    Assert fails with numerical inprecision with floats:
+    Assert fails with numerical imprecision with floats:
 
     >>> np.testing.assert_array_equal([1.0,np.pi,np.nan],
     ...                               [1, np.sqrt(np.pi)**2, np.nan])
@@ -899,6 +922,12 @@ def assert_array_equal(x, y, err_msg='', verbose=True):
     ...                            [1, np.sqrt(np.pi)**2, np.nan],
     ...                            rtol=1e-10, atol=0)
 
+    As mentioned in the Notes section, `assert_array_equal` has special
+    handling for scalars. Here the test checks that each value in `x` is 3:
+
+    >>> x = np.full((2, 5), fill_value=3)
+    >>> np.testing.assert_array_equal(x, 3)
+
     """
     __tracebackhide__ = True  # Hide traceback for py.test
     assert_array_compare(operator.__eq__, x, y, err_msg=err_msg,
diff --git a/numpy/tests/test_public_api.py b/numpy/tests/test_public_api.py
index e3621c0fd..0484bb8cd 100644
--- a/numpy/tests/test_public_api.py
+++ b/numpy/tests/test_public_api.py
@@ -298,15 +298,7 @@ PRIVATE_BUT_PRESENT_MODULES = ['numpy.' + s for s in [
     "ma.timer_comparison",
     "matrixlib",
     "matrixlib.defmatrix",
-    "random.bit_generator",
-    "random.bounded_integers",
-    "random.common",
-    "random.generator",
-    "random.mt19937",
     "random.mtrand",
-    "random.pcg64",
-    "random.philox",
-    "random.sfc64",
     "testing.print_coercion_tables",
     "testing.utils",
 ]]
@@ -394,7 +386,7 @@ SKIP_LIST_2 = [
     'numpy.matlib.fft',
     'numpy.matlib.random',
     'numpy.matlib.ctypeslib',
-    'numpy.matlib.ma'
+    'numpy.matlib.ma',
 ]