130 files changed, 4379 insertions, 1327 deletions
diff --git a/INSTALL.rst.txt b/INSTALL.rst.txt
index bd2f4f92c..d6c42bad7 100644
--- a/INSTALL.rst.txt
+++ b/INSTALL.rst.txt
@@ -28,7 +28,7 @@ Building NumPy requires the following installed software:
 
 2) Cython >= 0.29.2 (for development versions of numpy, not for released
                      versions)
-                     
+
 3) pytest__ (optional) 1.15 or later
 
    This is required for testing numpy, but not for using it.
@@ -37,7 +37,7 @@ Python__ http://www.python.org
 pytest__ http://pytest.readthedocs.io
 
 
-.. note:: 
+.. note::
 
    If you want to build NumPy in order to work on NumPy itself, use
    ``runtests.py``.  For more details, see
@@ -77,11 +77,7 @@ skipped when running the test suite if no Fortran compiler is available.  For
 building Scipy a Fortran compiler is needed though, so we include some details
 on Fortran compilers in the rest of this section.
 
-On OS X and Linux, all common compilers will work.  Note that C99 support is
-required.  For compilers that don't support the C99 language standard by
-default (such as ``gcc`` versions < 5.0), it should be enabled.  For ``gcc``::
-
-    export CFLAGS='-std=c99'
+On OS X and Linux, all common compilers will work.
 
 For Fortran, ``gfortran`` works, ``g77`` does not.  In case ``g77`` is
 installed then ``g77`` will be detected and used first.  To explicitly select
@@ -117,9 +113,9 @@ details.
 Windows
 -------
 
-The Intel compilers work with Intel MKL, see the application note linked above. 
+The Intel compilers work with Intel MKL, see the application note linked above.
 MingwPy__ works with OpenBLAS.
-For an overview of the state of BLAS/LAPACK libraries on Windows, see 
+For an overview of the state of BLAS/LAPACK libraries on Windows, see
 `here <https://mingwpy.github.io/blas_lapack.html>`_.
 
 OS X
diff --git a/MANIFEST.in b/MANIFEST.in
index 7ab57eb8c..b58f85d4d 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -15,6 +15,7 @@ include tox.ini
 include .coveragerc
 include test_requirements.txt
 recursive-include numpy/random *.pyx *.pxd *.pyx.in *.pxd.in
+include numpy/random/include/*
 include numpy/__init__.pxd
 # Add build support that should go in sdist, but not go in bdist/be installed
 # Note that sub-directories that don't have __init__ are apparently not
diff --git a/README.md b/README.md
index 0599c46f7..ae92b6309 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 NumPy is the fundamental package needed for scientific computing with Python.
 
 - **Website:** https://www.numpy.org
-- **Documentation:** http://docs.scipy.org/
+- **Documentation:** https://docs.scipy.org/
 - **Mailing list:** https://mail.python.org/mailman/listinfo/numpy-discussion
 - **Source code:** https://github.com/numpy/numpy
 - **Contributing:** https://www.numpy.org/devdocs/dev/index.html
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 633808c0b..bfb033bb8 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -46,7 +46,7 @@ stages:
             python3 -m pip install --user -r test_requirements.txt && \
             python3 -m pip install . && \
             F77=gfortran-5 F90=gfortran-5 \
-            CFLAGS='-UNDEBUG -std=c99' python3 runtests.py -n --debug-info --mode=full -- -rsx --junitxml=junit/test-results.xml && \
+            CFLAGS=-UNDEBUG python3 runtests.py -n --debug-info --mode=full -- -rsx --junitxml=junit/test-results.xml && \
             python3 tools/openblas_support.py --check_version $(OpenBLAS_version)"
       displayName: 'Run 32-bit Ubuntu Docker Build / Tests'
     - task: PublishTestResults@2
@@ -64,7 +64,7 @@ stages:
       # the docs even though i.e., numba uses another in their
       # azure config for mac os -- Microsoft has indicated
       # they will patch this issue
-      vmImage: macOS-10.13
+      vmImage: macOS-10.14
     steps:
     # the @0 refers to the (major) version of the *task* on Microsoft's
     # end, not the order in the build matrix nor anything to do
@@ -180,3 +180,25 @@ stages:
         testResultsFiles: '**/test-*.xml'
         testRunTitle: 'Publish test results for PyPy3'
         failTaskOnFailedTests: true
+  - job: Linux_gcc48
+    pool:
+      vmImage: 'ubuntu-18.04'
+    steps:
+    - script: |
+            if ! `gcc-4.8 2>/dev/null`; then
+                sudo apt install gcc-4.8
+            fi
+      displayName: 'add gcc 4.8'
+    - script: |
+            python3 -m pip install --user --upgrade pip setuptools
+            python3 -m pip install --user -r test_requirements.txt
+            CPPFLAGS='' CC=gcc-4.8 F77=gfortran-5 F90=gfortran-5 \
+            python3 runtests.py --debug-info --mode=full -- -rsx --junitxml=junit/test-results.xml
+      displayName: 'Run gcc4.8 Build / Tests'
+    - task: PublishTestResults@2
+      condition: succeededOrFailed()
+      inputs:
+        testResultsFiles: '**/test-*.xml'
+        failTaskOnFailedTests: true
+        testRunTitle: 'Publish test results for gcc 4.8'
+
diff --git a/doc/changelog/1.17.3-changelog.rst b/doc/changelog/1.17.3-changelog.rst
new file mode 100644
index 000000000..f911c8465
--- /dev/null
+++ b/doc/changelog/1.17.3-changelog.rst
@@ -0,0 +1,32 @@
+
+Contributors
+============
+
+A total of 7 people contributed to this release.  People with a "+" by their
+names contributed a patch for the first time.
+
+* Allan Haldane
+* Charles Harris
+* Kevin Sheppard
+* Matti Picus
+* Ralf Gommers
+* Sebastian Berg
+* Warren Weckesser
+
+Pull requests merged
+====================
+
+A total of 12 pull requests were merged for this release.
+
+* `#14456 <https://github.com/numpy/numpy/pull/14456>`__: MAINT: clean up pocketfft modules inside numpy.fft namespace.
+* `#14463 <https://github.com/numpy/numpy/pull/14463>`__: BUG: random.hypergeometic assumes npy_long is npy_int64, hung...
+* `#14502 <https://github.com/numpy/numpy/pull/14502>`__: BUG: random: Revert gh-14458 and refix gh-14557.
+* `#14504 <https://github.com/numpy/numpy/pull/14504>`__: BUG: add a specialized loop for boolean matmul.
+* `#14506 <https://github.com/numpy/numpy/pull/14506>`__: MAINT: Update pytest version for Python 3.8
+* `#14512 <https://github.com/numpy/numpy/pull/14512>`__: DOC: random: fix doc linking, was referencing private submodules.
+* `#14513 <https://github.com/numpy/numpy/pull/14513>`__: BUG,MAINT: Some fixes and minor cleanup based on clang analysis
+* `#14515 <https://github.com/numpy/numpy/pull/14515>`__: BUG: Fix randint when range is 2**32
+* `#14519 <https://github.com/numpy/numpy/pull/14519>`__: MAINT: remove the entropy c-extension module
+* `#14563 <https://github.com/numpy/numpy/pull/14563>`__: DOC: remove note about Pocketfft license file (non-existing here).
+* `#14578 <https://github.com/numpy/numpy/pull/14578>`__: BUG: random: Create a legacy implementation of random.binomial.
+* `#14687 <https://github.com/numpy/numpy/pull/14687>`__: BUG: properly define PyArray_DescrCheck
diff --git a/doc/neps/nep-0000.rst b/doc/neps/nep-0000.rst
index 97b69279b..0a2dbdefb 100644
--- a/doc/neps/nep-0000.rst
+++ b/doc/neps/nep-0000.rst
@@ -75,9 +75,11 @@ request`_ to the ``doc/neps`` directory with the name ``nep-<n>.rst``
 where ``<n>`` is an appropriately assigned four-digit number (e.g.,
 ``nep-0000.rst``). The draft must use the :doc:`nep-template` file.
 
-Once the PR is in place, the NEP should be announced on the mailing
-list for discussion (comments on the PR itself should be restricted to
-minor editorial and technical fixes).
+Once the PR for the NEP is in place, a post should be made to the
+mailing list containing the sections up to "Backward compatibility",
+with the purpose of limiting discussion there to usage and impact.
+Discussion on the pull request will have a broader scope, also including
+details of implementation.
 
 At the earliest convenience, the PR should be merged (regardless of
 whether it is accepted during discussion).  Additional PRs may be made
diff --git a/doc/neps/nep-0019-rng-policy.rst b/doc/neps/nep-0019-rng-policy.rst
index 9704b24ca..4f766fa2d 100644
--- a/doc/neps/nep-0019-rng-policy.rst
+++ b/doc/neps/nep-0019-rng-policy.rst
@@ -3,7 +3,7 @@ NEP 19 — Random Number Generator Policy
 =======================================
 
 :Author: Robert Kern <robert.kern@gmail.com>
-:Status: Accepted
+:Status: Final
 :Type: Standards Track
 :Created: 2018-05-24
 :Updated: 2019-05-21
diff --git a/doc/neps/nep-0029-deprecation_policy.rst b/doc/neps/nep-0029-deprecation_policy.rst
index 1e5aa5b2c..2f5c8ecb5 100644
--- a/doc/neps/nep-0029-deprecation_policy.rst
+++ b/doc/neps/nep-0029-deprecation_policy.rst
@@ -4,9 +4,10 @@ NEP 29 — Recommend Python and Numpy version support as a community policy stan
 
 
 :Author: Thomas A Caswell <tcaswell@gmail.com>, Andreas Mueller, Brian Granger, Madicken Munk, Ralf Gommers, Matt Haberland <mhaberla@calpoly.edu>, Matthias Bussonnier <bussonniermatthias@gmail.com>, Stefan van der Walt <stefanv@berkeley.edu>
-:Status: Draft
-:Type: Informational Track
+:Status: Final
+:Type: Informational
 :Created: 2019-07-13
+:Resolution: https://mail.python.org/pipermail/numpy-discussion/2019-October/080128.html
 
 
 Abstract
diff --git a/doc/neps/nep-0031-uarray.rst b/doc/neps/nep-0031-uarray.rst
new file mode 100644
index 000000000..3519b6bc0
--- /dev/null
+++ b/doc/neps/nep-0031-uarray.rst
@@ -0,0 +1,637 @@
+============================================================
+NEP 31 — Context-local and global overrides of the NumPy API
+============================================================
+
+:Author: Hameer Abbasi <habbasi@quansight.com>
+:Author: Ralf Gommers <rgommers@quansight.com>
+:Author: Peter Bell <pbell@quansight.com>
+:Status: Draft
+:Type: Standards Track
+:Created: 2019-08-22
+
+
+Abstract
+--------
+
+This NEP proposes to make all of NumPy's public API overridable via an
+extensible backend mechanism.
+
+Acceptance of this NEP means NumPy would provide global and context-local
+overrides, as well as a dispatch mechanism similar to NEP-18 [2]_. First
+experiences with ``__array_function__`` show that it is necessary to be able
+to override NumPy functions that *do not take an array-like argument*, and
+hence aren't overridable via ``__array_function__``. The most pressing need is
+array creation and coercion functions, such as ``numpy.zeros`` or
+``numpy.asarray``; see e.g. NEP-30 [9]_.
+
+This NEP proposes to allow, in an opt-in fashion, overriding any part of the
+NumPy API. It is intended as a comprehensive resolution to NEP-22 [3]_, and
+obviates the need to add an ever-growing list of new protocols for each new
+type of function or object that needs to become overridable.
+
+Motivation and Scope
+--------------------
+
+The motivation behind ``uarray`` is manifold: First, there have been several
+attempts to allow dispatch of parts of the NumPy API, including (most
+prominently), the ``__array_ufunc__`` protocol in NEP-13 [4]_, and the
+``__array_function__`` protocol in NEP-18 [2]_, but this has shown the need
+for further protocols to be developed, including a protocol for coercion (see
+[5]_, [9]_). The reasons these overrides are needed have been extensively
+discussed in the references, and this NEP will not attempt to go into the
+details of why these are needed; but in short: It is necessary for library
+authors to be able to coerce arbitrary objects into arrays of their own types,
+such as CuPy needing to coerce to a CuPy array, for example, instead of
+a NumPy array. In simpler words, one needs things like ``np.asarray(...)`` or
+an alternative to "just work" and return duck-arrays.
+
+The primary end-goal of this NEP is to make the following possible:
+
+.. code:: python
+
+    # On the library side
+    import numpy.overridable as unp
+
+    def library_function(array):
+        array = unp.asarray(array)
+        # Code using unumpy as usual
+        return array
+
+    # On the user side:
+    import numpy.overridable as unp
+    import uarray as ua
+    import dask.array as da
+
+    ua.register_backend(da) # Can be done within Dask itself
+
+    library_function(dask_array)  # works and returns dask_array
+
+    with unp.set_backend(da):
+        library_function([1, 2, 3, 4])  # actually returns a Dask array.
+
+Here, ``backend`` can be any compatible object defined either by NumPy or an
+external library, such as Dask or CuPy. Ideally, it should be the module
+``dask.array`` or ``cupy`` itself.
+
+These kinds of overrides are useful for both the end-user as well as library
+authors. End-users may have written or wish to write code that they then later
+speed up or move to a different implementation, say PyData/Sparse. They can do
+this simply by setting a backend. Library authors may also wish to write code
+that is portable across array implementations, for example ``sklearn`` may wish
+to write code for a machine learning algorithm that is portable across array
+implementations while also using array creation functions.
+
+This NEP takes a holistic approach: It assumes that there are parts of
+the API that need to be overridable, and that these will grow over time. It
+provides a general framework and a mechanism to avoid a design of a new
+protocol each time this is required. This was the goal of ``uarray``: to
+allow for overrides in an API without needing the design of a new protocol.
+
+This NEP proposes the following: That ``unumpy`` [8]_  becomes the
+recommended override mechanism for the parts of the NumPy API not yet covered
+by ``__array_function__`` or ``__array_ufunc__``, and that ``uarray`` is
+vendored into a new namespace within NumPy to give users and downstream
+dependencies access to these overrides.  This vendoring mechanism is similar
+to what SciPy decided to do for making ``scipy.fft`` overridable (see [10]_).
+
+
+Detailed description
+--------------------
+
+Using overrides
+~~~~~~~~~~~~~~~
+
+Here are a few examples of how an end-user would use overrides.
+
+.. code:: python
+
+    data = da.from_zarr('myfile.zarr')
+    # result should still be dask, all things being equal
+    result = library_function(data)
+    result.to_zarr('output.zarr')
+
+This would keep on working, assuming the Dask backend was either set or
+registered. Registration can also be done at import-time.
+
+Now consider another function, and what would need to happen in order to
+make this work:
+
+.. code:: python
+
+    from dask import array as da
+    from magic_library import pytorch_predict
+
+    data = da.from_zarr('myfile.zarr')
+    # normally here one would use e.g. data.map_overlap
+    result = pytorch_predict(data)
+    result.to_zarr('output.zarr')
+
+This would work in two scenarios: The first is that ``pytorch_predict`` was a
+multimethod, and implemented by the Dask backend. Dask could provide utility
+functions to allow external libraries to register implementations.
+
+The second, and perhaps more useful way, is that ``pytorch_predict`` was defined
+in an idiomatic style true to NumPy in terms of other multimethods, and that Dask
+implemented the required multimethods itself, e.g. ``np.convolve``. If this
+happened, then the above example would work without either ``magic_library``
+or Dask having to do anything specific to the other.
+
+Composing backends
+~~~~~~~~~~~~~~~~~~
+
+There are some backends which may depend on other backends, for example xarray
+depending on `numpy.fft`, and transforming a time axis into a frequency axis,
+or Dask/xarray holding an array other than a NumPy array inside it. This would
+be handled in the following manner inside code::
+
+    with ua.set_backend(cupy), ua.set_backend(dask.array):
+        # Code that has distributed GPU arrays here
+
+Proposals
+~~~~~~~~~
+
+The only change this NEP proposes at its acceptance, is to make ``unumpy`` the
+officially recommended way to override NumPy, along with making some submodules
+overridable by default via ``uarray``. ``unumpy`` will remain a separate
+repository/package (which we propose to vendor to avoid a hard dependency, and
+use the separate ``unumpy`` package only if it is installed, rather than depend
+on it for the time being). In concrete terms, ``numpy.overridable`` becomes an
+alias for ``unumpy`` if available, with a fallback to a vendored version if
+not. ``uarray`` and ``unumpy`` will be developed primarily with the input
+of duck-array authors and secondarily, custom dtype authors, via the usual
+GitHub workflow. There are a few reasons for this:
+
+* Faster iteration in the case of bugs or issues.
+* Faster design changes, in the case of needed functionality.
+* ``unumpy`` will work with older versions of NumPy as well.
+* The user and library author opt-in to the override process,
+  rather than breakages happening when it is least expected.
+  In simple terms, bugs in ``unumpy`` mean that ``numpy`` remains
+  unaffected.
+* For ``numpy.fft``, ``numpy.linalg`` and ``numpy.random``, the functions in
+  the main namespace will mirror those in the ``numpy.overridable`` namespace.
+  The reason for this is that there may exist functions in these
+  submodules that need backends, even for ``numpy.ndarray`` inputs.
+
+Advantages of ``unumpy`` over other solutions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``unumpy`` offers a number of advantages over the approach of defining a new
+protocol for every problem encountered: Whenever there is something requiring
+an override, ``unumpy`` will be able to offer a unified API with very minor
+changes. For example:
+
+* ``ufunc`` objects can be overridden via their ``__call__``, ``reduce`` and
+  other methods.
+* Other functions can be overridden in a similar fashion.
+* ``np.asduckarray`` goes away, and becomes ``np.overridable.asarray`` with a
+  backend set.
+* The same holds for array creation functions such as ``np.zeros``,
+  ``np.empty`` and so on.
+
+This also holds for the future: Making something overridable would require only
+minor changes to ``unumpy``.
+
+Another promise ``unumpy`` holds is one of default implementations. Default
+implementations can be provided for any multimethod, in terms of others. This
+allows one to override a large part of the NumPy API by defining only a small
+part of it. This is to ease the creation of new duck-arrays, by providing
+default implementations of many functions that can be easily expressed in
+terms of others, as well as a repository of utility functions that help in the
+implementation of duck-arrays that most duck-arrays would require. This would
+allow us to avoid designing entire protocols, e.g., a protocol for stacking
+and concatenating would be replaced by simply implementing ``stack`` and/or
+``concatenate`` and then providing default implementations for everything else
+in that class. The same applies for transposing, and many other functions for
+which protocols haven't been proposed, such as ``isin`` in terms of ``in1d``,
+``setdiff1d`` in terms of ``unique``, and so on.
+
+It also allows one to override functions in a manner which
+``__array_function__`` simply cannot, such as overriding ``np.einsum`` with the
+version from the ``opt_einsum`` package, or Intel MKL overriding FFT, BLAS
+or ``ufunc`` objects. They would define a backend with the appropriate
+multimethods, and the user would select them via a ``with`` statement, or
+registering them as a backend.
+
+The last benefit is a clear way to coerce to a given backend (via the
+``coerce`` keyword in ``ua.set_backend``), and a protocol
+for coercing not only arrays, but also ``dtype`` objects and ``ufunc`` objects
+with similar ones from other libraries. This is due to the existence of actual,
+third party dtype packages, and their desire to blend into the NumPy ecosystem
+(see [6]_). This is a separate issue compared to the C-level dtype redesign
+proposed in [7]_, it's about allowing third-party dtype implementations to
+work with NumPy, much like third-party array implementations. These can provide
+features such as, for example, units, jagged arrays or other such features that
+are outside the scope of NumPy.
+
+Mixing NumPy and ``unumpy`` in the same file
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Normally, one would want to import only one of ``unumpy`` or ``numpy``, and
+would import it as ``np`` for familiarity. However, there may be situations
+where one wishes to mix NumPy and the overrides, and there are a few ways to do
+this, depending on the user's style::
+
+    from numpy import overridable as unp
+    import numpy as np
+
+or::
+
+    import numpy as np
+
+    # Use unumpy via np.overridable
+
+Duck-array coercion
+~~~~~~~~~~~~~~~~~~~
+
+There are inherent problems about returning objects that are not NumPy arrays
+from ``numpy.array`` or ``numpy.asarray``, particularly in the context of C/C++
+or Cython code that may get an object with a different memory layout than the
+one it expects. However, we believe this problem may apply not only to these
+two functions but all functions that return NumPy arrays. For this reason,
+overrides are opt-in for the user, by using the submodule ``numpy.overridable``
+rather than ``numpy``. NumPy will continue to work unaffected by anything in
+``numpy.overridable``.
+
+If the user wishes to obtain a NumPy array, there are two ways of doing it:
+
+1. Use ``numpy.asarray`` (the non-overridable version).
+2. Use ``numpy.overridable.asarray`` with the NumPy backend set and coercion
+   enabled
+
+Aliases outside of the ``numpy.overridable`` namespace
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All functionality in ``numpy.random``, ``numpy.linalg`` and ``numpy.fft``
+will be aliased to their respective overridable versions inside
+``numpy.overridable``. The reason for this is that there are alternative
+implementations of RNGs (``mkl-random``), linear algebra routines (``eigen``,
+``blis``) and FFT routines (``mkl-fft``, ``pyFFTW``) that need to operate on
+``numpy.ndarray`` inputs, but still need the ability to switch behaviour.
+
+This is different from monkeypatching in a few different ways:
+
+* The caller-facing signature of the function is always the same,
+  so there is at least the loose sense of an API contract. Monkeypatching
+  does not provide this ability.
+* There is the ability of locally switching the backend.
+* It has been `suggested <http://numpy-discussion.10968.n7.nabble.com/NEP-31-Context-local-and-global-overrides-of-the-NumPy-API-tp47452p47472.html>`_
+  that the reason that 1.17 hasn't landed in the Anaconda defaults channel is
+  due to the incompatibility between monkeypatching and ``__array_function__``,
+  as monkeypatching would bypass the protocol completely.
+* Statements of the form ``from numpy import x; x`` and ``np.x`` would have
+  different results depending on whether the import was made before or
+  after monkeypatching happened.
+
+All this isn't possible at all with ``__array_function__`` or
+``__array_ufunc__``.
+
+It has been formally realised (at least in part) that a backend system is
+needed for this, in the `NumPy roadmap <https://numpy.org/neps/roadmap.html#other-functionality>`_.
+
+For ``numpy.random``, it's still necessary to make the C-API fit the one
+proposed in `NEP-19 <https://numpy.org/neps/nep-0019-rng-policy.html>`_.
+This is impossible for `mkl-random`, because then it would need to be
+rewritten to fit that framework. The guarantees on stream
+compatibility will be the same as before, but if there's a backend that affects
+``numpy.random`` set, we make no guarantees about stream compatibility, and it
+is up to the backend author to provide their own guarantees.
+
+Providing a way for implicit dispatch
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It has been suggested that the ability to dispatch methods which do not take
+a dispatchable is needed, while guessing that backend from another dispatchable.
+
+As a concrete example, consider the following:
+
+.. code:: python
+
+    with unumpy.determine_backend(array_like, np.ndarray):
+        unumpy.arange(len(array_like))
+
+While this does not exist yet in ``uarray``, it is trivial to add it. The need for
+this kind of code exists because one might want to have an alternative for the
+proposed ``*_like`` functions, or the ``like=`` keyword argument. The need for these
+exists because there are functions in the NumPy API that do not take a dispatchable
+argument, but there is still the need to select a backend based on a different
+dispatchable.
+
+The need for an opt-in module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The need for an opt-in module is realised because of a few reasons:
+
+* There are parts of the API (like `numpy.asarray`) that simply cannot be
+  overridden due to incompatibility concerns with C/Cython extensions, however,
+  one may want to coerce to a duck-array using ``asarray`` with a backend set.
+* There are possible issues around an implicit option and monkeypatching, such
+  as those mentioned above.
+
+NEP 18 notes that this may require maintenance of two separate APIs. However,
+this burden may be lessened by, for example, parametrizing all tests over
+``numpy.overridable`` separately via a fixture. This also has the side-effect
+of thoroughly testing it, unlike ``__array_function__``. We also feel that it
+provides an opportunity to separate the NumPy API contract properly from the
+implementation.
+
+Benefits to end-users and mixing backends
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Mixing backends is easy in ``uarray``, one only has to do:
+
+.. code:: python
+
+    # Explicitly say which backends you want to mix
+    ua.register_backend(backend1)
+    ua.register_backend(backend2)
+    ua.register_backend(backend3)
+
+    # Freely use code that mixes backends here.
+
+The benefits to end-users extend beyond just writing new code. Old code
+(usually in the form of scripts) can be easily ported to different backends
+by a simple import switch and a line adding the preferred backend. This way,
+users may find it easier to port existing code to GPU or distributed computing.
+
+Related Work
+------------
+
+Other override mechanisms
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* NEP-18, the ``__array_function__`` protocol. [2]_
+* NEP-13, the ``__array_ufunc__`` protocol. [4]_
+* NEP-30, the ``__duck_array__`` protocol. [9]_
+
+Existing NumPy-like array implementations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* Dask: https://dask.org/
+* CuPy: https://cupy.chainer.org/
+* PyData/Sparse: https://sparse.pydata.org/
+* Xnd: https://xnd.readthedocs.io/
+* Astropy's Quantity: https://docs.astropy.org/en/stable/units/
+
+Existing and potential consumers of alternative arrays
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* Dask: https://dask.org/
+* scikit-learn: https://scikit-learn.org/
+* xarray: https://xarray.pydata.org/
+* TensorLy: http://tensorly.org/
+
+Existing alternate dtype implementations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* ``ndtypes``: https://ndtypes.readthedocs.io/en/latest/
+* Datashape: https://datashape.readthedocs.io
+* Plum: https://plum-py.readthedocs.io/
+
+Alternate implementations of parts of the NumPy API
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* ``mkl_random``: https://github.com/IntelPython/mkl_random
+* ``mkl_fft``: https://github.com/IntelPython/mkl_fft
+* ``bottleneck``: https://github.com/pydata/bottleneck
+* ``opt_einsum``: https://github.com/dgasmith/opt_einsum
+
+Implementation
+--------------
+
+The implementation of this NEP will require the following steps:
+
+* Implementation of ``uarray`` multimethods corresponding to the
+  NumPy API, including classes for overriding ``dtype``, ``ufunc``
+  and ``array`` objects, in the ``unumpy`` repository, which are usually
+  very easy to create.
+* Moving backends from ``unumpy`` into the respective array libraries.
+
+Maintenance can be eased by testing over ``{numpy, unumpy}`` via parameterized
+tests. If a new argument is added to a method, the corresponding argument
+extractor and replacer will need to be updated within ``unumpy``.
+
+A lot of argument extractors can be re-used from the existing implementation
+of the ``__array_function__`` protocol, and the replacers can be usually
+re-used across many methods.
+
+For the parts of the namespace which are going to be overridable by default,
+the main method will need to be renamed and hidden behind a ``uarray`` multimethod.
+
+Default implementations are usually seen in the documentation using the words
+"equivalent to", and thus, are easily available.
+
+``uarray`` Primer
+~~~~~~~~~~~~~~~~~
+
+**Note:** *This section will not attempt to go into too much detail about
+uarray, that is the purpose of the uarray documentation.* [1]_
+*However, the NumPy community will have input into the design of
+uarray, via the issue tracker.*
+
+``unumpy`` is the interface that defines a set of overridable functions
+(multimethods) compatible with the numpy API. To do this, it uses the
+``uarray`` library. ``uarray`` is a general purpose tool for creating
+multimethods that dispatch to one of multiple different possible backend
+implementations. In this sense, it is similar to the ``__array_function__``
+protocol but with the key difference that the backend is explicitly installed
+by the end-user and not coupled into the array type.
+
+Decoupling the backend from the array type gives much more flexibility to
+end-users and backend authors. For example, it is possible to:
+
+* override functions not taking arrays as arguments
+* create backends out of source from the array type
+* install multiple backends for the same array type
+
+This decoupling also means that ``uarray`` is not constrained to dispatching
+over array-like types. The backend is free to inspect the entire set of
+function arguments to determine if it can implement the function e.g. ``dtype``
+parameter dispatching.
+
+Defining backends
+^^^^^^^^^^^^^^^^^
+
+``uarray`` consists of two main protocols: ``__ua_convert__`` and
+``__ua_function__``, called in that order, along with ``__ua_domain__``.
+``__ua_convert__`` is for conversion and coercion. It has the signature
+``(dispatchables, coerce)``, where ``dispatchables`` is an iterable of
+``ua.Dispatchable`` objects and ``coerce`` is a boolean indicating whether or
+not to force the conversion. ``ua.Dispatchable`` is a simple class consisting
+of three simple values: ``type``, ``value``, and ``coercible``.
+``__ua_convert__`` returns an iterable of the converted values, or
+``NotImplemented`` in the case of failure.
+
+``__ua_function__`` has the signature ``(func, args, kwargs)`` and defines
+the actual implementation of the function. It receives the function and its
+arguments. Returning ``NotImplemented`` will cause a move to the default
+implementation of the function if one exists, and failing that, the next
+backend.
+
+Here is what will happen assuming a ``uarray`` multimethod is called:
+
+1. We canonicalise the arguments so any arguments without a default
+   are placed in ``*args`` and those with one are placed in ``**kwargs``.
+2. We check the list of backends.
+
+   a. If it is empty, we try the default implementation.
+
+3. We check if the backend's ``__ua_convert__`` method exists. If it exists:
+
+   a. We pass it the output of the dispatcher,
+      which is an iterable of ``ua.Dispatchable`` objects.
+   b. We feed this output, along with the arguments,
+      to the argument replacer. ``NotImplemented`` means we move to 3
+      with the next backend.
+   c. We store the replaced arguments as the new arguments.
+
+4. We feed the arguments into ``__ua_function__``, and return the output, and
+   exit if it isn't ``NotImplemented``.
+5. If the default implementation exists, we try it with the current backend.
+6. On failure,  we move to 3 with the next backend. If there are no more
+   backends, we move to 7.
+7. We raise a ``ua.BackendNotImplementedError``.
+
+Defining overridable multimethods
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To define an overridable function (a multimethod), one needs a few things:
+
+1. A dispatcher that returns an iterable of ``ua.Dispatchable`` objects.
+2. A reverse dispatcher that replaces dispatchable values with the supplied
+   ones.
+3. A domain.
+4. Optionally, a default implementation, which can be provided in terms of
+   other multimethods.
+
+As an example, consider the following::
+
+    import uarray as ua
+
+    def full_argreplacer(args, kwargs, dispatchables):
+        def full(shape, fill_value, dtype=None, order='C'):
+            return (shape, fill_value), dict(
+                dtype=dispatchables[0],
+                order=order
+            )
+
+        return full(*args, **kwargs)
+
+    @ua.create_multimethod(full_argreplacer, domain="numpy")
+    def full(shape, fill_value, dtype=None, order='C'):
+        return (ua.Dispatchable(dtype, np.dtype),)
+
+A large set of examples can be found in the ``unumpy`` repository, [8]_.
+This simple act of overriding callables allows us to override:
+
+* Methods
+* Properties, via ``fget`` and ``fset``
+* Entire objects, via ``__get__``.
+
+Examples for NumPy
+^^^^^^^^^^^^^^^^^^
+
+A library that implements a NumPy-like API will use it in the following
+manner (as an example)::
+
+    import numpy.overridable as unp
+    _ua_implementations = {}
+
+    __ua_domain__ = "numpy"
+
+    def __ua_function__(func, args, kwargs):
+        fn = _ua_implementations.get(func, None)
+        return fn(*args, **kwargs) if fn is not None else NotImplemented
+
+    def implements(ua_func):
+        def inner(func):
+            _ua_implementations[ua_func] = func
+            return func
+
+        return inner
+
+    @implements(unp.asarray)
+    def asarray(a, dtype=None, order=None):
+        # Code here
+        # Either this method or __ua_convert__ must
+        # return NotImplemented for unsupported types,
+        # Or they shouldn't be marked as dispatchable.
+
+    # Provides a default implementation for ones and zeros.
+    @implements(unp.full)
+    def full(shape, fill_value, dtype=None, order='C'):
+        # Code here
+
+Backward compatibility
+----------------------
+
+There are no backward incompatible changes proposed in this NEP.
+
+Alternatives
+------------
+
+The current alternative to this problem is a combination of NEP-18 [2]_,
+NEP-13 [4]_ and NEP-30 [9]_ plus adding more protocols (not yet specified)
+in addition to them. Even then, some parts of the NumPy API will remain
+non-overridable, so it's a partial alternative.
+
+The main alternative to vendoring ``unumpy`` is to simply move it into NumPy
+completely and not distribute it as a separate package. This would also achieve
+the proposed goals, however we prefer to keep it a separate package for now,
+for reasons already stated above.
+
+The third alternative is to move ``unumpy`` into the NumPy organisation and
+develop it as a NumPy project. This will also achieve the said goals, and is
+also a possibility that can be considered by this NEP. However, the act of
+doing an extra ``pip install`` or ``conda install`` may discourage some users
+from adopting this method.
+
+An alternative to requiring opt-in is mainly to *not* override ``np.asarray``
+and ``np.array``, while making the rest of the NumPy API surface overridable
+and instead providing ``np.duckarray`` and ``np.asduckarray``
+as duck-array friendly alternatives that use the respective overrides. However,
+this has the downside of adding a minor overhead to NumPy calls.
+
+Discussion
+----------
+
+* ``uarray`` blogpost: https://labs.quansight.org/blog/2019/07/uarray-update-api-changes-overhead-and-comparison-to-__array_function__/
+* The discussion section of NEP-18: https://numpy.org/neps/nep-0018-array-function-protocol.html#discussion
+* NEP-22: https://numpy.org/neps/nep-0022-ndarray-duck-typing-overview.html
+* Dask issue #4462: https://github.com/dask/dask/issues/4462
+* PR #13046: https://github.com/numpy/numpy/pull/13046
+* Dask issue #4883: https://github.com/dask/dask/issues/4883
+* Issue #13831: https://github.com/numpy/numpy/issues/13831
+* Discussion PR 1: https://github.com/hameerabbasi/numpy/pull/3
+* Discussion PR 2: https://github.com/hameerabbasi/numpy/pull/4
+* Discussion PR 3: https://github.com/numpy/numpy/pull/14389
+
+
+References and Footnotes
+------------------------
+
+.. [1] uarray, A general dispatch mechanism for Python: https://uarray.readthedocs.io
+
+.. [2] NEP 18 — A dispatch mechanism for NumPy’s high level array functions: https://numpy.org/neps/nep-0018-array-function-protocol.html
+
+.. [3] NEP 22 — Duck typing for NumPy arrays – high level overview: https://numpy.org/neps/nep-0022-ndarray-duck-typing-overview.html
+
+.. [4] NEP 13 — A Mechanism for Overriding Ufuncs: https://numpy.org/neps/nep-0013-ufunc-overrides.html
+
+.. [5] Reply to Adding to the non-dispatched implementation of NumPy methods: http://numpy-discussion.10968.n7.nabble.com/Adding-to-the-non-dispatched-implementation-of-NumPy-methods-tp46816p46874.html
+
+.. [6] Custom Dtype/Units discussion: http://numpy-discussion.10968.n7.nabble.com/Custom-Dtype-Units-discussion-td43262.html
+
+.. [7] The epic dtype cleanup plan: https://github.com/numpy/numpy/issues/2899
+
+.. [8] unumpy: NumPy, but implementation-independent: https://unumpy.readthedocs.io
+
+.. [9] NEP 30 — Duck Typing for NumPy Arrays - Implementation: https://www.numpy.org/neps/nep-0030-duck-array-protocol.html
+
+.. [10] http://scipy.github.io/devdocs/fft.html#backend-control
+
+
+Copyright
+---------
+
+This document has been placed in the public domain.
diff --git a/doc/neps/nep-template.rst b/doc/neps/nep-template.rst
index c3d34ea46..42f717c7a 100644
--- a/doc/neps/nep-template.rst
+++ b/doc/neps/nep-template.rst
@@ -24,6 +24,26 @@ the existing problem, who it affects, what it is trying to solve, and why.
 This section should explicitly address the scope of and key requirements for
 the proposed change.
 
+Usage and Impact
+----------------
+
+This section describes how users of NumPy will use features described in this
+NEP. It should consist mainly of code examples that wouldn't be possible
+without acceptance and implementation of this NEP, as well as the impact the
+proposed changes would have on the ecosystem. This section should be written
+from the perspective of the users of NumPy, and the benefits it will provide
+them; and as such, it should include implementation details only if
+necessary to explain the functionality.
+
+Backward compatibility
+----------------------
+
+This section describes the ways in which the NEP breaks backward compatibility.
+
+The mailing list post will contain the NEP up to and including this section.
+Its purpose is to provide a high-level summary to users who are not interested
+in detailed technical discussion, but may have opinions around, e.g., usage and
+impact.
 
 Detailed description
 --------------------
@@ -54,12 +74,6 @@ be linked to from here.  (A NEP does not need to be implemented in a single
 pull request if it makes sense to implement it in discrete phases).
 
 
-Backward compatibility
-----------------------
-
-This section describes the ways in which the NEP breaks backward compatibility.
-
-
 Alternatives
 ------------
 
diff --git a/doc/newdtype_example/example.py b/doc/newdtype_example/example.py
deleted file mode 100644
index 6be9caa75..000000000
--- a/doc/newdtype_example/example.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from __future__ import division, absolute_import, print_function
-
-import floatint.floatint as ff
-import numpy as np
-
-# Setting using array is hard because
-#  The parser doesn't stop at tuples always
-#  So, the setitem code will be called with scalars on the
-#  wrong shaped array.
-# But we can get a view as an ndarray of the given type:
-g = np.array([1, 2, 3, 4, 5, 6, 7, 8]).view(ff.floatint_type)
-
-# Now, the elements will be the scalar type associated
-#  with the ndarray.
-print(g[0])
-print(type(g[1]))
-
-# Now, you need to register ufuncs and more arrfuncs to do useful things...
diff --git a/doc/newdtype_example/floatint.c b/doc/newdtype_example/floatint.c
deleted file mode 100644
index 0cc198388..000000000
--- a/doc/newdtype_example/floatint.c
+++ /dev/null
@@ -1,152 +0,0 @@
-
-#include "Python.h"
-#include "structmember.h" /* for offset of macro if needed */
-#include "numpy/arrayobject.h"
-
-
-/* Use a Python float as the canonical type being added
-*/
-
-typedef struct _floatint {
-    PyObject_HEAD
-    npy_int32 first;
-    npy_int32 last;
-} PyFloatIntObject;
-
-static PyTypeObject PyFloatInt_Type = {
-    PyObject_HEAD_INIT(NULL)
-    0,                                          /*ob_size*/
-    "floatint.floatint",                        /*tp_name*/
-    sizeof(PyFloatIntObject),                   /*tp_basicsize*/
-};
-
-static PyArray_ArrFuncs _PyFloatInt_Funcs;
-
-#define _ALIGN(type) offsetof(struct {char c; type v;},v)
-
-/* The scalar-type */
-
-static PyArray_Descr _PyFloatInt_Dtype = {
-    PyObject_HEAD_INIT(NULL)
-    &PyFloatInt_Type,
-    'f',
-    '0',
-    '=',
-    0,
-    0,
-    sizeof(double),
-    _ALIGN(double),
-    NULL,
-    NULL,
-    NULL,
-    &_PyFloatInt_Funcs
-};
-
-static void
-twoint_copyswap(void *dst, void *src, int swap, void *arr)
-{
-    if (src != NULL) {
-        memcpy(dst, src, sizeof(double));
-    }
-
-    if (swap) {
-        register char *a, *b, c;
-        a = (char *)dst;
-        b = a + 7;
-        c = *a; *a++ = *b; *b-- = c;
-        c = *a; *a++ = *b; *b-- = c;
-        c = *a; *a++ = *b; *b-- = c;
-        c = *a; *a++ = *b; *b   = c;
-    }
-}
-
-static PyObject *
-twoint_getitem(char *ip, PyArrayObject *ap) {
-    npy_int32 a[2];
- 
-    if ((ap==NULL) || PyArray_ISBEHAVED_RO(ap)) {
-        a[0] = *((npy_int32 *)ip);
-        a[1] = *((npy_int32 *)ip + 1);
-    }
-    else {
-        ap->descr->f->copyswap(a, ip, !PyArray_ISNOTSWAPPED(ap), ap);
-    }
-    return Py_BuildValue("(ii)", a[0], a[1]);
-}
-
-static int
-twoint_setitem(PyObject *op, char *ov, PyArrayObject *ap) {
-    npy_int32 a[2];
-    
-    if (!PyTuple_Check(op)) {
-        PyErr_SetString(PyExc_TypeError, "must be a tuple");
-        return -1;
-    }
-    if (!PyArg_ParseTuple(op, "ii", a, a+1)) return -1;
-
-    if (ap == NULL || PyArray_ISBEHAVED(ap)) {
-        memcpy(ov, a, sizeof(double));
-    }
-    else {
-        ap->descr->f->copyswap(ov, a, !PyArray_ISNOTSWAPPED(ap), ap);
-    }
-    return 0;
-}
-
-static PyArray_Descr * _register_dtype(void)
-{
-    int userval;
-    PyArray_InitArrFuncs(&_PyFloatInt_Funcs); 
-    /* Add copyswap,
-       nonzero, getitem, setitem*/
-    _PyFloatInt_Funcs.copyswap = twoint_copyswap;
-    _PyFloatInt_Funcs.getitem = (PyArray_GetItemFunc *)twoint_getitem;
-    _PyFloatInt_Funcs.setitem = (PyArray_SetItemFunc *)twoint_setitem; 
-    _PyFloatInt_Dtype.ob_type = &PyArrayDescr_Type;
-
-    userval = PyArray_RegisterDataType(&_PyFloatInt_Dtype);
-    return PyArray_DescrFromType(userval);
-}
-
-
-/* Initialization function for the module (*must* be called init<name>) */
-
-PyMODINIT_FUNC initfloatint(void) {
-    PyObject *m, *d;
-    PyArray_Descr *dtype;
-
-    /* Create the module and add the functions */
-    m = Py_InitModule("floatint", NULL);
-
-    /* Import the array objects */
-    import_array();
-
-
-    /* Initialize the new float type */
-    
-    /* Add some symbolic constants to the module */
-    d = PyModule_GetDict(m);
-
-    if (PyType_Ready(&PyFloat_Type) < 0) return;
-    PyFloatInt_Type.tp_base = &PyFloat_Type;
-    /* This is only needed because we are sub-typing the
-       Float type and must pre-set some function pointers
-       to get PyType_Ready to fill in the rest.
-     */
-    PyFloatInt_Type.tp_alloc = PyType_GenericAlloc;
-    PyFloatInt_Type.tp_new = PyFloat_Type.tp_new;
-    PyFloatInt_Type.tp_dealloc = PyFloat_Type.tp_dealloc;
-    PyFloatInt_Type.tp_free = PyObject_Del;
-    if (PyType_Ready(&PyFloatInt_Type) < 0) return;
-    /* End specific code */
-    
-
-    dtype = _register_dtype();
-    Py_XINCREF(dtype);
-    if (dtype != NULL) {
-        PyDict_SetItemString(d, "floatint_type", (PyObject *)dtype);
-    }
-    Py_INCREF(&PyFloatInt_Type);
-    PyDict_SetItemString(d, "floatint", (PyObject *)&PyFloatInt_Type);
-    return;
-}
diff --git a/doc/newdtype_example/floatint/__init__.py b/doc/newdtype_example/floatint/__init__.py
deleted file mode 100644
index 1d0f69b67..000000000
--- a/doc/newdtype_example/floatint/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from __future__ import division, absolute_import, print_function
diff --git a/doc/newdtype_example/setup.py b/doc/newdtype_example/setup.py
deleted file mode 100644
index d7ab040a1..000000000
--- a/doc/newdtype_example/setup.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from __future__ import division, print_function
-
-from numpy.distutils.core import setup
-
-def configuration(parent_package = '', top_path=None):
-    from numpy.distutils.misc_util import Configuration
-    config = Configuration('floatint', parent_package, top_path)
-
-    config.add_extension('floatint',
-                         sources = ['floatint.c'])
-    return config
-
-setup(configuration=configuration)
diff --git a/doc/release/upcoming_changes/13794.new_function.rst b/doc/release/upcoming_changes/13794.new_function.rst
new file mode 100644
index 000000000..cf8b38bb0
--- /dev/null
+++ b/doc/release/upcoming_changes/13794.new_function.rst
@@ -0,0 +1,5 @@
+Multivariate hypergeometric distribution added to `numpy.random`
+----------------------------------------------------------------
+The method `multivariate_hypergeometric` has been added to the class
+`numpy.random.Generator`.  This method generates random variates from
+the multivariate hypergeometric probability distribution.
diff --git a/doc/release/upcoming_changes/14510.compatibility.rst b/doc/release/upcoming_changes/14510.compatibility.rst
index 63d46d2f7..fc5edbc39 100644
--- a/doc/release/upcoming_changes/14510.compatibility.rst
+++ b/doc/release/upcoming_changes/14510.compatibility.rst
@@ -1,7 +1,7 @@
-`numpy.lib.recfunctions.drop_fields` can no longer return `None`
-----------------------------------------------------------------
+`numpy.lib.recfunctions.drop_fields` can no longer return None
+--------------------------------------------------------------
 If ``drop_fields`` is used to drop all fields, previously the array would
-be completely discarded and `None` returned. Now it returns an array of the
+be completely discarded and None returned. Now it returns an array of the
 same shape as the input, but with no fields. The old behavior can be retained
 with::
 
@@ -9,4 +9,4 @@ with::
     if dropped_arr.dtype.names == ():
         dropped_arr = None
 
-converting the empty recarray to `None`
+converting the empty recarray to None
diff --git a/doc/release/upcoming_changes/14720.deprecation.rst b/doc/release/upcoming_changes/14720.deprecation.rst
new file mode 100644
index 000000000..46ad6d8f7
--- /dev/null
+++ b/doc/release/upcoming_changes/14720.deprecation.rst
@@ -0,0 +1,8 @@
+Deprecate the financial functions
+---------------------------------
+In accordance with
+`NEP-32 <https://numpy.org/neps/nep-0032-remove-financial-functions.html>`_,
+the functions `fv`, `ipmt`, `irr`, `mirr`, `nper`, `npv`, `pmt`, `ppmt`,
+`pv` and `rate` are deprecated, and will be removed from NumPy 1.20.
+The replacement for these functions is the Python package
+`numpy-financial <https://pypi.org/project/numpy-financial>`_.
diff --git a/doc/release/upcoming_changes/14771.improvement.rst b/doc/release/upcoming_changes/14771.improvement.rst
new file mode 100644
index 000000000..aaea8f8ed
--- /dev/null
+++ b/doc/release/upcoming_changes/14771.improvement.rst
@@ -0,0 +1,5 @@
+``-std=c99`` added if compiler is named ``gcc``
+-----------------------------------------------
+GCC before version 5 requires the ``-std=c99`` command line argument. Newer
+compilers automatically turn on C99 mode. The compiler setup code will
+automatically add the flag if the compiler name has ``gcc`` in it.
diff --git a/doc/release/upcoming_changes/14777.compatibility.rst b/doc/release/upcoming_changes/14777.compatibility.rst
new file mode 100644
index 000000000..d594463de
--- /dev/null
+++ b/doc/release/upcoming_changes/14777.compatibility.rst
@@ -0,0 +1,5 @@
+Changed random variate stream from `numpy.random.Generator.integers`
+--------------------------------------------------------------------
+There was a bug in `numpy.random.Generator.integers` that caused biased
+sampling of 8 and 16 bit integer types. Fixing that bug has changed the
+output stream from what it was in previous releases.
diff --git a/doc/source/dev/development_environment.rst b/doc/source/dev/development_environment.rst
index 9d618cc9f..297502b31 100644
--- a/doc/source/dev/development_environment.rst
+++ b/doc/source/dev/development_environment.rst
@@ -11,8 +11,7 @@ Recommended development setup
 Since NumPy contains parts written in C and Cython that need to be
 compiled before use, make sure you have the necessary compilers and Python
 development headers installed - see :ref:`building-from-source`. Building
-NumPy as of version ``1.17`` requires a C99 compliant compiler. For
-some older compilers this may require ``export CFLAGS='-std=c99'``.
+NumPy as of version ``1.17`` requires a C99 compliant compiler.
 
 Having compiled code also means that importing NumPy from the development
 sources needs some additional steps, which are explained below.  For the rest
diff --git a/doc/source/reference/arrays.classes.rst b/doc/source/reference/arrays.classes.rst
index 39410b2a4..9dcbb6267 100644
--- a/doc/source/reference/arrays.classes.rst
+++ b/doc/source/reference/arrays.classes.rst
@@ -51,7 +51,7 @@ NumPy provides several hooks that classes can customize:
    .. versionadded:: 1.13
 
    Any class, ndarray subclass or not, can define this method or set it to
-   :obj:`None` in order to override the behavior of NumPy's ufuncs. This works
+   None in order to override the behavior of NumPy's ufuncs. This works
    quite similarly to Python's ``__mul__`` and other binary operation routines.
 
    - *ufunc* is the ufunc object that was called.
@@ -94,13 +94,13 @@ NumPy provides several hooks that classes can customize:
    :class:`ndarray` handles binary operations like ``arr + obj`` and ``arr
    < obj`` when ``arr`` is an :class:`ndarray` and ``obj`` is an instance
    of a custom class. There are two possibilities. If
-   ``obj.__array_ufunc__`` is present and not :obj:`None`, then
+   ``obj.__array_ufunc__`` is present and not None, then
    ``ndarray.__add__`` and friends will delegate to the ufunc machinery,
    meaning that ``arr + obj`` becomes ``np.add(arr, obj)``, and then
    :func:`~numpy.add` invokes ``obj.__array_ufunc__``. This is useful if you
    want to define an object that acts like an array.
 
-   Alternatively, if ``obj.__array_ufunc__`` is set to :obj:`None`, then as a
+   Alternatively, if ``obj.__array_ufunc__`` is set to None, then as a
    special case, special methods like ``ndarray.__add__`` will notice this
    and *unconditionally* raise :exc:`TypeError`. This is useful if you want to
    create objects that interact with arrays via binary operations, but
@@ -135,7 +135,7 @@ NumPy provides several hooks that classes can customize:
         place rather than separately by the ufunc machinery and by the binary
         operation rules (which gives preference to special methods of
         subclasses; the alternative way to enforce a one-place only hierarchy,
-        of setting :func:`__array_ufunc__` to :obj:`None`, would seem very
+        of setting :func:`__array_ufunc__` to None, would seem very
         unexpected and thus confusing, as then the subclass would not work at
         all with ufuncs).
       - :class:`ndarray` defines its own :func:`__array_ufunc__`, which,
@@ -280,7 +280,7 @@ NumPy provides several hooks that classes can customize:
 
 .. py:method:: class.__array_prepare__(array, context=None)
 
-   At the beginning of every :ref:`ufunc <ufuncs.output-type>`, this
+   At the beginning of every :ref:`ufunc <ufuncs-output-type>`, this
    method is called on the input object with the highest array
    priority, or the output object if one was specified. The output
    array is passed in and whatever is returned is passed to the ufunc.
@@ -295,7 +295,7 @@ NumPy provides several hooks that classes can customize:
 
 .. py:method:: class.__array_wrap__(array, context=None)
 
-   At the end of every :ref:`ufunc <ufuncs.output-type>`, this method
+   At the end of every :ref:`ufunc <ufuncs-output-type>`, this method
    is called on the input object with the highest array priority, or
    the output object if one was specified. The ufunc-computed array
    is passed in and whatever is returned is passed to the user.
@@ -322,7 +322,7 @@ NumPy provides several hooks that classes can customize:
 
    If a class (ndarray subclass or not) having the :func:`__array__`
    method is used as the output object of an :ref:`ufunc
-   <ufuncs.output-type>`, results will be written to the object
+   <ufuncs-output-type>`, results will be written to the object
    returned by :func:`__array__`. Similar conversion is done on
    input arrays.
 
diff --git a/doc/source/reference/arrays.dtypes.rst b/doc/source/reference/arrays.dtypes.rst
index ab743a8ee..231707b11 100644
--- a/doc/source/reference/arrays.dtypes.rst
+++ b/doc/source/reference/arrays.dtypes.rst
@@ -128,7 +128,7 @@ What can be converted to a data-type object is described below:
 
    Used as-is.
 
-:const:`None`
+None
 
    .. index::
       triple: dtype; construction; from None
@@ -392,7 +392,7 @@ Type strings
     their values must each be lists of the same length as the *names*
     and *formats* lists. The *offsets* value is a list of byte offsets
     (limited to `ctypes.c_int`) for each field, while the *titles* value is a
-    list of titles for each field (:const:`None` can be used if no title is
+    list of titles for each field (None can be used if no title is
     desired for that field). The *titles* can be any :class:`string`
     or :class:`unicode` object and will add another entry to the
     fields dictionary keyed by the title and referencing the same
diff --git a/doc/source/reference/arrays.interface.rst b/doc/source/reference/arrays.interface.rst
index f361ccb06..f36a083aa 100644
--- a/doc/source/reference/arrays.interface.rst
+++ b/doc/source/reference/arrays.interface.rst
@@ -138,18 +138,18 @@ This approach to the interface consists of the object having an
        This attribute can also be an object exposing the
        :c:func:`buffer interface <PyObject_AsCharBuffer>` which
        will be used to share the data. If this key is not present (or
-       returns :class:`None`), then memory sharing will be done
+       returns None), then memory sharing will be done
        through the buffer interface of the object itself.  In this
        case, the offset key can be used to indicate the start of the
        buffer.  A reference to the object exposing the array interface
        must be stored by the new object if the memory area is to be
        secured.
 
-       **Default**: :const:`None`
+       **Default**: None
 
    **strides** (optional)
 
-       Either :const:`None` to indicate a C-style contiguous array or
+       Either None to indicate a C-style contiguous array or
        a Tuple of strides which provides the number of bytes needed
        to jump to the next array element in the corresponding
        dimension. Each entry must be an integer (a Python
@@ -157,29 +157,29 @@ This approach to the interface consists of the object having an
        be larger than can be represented by a C "int" or "long"; the
        calling code should handle this appropriately, either by
        raising an error, or by using :c:type:`Py_LONG_LONG` in C. The
-       default is :const:`None` which implies a C-style contiguous
+       default is None which implies a C-style contiguous
        memory buffer.  In this model, the last dimension of the array
        varies the fastest.  For example, the default strides tuple
        for an object whose array entries are 8 bytes long and whose
        shape is (10,20,30) would be (4800, 240, 8)
 
-       **Default**: :const:`None` (C-style contiguous)
+       **Default**: None (C-style contiguous)
 
    **mask** (optional)
 
-       :const:`None` or an object exposing the array interface.  All
+       None or an object exposing the array interface.  All
        elements of the mask array should be interpreted only as true
        or not true indicating which elements of this array are valid.
        The shape of this object should be `"broadcastable"
        <arrays.broadcasting.broadcastable>` to the shape of the
        original array.
 
-       **Default**: :const:`None` (All array values are valid)
+       **Default**: None (All array values are valid)
 
    **offset** (optional)
 
        An integer offset into the array data region. This can only be
-       used when data is :const:`None` or returns a :class:`buffer`
+       used when data is None or returns a :class:`buffer`
        object.
 
        **Default**: 0.
diff --git a/doc/source/reference/arrays.ndarray.rst b/doc/source/reference/arrays.ndarray.rst
index 8f431bc9c..831d211bc 100644
--- a/doc/source/reference/arrays.ndarray.rst
+++ b/doc/source/reference/arrays.ndarray.rst
@@ -329,7 +329,7 @@ Item selection and manipulation
 -------------------------------
 
 For array methods that take an *axis* keyword, it defaults to
-:const:`None`. If axis is *None*, then the array is treated as a 1-D
+*None*. If axis is *None*, then the array is treated as a 1-D
 array. Any other value for *axis* represents the dimension along which
 the operation should proceed.
 
diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst
index 08bf06b00..0530a5747 100644
--- a/doc/source/reference/c-api/array.rst
+++ b/doc/source/reference/c-api/array.rst
@@ -916,82 +916,82 @@ enumerated array data type. For the array type checking macros the
 argument must be a :c:type:`PyObject *<PyObject>` that can be directly interpreted as a
 :c:type:`PyArrayObject *`.
 
-.. c:function:: PyTypeNum_ISUNSIGNED(num)
+.. c:function:: PyTypeNum_ISUNSIGNED(int num)
 
-.. c:function:: PyDataType_ISUNSIGNED(descr)
+.. c:function:: PyDataType_ISUNSIGNED(PyArray_Descr *descr)
 
-.. c:function:: PyArray_ISUNSIGNED(obj)
+.. c:function:: PyArray_ISUNSIGNED(PyArrayObject *obj)
 
     Type represents an unsigned integer.
 
-.. c:function:: PyTypeNum_ISSIGNED(num)
+.. c:function:: PyTypeNum_ISSIGNED(int num)
 
-.. c:function:: PyDataType_ISSIGNED(descr)
+.. c:function:: PyDataType_ISSIGNED(PyArray_Descr *descr)
 
-.. c:function:: PyArray_ISSIGNED(obj)
+.. c:function:: PyArray_ISSIGNED(PyArrayObject *obj)
 
     Type represents a signed integer.
 
-.. c:function:: PyTypeNum_ISINTEGER(num)
+.. c:function:: PyTypeNum_ISINTEGER(int num)
 
-.. c:function:: PyDataType_ISINTEGER(descr)
+.. c:function:: PyDataType_ISINTEGER(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISINTEGER(obj)
+.. c:function:: PyArray_ISINTEGER(PyArrayObject *obj)
 
     Type represents any integer.
 
-.. c:function:: PyTypeNum_ISFLOAT(num)
+.. c:function:: PyTypeNum_ISFLOAT(int num)
 
-.. c:function:: PyDataType_ISFLOAT(descr)
+.. c:function:: PyDataType_ISFLOAT(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISFLOAT(obj)
+.. c:function:: PyArray_ISFLOAT(PyArrayObject *obj)
 
     Type represents any floating point number.
 
-.. c:function:: PyTypeNum_ISCOMPLEX(num)
+.. c:function:: PyTypeNum_ISCOMPLEX(int num)
 
-.. c:function:: PyDataType_ISCOMPLEX(descr)
+.. c:function:: PyDataType_ISCOMPLEX(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISCOMPLEX(obj)
+.. c:function:: PyArray_ISCOMPLEX(PyArrayObject *obj)
 
     Type represents any complex floating point number.
 
-.. c:function:: PyTypeNum_ISNUMBER(num)
+.. c:function:: PyTypeNum_ISNUMBER(int num)
 
-.. c:function:: PyDataType_ISNUMBER(descr)
+.. c:function:: PyDataType_ISNUMBER(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISNUMBER(obj)
+.. c:function:: PyArray_ISNUMBER(PyArrayObject *obj)
 
     Type represents any integer, floating point, or complex floating point
     number.
 
-.. c:function:: PyTypeNum_ISSTRING(num)
+.. c:function:: PyTypeNum_ISSTRING(int num)
 
-.. c:function:: PyDataType_ISSTRING(descr)
+.. c:function:: PyDataType_ISSTRING(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISSTRING(obj)
+.. c:function:: PyArray_ISSTRING(PyArrayObject *obj)
 
     Type represents a string data type.
 
-.. c:function:: PyTypeNum_ISPYTHON(num)
+.. c:function:: PyTypeNum_ISPYTHON(int num)
 
-.. c:function:: PyDataType_ISPYTHON(descr)
+.. c:function:: PyDataType_ISPYTHON(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISPYTHON(obj)
+.. c:function:: PyArray_ISPYTHON(PyArrayObject *obj)
 
     Type represents an enumerated type corresponding to one of the
     standard Python scalar (bool, int, float, or complex).
 
-.. c:function:: PyTypeNum_ISFLEXIBLE(num)
+.. c:function:: PyTypeNum_ISFLEXIBLE(int num)
 
-.. c:function:: PyDataType_ISFLEXIBLE(descr)
+.. c:function:: PyDataType_ISFLEXIBLE(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISFLEXIBLE(obj)
+.. c:function:: PyArray_ISFLEXIBLE(PyArrayObject *obj)
 
     Type represents one of the flexible array types ( :c:data:`NPY_STRING`,
     :c:data:`NPY_UNICODE`, or :c:data:`NPY_VOID` ).
 
-.. c:function:: PyDataType_ISUNSIZED(descr):
+.. c:function:: PyDataType_ISUNSIZED(PyArray_Descr* descr)
 
     Type has no size information attached, and can be resized. Should only be
     called on flexible dtypes. Types that are attached to an array will always
@@ -1001,41 +1001,41 @@ argument must be a :c:type:`PyObject *<PyObject>` that can be directly interpret
 
     For structured datatypes with no fields this function now returns False.
 
-.. c:function:: PyTypeNum_ISUSERDEF(num)
+.. c:function:: PyTypeNum_ISUSERDEF(int num)
 
-.. c:function:: PyDataType_ISUSERDEF(descr)
+.. c:function:: PyDataType_ISUSERDEF(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISUSERDEF(obj)
+.. c:function:: PyArray_ISUSERDEF(PyArrayObject *obj)
 
     Type represents a user-defined type.
 
-.. c:function:: PyTypeNum_ISEXTENDED(num)
+.. c:function:: PyTypeNum_ISEXTENDED(int num)
 
-.. c:function:: PyDataType_ISEXTENDED(descr)
+.. c:function:: PyDataType_ISEXTENDED(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISEXTENDED(obj)
+.. c:function:: PyArray_ISEXTENDED(PyArrayObject *obj)
 
     Type is either flexible or user-defined.
 
-.. c:function:: PyTypeNum_ISOBJECT(num)
+.. c:function:: PyTypeNum_ISOBJECT(int num)
 
-.. c:function:: PyDataType_ISOBJECT(descr)
+.. c:function:: PyDataType_ISOBJECT(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISOBJECT(obj)
+.. c:function:: PyArray_ISOBJECT(PyArrayObject *obj)
 
     Type represents object data type.
 
-.. c:function:: PyTypeNum_ISBOOL(num)
+.. c:function:: PyTypeNum_ISBOOL(int num)
 
-.. c:function:: PyDataType_ISBOOL(descr)
+.. c:function:: PyDataType_ISBOOL(PyArray_Descr* descr)
 
-.. c:function:: PyArray_ISBOOL(obj)
+.. c:function:: PyArray_ISBOOL(PyArrayObject *obj)
 
     Type represents Boolean data type.
 
-.. c:function:: PyDataType_HASFIELDS(descr)
+.. c:function:: PyDataType_HASFIELDS(PyArray_Descr* descr)
 
-.. c:function:: PyArray_HASFIELDS(obj)
+.. c:function:: PyArray_HASFIELDS(PyArrayObject *obj)
 
     Type has fields associated with it.
 
@@ -1584,7 +1584,7 @@ Flag checking
 For all of these macros *arr* must be an instance of a (subclass of)
 :c:data:`PyArray_Type`.
 
-.. c:function:: PyArray_CHKFLAGS(arr, flags)
+.. c:function:: PyArray_CHKFLAGS(PyObject *arr, int flags)
 
     The first parameter, arr, must be an ndarray or subclass. The
     parameter, *flags*, should be an integer consisting of bitwise
@@ -1594,60 +1594,60 @@ For all of these macros *arr* must be an instance of a (subclass of)
     :c:data:`NPY_ARRAY_WRITEABLE`, :c:data:`NPY_ARRAY_WRITEBACKIFCOPY`,
     :c:data:`NPY_ARRAY_UPDATEIFCOPY`.
 
-.. c:function:: PyArray_IS_C_CONTIGUOUS(arr)
+.. c:function:: PyArray_IS_C_CONTIGUOUS(PyObject *arr)
 
     Evaluates true if *arr* is C-style contiguous.
 
-.. c:function:: PyArray_IS_F_CONTIGUOUS(arr)
+.. c:function:: PyArray_IS_F_CONTIGUOUS(PyObject *arr)
 
     Evaluates true if *arr* is Fortran-style contiguous.
 
-.. c:function:: PyArray_ISFORTRAN(arr)
+.. c:function:: PyArray_ISFORTRAN(PyObject *arr)
 
     Evaluates true if *arr* is Fortran-style contiguous and *not*
     C-style contiguous. :c:func:`PyArray_IS_F_CONTIGUOUS`
     is the correct way to test for Fortran-style contiguity.
 
-.. c:function:: PyArray_ISWRITEABLE(arr)
+.. c:function:: PyArray_ISWRITEABLE(PyObject *arr)
 
     Evaluates true if the data area of *arr* can be written to
 
-.. c:function:: PyArray_ISALIGNED(arr)
+.. c:function:: PyArray_ISALIGNED(PyObject *arr)
 
     Evaluates true if the data area of *arr* is properly aligned on
     the machine.
 
-.. c:function:: PyArray_ISBEHAVED(arr)
+.. c:function:: PyArray_ISBEHAVED(PyObject *arr)
 
     Evaluates true if the data area of *arr* is aligned and writeable
     and in machine byte-order according to its descriptor.
 
-.. c:function:: PyArray_ISBEHAVED_RO(arr)
+.. c:function:: PyArray_ISBEHAVED_RO(PyObject *arr)
 
     Evaluates true if the data area of *arr* is aligned and in machine
     byte-order.
 
-.. c:function:: PyArray_ISCARRAY(arr)
+.. c:function:: PyArray_ISCARRAY(PyObject *arr)
 
     Evaluates true if the data area of *arr* is C-style contiguous,
     and :c:func:`PyArray_ISBEHAVED` (*arr*) is true.
 
-.. c:function:: PyArray_ISFARRAY(arr)
+.. c:function:: PyArray_ISFARRAY(PyObject *arr)
 
     Evaluates true if the data area of *arr* is Fortran-style
     contiguous and :c:func:`PyArray_ISBEHAVED` (*arr*) is true.
 
-.. c:function:: PyArray_ISCARRAY_RO(arr)
+.. c:function:: PyArray_ISCARRAY_RO(PyObject *arr)
 
     Evaluates true if the data area of *arr* is C-style contiguous,
     aligned, and in machine byte-order.
 
-.. c:function:: PyArray_ISFARRAY_RO(arr)
+.. c:function:: PyArray_ISFARRAY_RO(PyObject *arr)
 
     Evaluates true if the data area of *arr* is Fortran-style
     contiguous, aligned, and in machine byte-order **.**
 
-.. c:function:: PyArray_ISONESEGMENT(arr)
+.. c:function:: PyArray_ISONESEGMENT(PyObject *arr)
 
     Evaluates true if the data area of *arr* consists of a single
     (C-style or Fortran-style) contiguous segment.
@@ -2053,7 +2053,7 @@ Calculation
 .. tip::
 
     Pass in :c:data:`NPY_MAXDIMS` for axis in order to achieve the same
-    effect that is obtained by passing in *axis* = :const:`None` in Python
+    effect that is obtained by passing in ``axis=None`` in Python
     (treating the array as a 1-d array).
 
 
@@ -2659,18 +2659,27 @@ cost of a slight overhead.
     The mode should be one of:
 
     .. c:macro:: NPY_NEIGHBORHOOD_ITER_ZERO_PADDING
+
             Zero padding. Outside bounds values will be 0.
+
     .. c:macro:: NPY_NEIGHBORHOOD_ITER_ONE_PADDING
+
             One padding, Outside bounds values will be 1.
+
     .. c:macro:: NPY_NEIGHBORHOOD_ITER_CONSTANT_PADDING
+
             Constant padding. Outside bounds values will be the
             same as the first item in fill_value.
+
     .. c:macro:: NPY_NEIGHBORHOOD_ITER_MIRROR_PADDING
+
             Mirror padding. Outside bounds values will be as if the
             array items were mirrored. For example, for the array [1, 2, 3, 4],
             x[-2] will be 2, x[-2] will be 1, x[4] will be 4, x[5] will be 1,
             etc...
+
     .. c:macro:: NPY_NEIGHBORHOOD_ITER_CIRCULAR_PADDING
+
             Circular padding. Outside bounds values will be as if the array
             was repeated. For example, for the array [1, 2, 3, 4], x[-2] will
             be 3, x[-2] will be 4, x[4] will be 1, x[5] will be 2, etc...
@@ -3508,6 +3517,10 @@ Miscellaneous Macros
 
     Evaluates as True if arrays *a1* and *a2* have the same shape.
 
+.. c:var:: a
+
+.. c:var:: b
+
 .. c:macro:: PyArray_MAX(a,b)
 
     Returns the maximum of *a* and *b*. If (*a*) or (*b*) are
diff --git a/doc/source/reference/maskedarray.baseclass.rst b/doc/source/reference/maskedarray.baseclass.rst
index 204ebfe08..5bbdd0299 100644
--- a/doc/source/reference/maskedarray.baseclass.rst
+++ b/doc/source/reference/maskedarray.baseclass.rst
@@ -160,9 +160,9 @@ replaced with ``n`` integers which will be interpreted as an n-tuple.
 Item selection and manipulation
 -------------------------------
 
-For array methods that take an *axis* keyword, it defaults to `None`.
-If axis is *None*, then the array is treated as a 1-D array.
-Any other value for *axis* represents the dimension along which
+For array methods that take an ``axis`` keyword, it defaults to None.
+If axis is None, then the array is treated as a 1-D array.
+Any other value for ``axis`` represents the dimension along which
 the operation should proceed.
 
 .. autosummary::
diff --git a/doc/source/reference/maskedarray.generic.rst b/doc/source/reference/maskedarray.generic.rst
index 7375d60fb..41c3ee564 100644
--- a/doc/source/reference/maskedarray.generic.rst
+++ b/doc/source/reference/maskedarray.generic.rst
@@ -74,7 +74,7 @@ To create an array with the second element invalid, we would do::
 To create a masked array where all values close to 1.e20 are invalid, we would
 do::
 
-   >>> z = masked_values([1.0, 1.e20, 3.0, 4.0], 1.e20)
+   >>> z = ma.masked_values([1.0, 1.e20, 3.0, 4.0], 1.e20)
 
 For a complete discussion of creation methods for masked arrays please see
 section :ref:`Constructing masked arrays <maskedarray.generic.constructing>`.
@@ -110,15 +110,15 @@ There are several ways to construct a masked array.
 
      >>> x = np.array([1, 2, 3])
      >>> x.view(ma.MaskedArray)
-     masked_array(data = [1 2 3],
-                  mask = False,
-            fill_value = 999999)
+     masked_array(data=[1, 2, 3],
+                  mask=False,
+            fill_value=999999)
      >>> x = np.array([(1, 1.), (2, 2.)], dtype=[('a',int), ('b', float)])
      >>> x.view(ma.MaskedArray)
-     masked_array(data = [(1, 1.0) (2, 2.0)],
-                  mask = [(False, False) (False, False)],
-            fill_value = (999999, 1e+20),
-                 dtype = [('a', '<i4'), ('b', '<f8')])
+     masked_array(data=[(1, 1.0), (2, 2.0)],
+                  mask=[(False, False), (False, False)],
+            fill_value=(999999, 1.e+20),
+                 dtype=[('a', '<i8'), ('b', '<f8')])
 
 * Yet another possibility is to use any of the following functions:
 
@@ -195,9 +195,9 @@ index. The inverse of the mask can be calculated with the
 
    >>> x = ma.array([[1, 2], [3, 4]], mask=[[0, 1], [1, 0]])
    >>> x[~x.mask]
-   masked_array(data = [1 4],
-                mask = [False False],
-          fill_value = 999999)
+   masked_array(data=[1, 4],
+                mask=[False, False],
+          fill_value=999999)
 
 Another way to retrieve the valid data is to use the :meth:`compressed`
 method, which returns a one-dimensional :class:`~numpy.ndarray` (or one of its
@@ -223,27 +223,26 @@ as invalid is to assign the special value :attr:`masked` to them::
    >>> x = ma.array([1, 2, 3])
    >>> x[0] = ma.masked
    >>> x
-   masked_array(data = [-- 2 3],
-                mask = [ True False False],
-          fill_value = 999999)
+   masked_array(data=[--, 2, 3],
+                mask=[ True, False, False],
+          fill_value=999999)
    >>> y = ma.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    >>> y[(0, 1, 2), (1, 2, 0)] = ma.masked
    >>> y
-   masked_array(data =
-    [[1 -- 3]
-     [4 5 --]
-     [-- 8 9]],
-                mask =
-    [[False  True False]
-     [False False  True]
-     [ True False False]],
-          fill_value = 999999)
+   masked_array(
+     data=[[1, --, 3],
+           [4, 5, --],
+           [--, 8, 9]],
+     mask=[[False,  True, False],
+           [False, False,  True],
+           [ True, False, False]],
+     fill_value=999999)
    >>> z = ma.array([1, 2, 3, 4])
    >>> z[:-2] = ma.masked
    >>> z
-   masked_array(data = [-- -- 3 4],
-                mask = [ True  True False False],
-          fill_value = 999999)
+   masked_array(data=[--, --, 3, 4],
+                mask=[ True,  True, False, False],
+          fill_value=999999)
 
 
 A second possibility is to modify the :attr:`~MaskedArray.mask` directly,
@@ -263,9 +262,10 @@ mask::
    >>> x = ma.array([1, 2, 3], mask=[0, 0, 1])
    >>> x.mask = True
    >>> x
-   masked_array(data = [-- -- --],
-                mask = [ True  True  True],
-          fill_value = 999999)
+   masked_array(data=[--, --, --],
+                mask=[ True,  True,  True],
+          fill_value=999999,
+               dtype=int64)
 
 Finally, specific entries can be masked and/or unmasked by assigning to the
 mask a sequence of booleans::
@@ -273,9 +273,9 @@ mask a sequence of booleans::
    >>> x = ma.array([1, 2, 3])
    >>> x.mask = [0, 1, 0]
    >>> x
-   masked_array(data = [1 -- 3],
-                mask = [False  True False],
-          fill_value = 999999)
+   masked_array(data=[1, --, 3],
+                mask=[False,  True, False],
+          fill_value=999999)
 
 Unmasking an entry
 ~~~~~~~~~~~~~~~~~~
@@ -285,14 +285,14 @@ new valid values to them::
 
    >>> x = ma.array([1, 2, 3], mask=[0, 0, 1])
    >>> x
-   masked_array(data = [1 2 --],
-                mask = [False False  True],
-          fill_value = 999999)
+   masked_array(data=[1, 2, --],
+                mask=[False, False,  True],
+          fill_value=999999)
    >>> x[-1] = 5
    >>> x
-   masked_array(data = [1 2 5],
-                mask = [False False False],
-          fill_value = 999999)
+   masked_array(data=[1, 2, 5],
+                mask=[False, False, False],
+          fill_value=999999)
 
 .. note::
    Unmasking an entry by direct assignment will silently fail if the masked
@@ -304,21 +304,27 @@ new valid values to them::
 
       >>> x = ma.array([1, 2, 3], mask=[0, 0, 1], hard_mask=True)
       >>> x
-      masked_array(data = [1 2 --],
-                   mask = [False False  True],
-             fill_value = 999999)
+      masked_array(data=[1, 2, --],
+                   mask=[False, False,  True],
+             fill_value=999999)
       >>> x[-1] = 5
       >>> x
-      masked_array(data = [1 2 --],
-                   mask = [False False  True],
-             fill_value = 999999)
+      masked_array(data=[1, 2, --],
+                   mask=[False, False,  True],
+             fill_value=999999)
       >>> x.soften_mask()
+      masked_array(data=[1, 2, --],
+                   mask=[False, False,  True],
+             fill_value=999999)
       >>> x[-1] = 5
       >>> x
-      masked_array(data = [1 2 5],
-                   mask = [False False  False],
-             fill_value = 999999)
+      masked_array(data=[1, 2, 5],
+                   mask=[False, False, False],
+             fill_value=999999)
       >>> x.harden_mask()
+      masked_array(data=[1, 2, 5],
+                   mask=[False, False, False],
+             fill_value=999999)
 
 
 To unmask all masked entries of a masked array (provided the mask isn't a hard
@@ -327,15 +333,14 @@ mask::
 
    >>> x = ma.array([1, 2, 3], mask=[0, 0, 1])
    >>> x
-   masked_array(data = [1 2 --],
-                mask = [False False  True],
-          fill_value = 999999)
+   masked_array(data=[1, 2, --],
+                mask=[False, False,  True],
+          fill_value=999999)
    >>> x.mask = ma.nomask
    >>> x
-   masked_array(data = [1 2 3],
-                mask = [False False False],
-          fill_value = 999999)
-
+   masked_array(data=[1, 2, 3],
+                mask=[False, False, False],
+          fill_value=999999)
 
 
 Indexing and slicing
@@ -353,9 +358,7 @@ the mask is ``True``)::
    >>> x[0]
    1
    >>> x[-1]
-   masked_array(data = --,
-                mask = True,
-          fill_value = 1e+20)
+   masked
    >>> x[-1] is ma.masked
    True
 
@@ -370,10 +373,7 @@ is masked.
    >>> y[0]
    (1, 2)
    >>> y[-1]
-   masked_array(data = (3, --),
-                mask = (False, True),
-          fill_value = (999999, 999999),
-               dtype = [('a', '<i4'), ('b', '<i4')])
+   (3, --)
 
 
 When accessing a slice, the output is a masked array whose
@@ -385,20 +385,19 @@ required to ensure propagation of any modification of the mask to the original.
    >>> x = ma.array([1, 2, 3, 4, 5], mask=[0, 1, 0, 0, 1])
    >>> mx = x[:3]
    >>> mx
-   masked_array(data = [1 -- 3],
-                mask = [False  True False],
-          fill_value = 999999)
+   masked_array(data=[1, --, 3],
+                mask=[False,  True, False],
+          fill_value=999999)
    >>> mx[1] = -1
    >>> mx
-   masked_array(data = [1 -1 3],
-                mask = [False False False],
-          fill_value = 999999)
+   masked_array(data=[1, -1, 3],
+                mask=[False, False, False],
+          fill_value=999999)
    >>> x.mask
-   array([False,  True, False, False,  True])
+   array([False, False, False, False,  True])
    >>> x.data
    array([ 1, -1,  3,  4,  5])
 
-
 Accessing a field of a masked array with structured datatype returns a
 :class:`MaskedArray`.
 
@@ -421,9 +420,9 @@ ufuncs. Unary and binary functions that have a validity domain (such as
 constant whenever the input is masked or falls outside the validity domain::
 
    >>> ma.log([-1, 0, 1, 2])
-   masked_array(data = [-- -- 0.0 0.69314718056],
-                mask = [ True  True False False],
-          fill_value = 1e+20)
+   masked_array(data=[--, --, 0.0, 0.6931471805599453],
+                mask=[ True,  True, False, False],
+          fill_value=1e+20)
 
 Masked arrays also support standard numpy ufuncs. The output is then a masked
 array. The result of a unary ufunc is masked wherever the input is masked. The
@@ -435,10 +434,9 @@ input fall outside the validity domain::
 
    >>> x = ma.array([-1, 1, 0, 2, 3], mask=[0, 0, 0, 0, 1])
    >>> np.log(x)
-   masked_array(data = [-- -- 0.0 0.69314718056 --],
-                mask = [ True  True False False  True],
-          fill_value = 1e+20)
-
+   masked_array(data=[--, 0.0, --, 0.6931471805599453, --],
+                mask=[ True, False,  True, False,  True],
+          fill_value=1e+20)
 
 
 Examples
@@ -454,11 +452,11 @@ of anomalies (deviations from the average)::
    >>> import numpy.ma as ma
    >>> x = [0.,1.,-9999.,3.,4.]
    >>> mx = ma.masked_values (x, -9999.)
-   >>> print mx.mean()
+   >>> print(mx.mean())
    2.0
-   >>> print mx - mx.mean()
+   >>> print(mx - mx.mean())
    [-2.0 -1.0 -- 1.0 2.0]
-   >>> print mx.anom()
+   >>> print(mx.anom())
    [-2.0 -1.0 -- 1.0 2.0]
 
 
@@ -468,7 +466,7 @@ Filling in the missing data
 Suppose now that we wish to print that same data, but with the missing values
 replaced by the average value.
 
-   >>> print mx.filled(mx.mean())
+   >>> print(mx.filled(mx.mean()))
    [ 0.  1.  2.  3.  4.]
 
 
@@ -478,10 +476,10 @@ Numerical operations
 Numerical operations can be easily performed without worrying about missing
 values, dividing by zero, square roots of negative numbers, etc.::
 
-   >>> import numpy as np, numpy.ma as ma
+   >>> import numpy.ma as ma
    >>> x = ma.array([1., -1., 3., 4., 5., 6.], mask=[0,0,0,0,1,0])
    >>> y = ma.array([1., 2., 0., 4., 5., 6.], mask=[0,0,0,0,0,1])
-   >>> print np.sqrt(x/y)
+   >>> print(ma.sqrt(x/y))
    [1.0 -- -- 1.0 -- --]
 
 Four values of the output are invalid: the first one comes from taking the
@@ -492,8 +490,10 @@ the last two where the inputs were masked.
 Ignoring extreme values
 -----------------------
 
-Let's consider an array ``d`` of random floats between 0 and 1. We wish to
+Let's consider an array ``d`` of floats between 0 and 1. We wish to
 compute the average of the values of ``d`` while ignoring any data outside
-the range ``[0.1, 0.9]``::
+the range ``[0.2, 0.9]``::
 
-   >>> print ma.masked_outside(d, 0.1, 0.9).mean()
+   >>> d = np.linspace(0, 1, 20)
+   >>> print(d.mean() - ma.masked_outside(d, 0.2, 0.9).mean())
+   -0.05263157894736836
diff --git a/doc/source/reference/random/bit_generators/bitgenerators.rst b/doc/source/reference/random/bit_generators/bitgenerators.rst
deleted file mode 100644
index 1474f7dac..000000000
--- a/doc/source/reference/random/bit_generators/bitgenerators.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-:orphan:
-
-BitGenerator
-------------
-
-.. currentmodule:: numpy.random.bit_generator
-
-.. autosummary::
-   :toctree: generated/
-
-    BitGenerator
diff --git a/doc/source/reference/random/bit_generators/index.rst b/doc/source/reference/random/bit_generators/index.rst
index 35d9e5d09..94d3d8a3c 100644
--- a/doc/source/reference/random/bit_generators/index.rst
+++ b/doc/source/reference/random/bit_generators/index.rst
@@ -1,5 +1,3 @@
-.. _bit_generator:
-
 .. currentmodule:: numpy.random
 
 Bit Generators
@@ -35,14 +33,18 @@ The included BitGenerators are:
 .. _`Random123`: https://www.deshawresearch.com/resources_random123.html
 .. _`SFC author's page`: http://pracrand.sourceforge.net/RNG_engines.txt
 
+.. autosummary::
+    :toctree: generated/
+
+    BitGenerator
+
 .. toctree::
-   :maxdepth: 1
+    :maxdepth: 1
 
-   BitGenerator <bitgenerators>
-   MT19937 <mt19937>
-   PCG64 <pcg64>
-   Philox <philox>
-   SFC64 <sfc64>
+    MT19937 <mt19937>
+    PCG64 <pcg64>
+    Philox <philox>
+    SFC64 <sfc64>
 
 Seeding and Entropy
 -------------------
@@ -53,14 +55,14 @@ seed. All of the provided BitGenerators will take an arbitrary-sized
 non-negative integer, or a list of such integers, as a seed. BitGenerators
 need to take those inputs and process them into a high-quality internal state
 for the BitGenerator. All of the BitGenerators in numpy delegate that task to
-`~SeedSequence`, which uses hashing techniques to ensure that even low-quality
+`SeedSequence`, which uses hashing techniques to ensure that even low-quality
 seeds generate high-quality initial states.
 
 .. code-block:: python
 
-  from numpy.random import PCG64
+    from numpy.random import PCG64
 
-  bg = PCG64(12345678903141592653589793)
+    bg = PCG64(12345678903141592653589793)
 
 .. end_block
 
@@ -75,14 +77,14 @@ user, which is up to you.
 
 .. code-block:: python
 
-  from numpy.random import PCG64, SeedSequence
+    from numpy.random import PCG64, SeedSequence
 
-  # Get the user's seed somehow, maybe through `argparse`.
-  # If the user did not provide a seed, it should return `None`.
-  seed = get_user_seed()
-  ss = SeedSequence(seed)
-  print('seed = {}'.format(ss.entropy))
-  bg = PCG64(ss)
+    # Get the user's seed somehow, maybe through `argparse`.
+    # If the user did not provide a seed, it should return `None`.
+    seed = get_user_seed()
+    ss = SeedSequence(seed)
+    print('seed = {}'.format(ss.entropy))
+    bg = PCG64(ss)
 
 .. end_block
 
@@ -104,9 +106,6 @@ or using ``secrets.randbits(128)`` from the standard library are both
 convenient ways.
 
 .. autosummary::
-   :toctree: generated/
+    :toctree: generated/
 
     SeedSequence
-    bit_generator.ISeedSequence
-    bit_generator.ISpawnableSeedSequence
-    bit_generator.SeedlessSeedSequence
diff --git a/doc/source/reference/random/generator.rst b/doc/source/reference/random/generator.rst
index 068143270..a2cbb493a 100644
--- a/doc/source/reference/random/generator.rst
+++ b/doc/source/reference/random/generator.rst
@@ -62,6 +62,7 @@ Distributions
    ~numpy.random.Generator.lognormal
    ~numpy.random.Generator.logseries
    ~numpy.random.Generator.multinomial
+   ~numpy.random.Generator.multivariate_hypergeometric
    ~numpy.random.Generator.multivariate_normal
    ~numpy.random.Generator.negative_binomial
    ~numpy.random.Generator.noncentral_chisquare
diff --git a/doc/source/reference/random/index.rst b/doc/source/reference/random/index.rst
index b0283f3a7..9b19620d8 100644
--- a/doc/source/reference/random/index.rst
+++ b/doc/source/reference/random/index.rst
@@ -123,7 +123,7 @@ The `Generator` is the user-facing object that is nearly identical to
   rg.random()
 
 One can also instantiate `Generator` directly with a `BitGenerator` instance.
-To use the older `~mt19937.MT19937` algorithm, one can instantiate it directly
+To use the older `MT19937` algorithm, one can instantiate it directly
 and pass it to `Generator`.
 
 .. code-block:: python
diff --git a/doc/source/reference/random/new-or-different.rst b/doc/source/reference/random/new-or-different.rst
index c8815f98f..b3bddb443 100644
--- a/doc/source/reference/random/new-or-different.rst
+++ b/doc/source/reference/random/new-or-different.rst
@@ -10,9 +10,10 @@ What's New or Different
   The Box-Muller method used to produce NumPy's normals is no longer available
   in `Generator`.  It is not possible to reproduce the exact random
   values using ``Generator`` for the normal distribution or any other
-  distribution that relies on the normal such as the `gamma` or
-  `standard_t`. If you require bitwise backward compatible
-  streams, use `RandomState`.
+  distribution that relies on the normal such as the `Generator.gamma` or
+  `Generator.standard_t`. If you require bitwise backward compatible
+  streams, use `RandomState`, i.e., `RandomState.gamma` or
+  `RandomState.standard_t`.
 
 Quick comparison of legacy `mtrand <legacy>`_ to the new `Generator`
 
@@ -20,9 +21,9 @@ Quick comparison of legacy `mtrand <legacy>`_ to the new `Generator`
 Feature            Older Equivalent     Notes
 ------------------ -------------------- -------------
 `~.Generator`      `~.RandomState`      ``Generator`` requires a stream
-                                        source, called a `BitGenerator
-                                        <bit_generators>` A number of these
-                                        are provided.  ``RandomState`` uses
+                                        source, called a `BitGenerator`.
+                                        A number of these are provided.
+                                        ``RandomState`` uses
                                         the Mersenne Twister `~.MT19937` by
                                         default, but can also be instantiated
                                         with any BitGenerator.
diff --git a/doc/source/reference/random/parallel.rst b/doc/source/reference/random/parallel.rst
index 2f79f22d8..721584014 100644
--- a/doc/source/reference/random/parallel.rst
+++ b/doc/source/reference/random/parallel.rst
@@ -18,10 +18,10 @@ a `~BitGenerator`. It uses hashing techniques to ensure that low-quality seeds
 are turned into high quality initial states (at least, with very high
 probability).
 
-For example, `~mt19937.MT19937` has a state consisting of 624
+For example, `MT19937` has a state consisting of 624
 `uint32` integers. A naive way to take a 32-bit integer seed would be to just set
 the last element of the state to the 32-bit seed and leave the rest 0s. This is
-a valid state for `~mt19937.MT19937`, but not a good one. The Mersenne Twister
+a valid state for `MT19937`, but not a good one. The Mersenne Twister
 algorithm `suffers if there are too many 0s`_. Similarly, two adjacent 32-bit
 integer seeds (i.e. ``12345`` and ``12346``) would produce very similar
 streams.
@@ -91,15 +91,15 @@ territory ([2]_).
 .. [2] In this calculation, we can ignore the amount of numbers drawn from each
        stream. Each of the PRNGs we provide has some extra protection built in
        that avoids overlaps if the `~SeedSequence` pools differ in the
-       slightest bit. `~pcg64.PCG64` has :math:`2^{127}` separate cycles
+       slightest bit. `PCG64` has :math:`2^{127}` separate cycles
        determined by the seed in addition to the position in the
        :math:`2^{128}` long period for each cycle, so one has to both get on or
        near the same cycle *and* seed a nearby position in the cycle.
-       `~philox.Philox` has completely independent cycles determined by the seed.
-       `~sfc64.SFC64` incorporates a 64-bit counter so every unique seed is at
+       `Philox` has completely independent cycles determined by the seed.
+       `SFC64` incorporates a 64-bit counter so every unique seed is at
        least :math:`2^{64}` iterations away from any other seed. And
-       finally, `~mt19937.MT19937` has just an unimaginably huge period. Getting
-       a collision internal to `~SeedSequence` is the way a failure would be
+       finally, `MT19937` has just an unimaginably huge period. Getting
+       a collision internal to `SeedSequence` is the way a failure would be
        observed.
 
 .. _`implements an algorithm`: http://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html
@@ -113,10 +113,10 @@ territory ([2]_).
 Independent Streams
 -------------------
 
-:class:`~philox.Philox` is a counter-based RNG based which generates values by
+`Philox` is a counter-based RNG which generates values by
 encrypting an incrementing counter using weak cryptographic primitives. The
 seed determines the key that is used for the encryption. Unique keys create
-unique, independent streams. :class:`~philox.Philox` lets you bypass the
+unique, independent streams. `Philox` lets you bypass the
 seeding algorithm to directly set the 128-bit key. Similar, but different, keys
 will still create independent streams.
 
diff --git a/doc/source/reference/random/performance.rst b/doc/source/reference/random/performance.rst
index 2d5fca496..d70dd064a 100644
--- a/doc/source/reference/random/performance.rst
+++ b/doc/source/reference/random/performance.rst
@@ -5,21 +5,21 @@ Performance
 
 Recommendation
 **************
-The recommended generator for general use is :class:`~pcg64.PCG64`. It is
+The recommended generator for general use is `PCG64`. It is
 statistically high quality, full-featured, and fast on most platforms, but
 somewhat slow when compiled for 32-bit processes.
 
-:class:`~philox.Philox` is fairly slow, but its statistical properties have
+`Philox` is fairly slow, but its statistical properties have
 very high quality, and it is easy to get assuredly-independent stream by using
 unique keys. If that is the style you wish to use for parallel streams, or you
 are porting from another system that uses that style, then
-:class:`~philox.Philox` is your choice.
+`Philox` is your choice.
 
-:class:`~sfc64.SFC64` is statistically high quality and very fast. However, it
+`SFC64` is statistically high quality and very fast. However, it
 lacks jumpability. If you are not using that capability and want lots of speed,
 even on 32-bit processes, this is your choice.
 
-:class:`~mt19937.MT19937` `fails some statistical tests`_ and is not especially
+`MT19937` `fails some statistical tests`_ and is not especially
 fast compared to modern PRNGs. For these reasons, we mostly do not recommend
 using it on its own, only through the legacy `~.RandomState` for
 reproducing old results. That said, it has a very long history as a default in
@@ -31,20 +31,20 @@ Timings
 *******
 
 The timings below are the time in ns to produce 1 random value from a
-specific distribution.  The original :class:`~mt19937.MT19937` generator is
+specific distribution.  The original `MT19937` generator is
 much slower since it requires 2 32-bit values to equal the output of the
 faster generators.
 
 Integer performance has a similar ordering.
 
 The pattern is similar for other, more complex generators. The normal
-performance of the legacy :class:`~.RandomState` generator is much
+performance of the legacy `RandomState` generator is much
 lower than the other since it uses the Box-Muller transformation rather
 than the Ziggurat generator. The performance gap for Exponentials is also
 large due to the cost of computing the log function to invert the CDF.
 The column labeled MT19973 is used the same 32-bit generator as
-:class:`~.RandomState` but produces random values using
-:class:`~Generator`.
+`RandomState` but produces random values using
+`Generator`.
 
 .. csv-table::
     :header: ,MT19937,PCG64,Philox,SFC64,RandomState
@@ -61,7 +61,7 @@ The column labeled MT19973 is used the same 32-bit generator as
     Poissons,67.6,52.4,69.2,46.4,78.1
 
 The next table presents the performance in percentage relative to values
-generated by the legacy generator, `RandomState(MT19937())`. The overall
+generated by the legacy generator, ``RandomState(MT19937())``. The overall
 performance was computed using a geometric mean.
 
 .. csv-table::
diff --git a/doc/source/reference/routines.array-manipulation.rst b/doc/source/reference/routines.array-manipulation.rst
index cc93d1029..bf43232ef 100644
--- a/doc/source/reference/routines.array-manipulation.rst
+++ b/doc/source/reference/routines.array-manipulation.rst
@@ -9,6 +9,7 @@ Basic operations
    :toctree: generated/
 
     copyto
+    shape
 
 Changing array shape
 ====================
diff --git a/doc/source/reference/ufuncs.rst b/doc/source/reference/ufuncs.rst
index 3a3b67632..0416d6efc 100644
--- a/doc/source/reference/ufuncs.rst
+++ b/doc/source/reference/ufuncs.rst
@@ -100,7 +100,7 @@ is true:
    - *d* acts like a (5,6) array where the single value is repeated.
 
 
-.. _ufuncs.output-type:
+.. _ufuncs-output-type:
 
 Output type determination
 =========================
@@ -320,7 +320,7 @@ advanced usage and will not typically be used.
     .. versionadded:: 1.10
 
     The 'out' keyword argument is expected to be a tuple with one entry per
-    output (which can be `None` for arrays to be allocated by the ufunc).
+    output (which can be None for arrays to be allocated by the ufunc).
     For ufuncs with a single output, passing a single array (instead of a
     tuple holding a single array) is also valid.
 
@@ -494,7 +494,7 @@ keyword, and an *out* keyword, and the arrays must all have dimension >= 1.
 The *axis* keyword specifies the axis of the array over which the reduction
 will take place (with negative values counting backwards). Generally, it is an
 integer, though for :meth:`ufunc.reduce`, it can also be a tuple of `int` to
-reduce over several axes at once, or `None`, to reduce over all axes.
+reduce over several axes at once, or None, to reduce over all axes.
 The *dtype* keyword allows you to manage a very common problem that arises
 when naively using :meth:`ufunc.reduce`. Sometimes you may
 have an array of a certain data type and wish to add up all of its
diff --git a/doc/source/release.rst b/doc/source/release.rst
index fb4e2b14d..3bfe81243 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -6,7 +6,8 @@ Release Notes
     :maxdepth: 3
 
     1.18.0 <release/1.18.0-notes>
-    1.17.1 <release/1.17.2-notes>
+    1.17.3 <release/1.17.3-notes>
+    1.17.2 <release/1.17.2-notes>
     1.17.1 <release/1.17.1-notes>
     1.17.0 <release/1.17.0-notes>
     1.16.5 <release/1.16.5-notes>
diff --git a/doc/source/release/1.17.0-notes.rst b/doc/source/release/1.17.0-notes.rst
index 8d69e36d9..a0e737982 100644
--- a/doc/source/release/1.17.0-notes.rst
+++ b/doc/source/release/1.17.0-notes.rst
@@ -239,7 +239,7 @@ New extensible `numpy.random` module with selectable random number generators
 -----------------------------------------------------------------------------
 A new extensible `numpy.random` module along with four selectable random number
 generators and improved seeding designed for use in parallel processes has been
-added. The currently available :ref:`Bit Generators <bit_generator>` are
+added. The currently available `Bit Generators` are
 `~mt19937.MT19937`, `~pcg64.PCG64`, `~philox.Philox`, and `~sfc64.SFC64`.
 ``PCG64`` is the new default while ``MT19937`` is retained for backwards
 compatibility. Note that the legacy random module is unchanged and is now
diff --git a/doc/source/release/1.17.3-notes.rst b/doc/source/release/1.17.3-notes.rst
new file mode 100644
index 000000000..e33ca1917
--- /dev/null
+++ b/doc/source/release/1.17.3-notes.rst
@@ -0,0 +1,59 @@
+.. currentmodule:: numpy
+
+==========================
+NumPy 1.17.3 Release Notes
+==========================
+
+This release contains fixes for bugs reported against NumPy 1.17.2 along with
+some documentation improvements. The Python versions supported in this release
+are 3.5-3.8.
+
+Downstream developers should use Cython >= 0.29.13 for Python 3.8 support and
+OpenBLAS >= 0.3.7 to avoid errors on the Skylake architecture.
+
+
+Highlights
+==========
+
+- Wheels for Python 3.8
+- Boolean ``matmul`` fixed to use booleans instead of integers.
+
+
+Compatibility notes
+===================
+
+- The seldom used ``PyArray_DescrCheck`` macro has been changed/fixed.
+
+
+Contributors
+============
+
+A total of 7 people contributed to this release.  People with a "+" by their
+names contributed a patch for the first time.
+
+* Allan Haldane
+* Charles Harris
+* Kevin Sheppard
+* Matti Picus
+* Ralf Gommers
+* Sebastian Berg
+* Warren Weckesser
+
+
+Pull requests merged
+====================
+
+A total of 12 pull requests were merged for this release.
+
+* `#14456 <https://github.com/numpy/numpy/pull/14456>`__: MAINT: clean up pocketfft modules inside numpy.fft namespace.
+* `#14463 <https://github.com/numpy/numpy/pull/14463>`__: BUG: random.hypergeometric assumes npy_long is npy_int64, hung...
+* `#14502 <https://github.com/numpy/numpy/pull/14502>`__: BUG: random: Revert gh-14458 and refix gh-14557.
+* `#14504 <https://github.com/numpy/numpy/pull/14504>`__: BUG: add a specialized loop for boolean matmul.
+* `#14506 <https://github.com/numpy/numpy/pull/14506>`__: MAINT: Update pytest version for Python 3.8
+* `#14512 <https://github.com/numpy/numpy/pull/14512>`__: DOC: random: fix doc linking, was referencing private submodules.
+* `#14513 <https://github.com/numpy/numpy/pull/14513>`__: BUG,MAINT: Some fixes and minor cleanup based on clang analysis
+* `#14515 <https://github.com/numpy/numpy/pull/14515>`__: BUG: Fix randint when range is 2**32
+* `#14519 <https://github.com/numpy/numpy/pull/14519>`__: MAINT: remove the entropy c-extension module
+* `#14563 <https://github.com/numpy/numpy/pull/14563>`__: DOC: remove note about Pocketfft license file (non-existing here).
+* `#14578 <https://github.com/numpy/numpy/pull/14578>`__: BUG: random: Create a legacy implementation of random.binomial.
+* `#14687 <https://github.com/numpy/numpy/pull/14687>`__: BUG: properly define PyArray_DescrCheck
diff --git a/doc/source/user/c-info.beyond-basics.rst b/doc/source/user/c-info.beyond-basics.rst
index dd25861b4..62e8139fe 100644
--- a/doc/source/user/c-info.beyond-basics.rst
+++ b/doc/source/user/c-info.beyond-basics.rst
@@ -217,14 +217,13 @@ type will behave much like a regular data-type except ufuncs must have
 1-d loops registered to handle it separately. Also checking for
 whether or not other data-types can be cast "safely" to and from this
 new type or not will always return "can cast" unless you also register
-which types your new data-type can be cast to and from. Adding
-data-types is one of the less well-tested areas for NumPy 1.0, so
-there may be bugs remaining in the approach. Only add a new data-type
-if you can't do what you want to do using the OBJECT or VOID
-data-types that are already available. As an example of what I
-consider a useful application of the ability to add data-types is the
-possibility of adding a data-type of arbitrary precision floats to
-NumPy.
+which types your new data-type can be cast to and from.
+
+The NumPy source code includes an example of a custom data-type as part
+of its test suite. The file ``_rational_tests.c.src`` in the source code
+directory ``numpy/numpy/core/src/umath/`` contains an implementation of
+a data-type that represents a rational number as the ratio of two 32 bit
+integers.
 
 .. index::
    pair: dtype; adding new
diff --git a/doc/source/user/quickstart.rst b/doc/source/user/quickstart.rst
index a23a7b2c7..6211d0c69 100644
--- a/doc/source/user/quickstart.rst
+++ b/doc/source/user/quickstart.rst
@@ -206,8 +206,8 @@ of elements that we want, instead of the step::
     `empty_like`,
     `arange`,
     `linspace`,
-    `numpy.random.mtrand.RandomState.rand`,
-    `numpy.random.mtrand.RandomState.randn`,
+    `numpy.random.RandomState.rand`,
+    `numpy.random.RandomState.randn`,
     `fromfunction`,
     `fromfile`
 
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index ecff99587..aeff2427e 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -1331,9 +1331,9 @@ add_newdoc('numpy.core.multiarray', 'arange',
 
     See Also
     --------
-    linspace : Evenly spaced numbers with careful handling of endpoints.
-    ogrid: Arrays of evenly spaced numbers in N-dimensions.
-    mgrid: Grid-shaped arrays of evenly spaced numbers in N-dimensions.
+    numpy.linspace : Evenly spaced numbers with careful handling of endpoints.
+    numpy.ogrid: Arrays of evenly spaced numbers in N-dimensions.
+    numpy.mgrid: Grid-shaped arrays of evenly spaced numbers in N-dimensions.
 
     Examples
     --------
@@ -3711,10 +3711,10 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('sort',
     See Also
     --------
     numpy.sort : Return a sorted copy of an array.
-    argsort : Indirect sort.
-    lexsort : Indirect stable sort on multiple keys.
-    searchsorted : Find elements in sorted array.
-    partition: Partial sort.
+    numpy.argsort : Indirect sort.
+    numpy.lexsort : Indirect stable sort on multiple keys.
+    numpy.searchsorted : Find elements in sorted array.
+    numpy.partition: Partial sort.
 
     Notes
     -----
@@ -4502,7 +4502,7 @@ add_newdoc('numpy.core', 'ufunc',
         Alternate array object(s) in which to put the result; if provided, it
         must have a shape that the inputs broadcast to. A tuple of arrays
         (possible only as a keyword argument) must have length equal to the
-        number of outputs; use `None` for uninitialized outputs to be
+        number of outputs; use None for uninitialized outputs to be
         allocated by the ufunc.
     where : array_like, optional
         This condition is broadcast over the input. At locations where the
@@ -4696,7 +4696,7 @@ add_newdoc('numpy.core', 'ufunc', ('signature',
     -----
     Generalized ufuncs are used internally in many linalg functions, and in
     the testing suite; the examples below are taken from these.
-    For ufuncs that operate on scalars, the signature is `None`, which is
+    For ufuncs that operate on scalars, the signature is None, which is
     equivalent to '()' for every argument.
 
     Examples
@@ -4747,7 +4747,7 @@ add_newdoc('numpy.core', 'ufunc', ('reduce',
 
         .. versionadded:: 1.7.0
 
-        If this is `None`, a reduction is performed over all the axes.
+        If this is None, a reduction is performed over all the axes.
         If this is a tuple of ints, a reduction is performed on multiple
         axes, instead of a single axis or all the axes as before.
 
@@ -4760,7 +4760,7 @@ add_newdoc('numpy.core', 'ufunc', ('reduce',
         to the data-type of the output array if this is provided, or
         the data-type of the input array if no output array is provided.
     out : ndarray, None, or tuple of ndarray and None, optional
-        A location into which the result is stored. If not provided or `None`,
+        A location into which the result is stored. If not provided or None,
         a freshly-allocated array is returned. For consistency with
         ``ufunc.__call__``, if given as a keyword, this may be wrapped in a
         1-element tuple.
@@ -4877,7 +4877,7 @@ add_newdoc('numpy.core', 'ufunc', ('accumulate',
         to the data-type of the output array if such is provided, or the
         the data-type of the input array if no output array is provided.
     out : ndarray, None, or tuple of ndarray and None, optional
-        A location into which the result is stored. If not provided or `None`,
+        A location into which the result is stored. If not provided or None,
         a freshly-allocated array is returned. For consistency with
         ``ufunc.__call__``, if given as a keyword, this may be wrapped in a
         1-element tuple.
@@ -4959,7 +4959,7 @@ add_newdoc('numpy.core', 'ufunc', ('reduceat',
         to the data type of the output array if this is provided, or
         the data type of the input array if no output array is provided.
     out : ndarray, None, or tuple of ndarray and None, optional
-        A location into which the result is stored. If not provided or `None`,
+        A location into which the result is stored. If not provided or None,
         a freshly-allocated array is returned. For consistency with
         ``ufunc.__call__``, if given as a keyword, this may be wrapped in a
         1-element tuple.
@@ -5332,7 +5332,8 @@ add_newdoc('numpy.core.multiarray', 'dtype', ('descr',
     `__array_interface__` attribute.
 
     Warning: This attribute exists specifically for `__array_interface__`,
-    and is not a datatype description compatible with `np.dtype`.
+    and passing it directly to `np.dtype` will not accurately reconstruct
+    some dtypes (e.g., scalar and subarray dtypes).
 
     Examples
     --------
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index 5fd643505..05e401e0b 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -313,7 +313,7 @@ class _ctypes(object):
         crashing. User Beware! The value of this attribute is exactly the same
         as ``self._array_interface_['data'][0]``.
 
-        Note that unlike `data_as`, a reference will not be kept to the array:
+        Note that unlike ``data_as``, a reference will not be kept to the array:
         code like ``ctypes.c_void_p((a + b).ctypes.data)`` will result in a
         pointer to a deallocated array, and should be spelt
         ``(a + b).ctypes.data_as(ctypes.c_void_p)``
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index 8a7626d9d..401018015 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -111,7 +111,7 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
     ----------
     precision : int or None, optional
         Number of digits of precision for floating point output (default 8).
-        May be `None` if `floatmode` is not `fixed`, to print as many digits as
+        May be None if `floatmode` is not `fixed`, to print as many digits as
         necessary to uniquely specify the value.
     threshold : int, optional
         Total number of array elements which trigger summarization
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 0d3bbffe9..e0b6a654c 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -287,7 +287,7 @@ defdict = {
     Ufunc(2, 1, None, # Zero is only a unit to the right, not the left
           docstrings.get('numpy.core.umath.subtract'),
           'PyUFunc_SubtractionTypeResolver',
-          TD(notimes_or_obj, simd=[('avx2', ints)]),
+          TD(ints + inexact, simd=[('avx2', ints)]),
           [TypeDescription('M', FullTypeDescr, 'Mm', 'M'),
            TypeDescription('m', FullTypeDescr, 'mm', 'm'),
            TypeDescription('M', FullTypeDescr, 'MM', 'm'),
@@ -409,7 +409,7 @@ defdict = {
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.negative'),
           'PyUFunc_NegativeTypeResolver',
-          TD(bints+flts+timedeltaonly, simd=[('avx2', ints)]),
+          TD(ints+flts+timedeltaonly, simd=[('avx2', ints)]),
           TD(cmplx, f='neg'),
           TD(O, f='PyNumber_Negative'),
           ),
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 1ac477b54..4dec73505 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -22,7 +22,7 @@ subst = {
     'PARAMS': textwrap.dedent("""
         out : ndarray, None, or tuple of ndarray and None, optional
             A location into which the result is stored. If provided, it must have
-            a shape that the inputs broadcast to. If not provided or `None`,
+            a shape that the inputs broadcast to. If not provided or None,
             a freshly-allocated array is returned. A tuple (possible only as a
             keyword argument) must have length equal to the number of outputs.
         where : array_like, optional
@@ -2596,7 +2596,7 @@ add_newdoc('numpy.core.umath', 'matmul',
     out : ndarray, optional
         A location into which the result is stored. If provided, it must have
         a shape that matches the signature `(n,k),(k,m)->(n,m)`. If not
-        provided or `None`, a freshly-allocated array is returned.
+        provided or None, a freshly-allocated array is returned.
     **kwargs
         For other keyword-only arguments, see the
         :ref:`ufunc docs <ufuncs.kwargs>`.
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index a941c5b81..2d89d6fe0 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -82,7 +82,7 @@ def _clean_args(*args):
 
     Many of the Python string operations that have optional arguments
     do not use 'None' to indicate a default value.  In these cases,
-    we need to remove all `None` arguments, and those following them.
+    we need to remove all None arguments, and those following them.
     """
     newargs = []
     for chk in args:
@@ -1333,7 +1333,7 @@ def rsplit(a, sep=None, maxsplit=None):
     a : array_like of str or unicode
 
     sep : str or unicode, optional
-        If `sep` is not specified or `None`, any whitespace string
+        If `sep` is not specified or None, any whitespace string
         is a separator.
     maxsplit : int, optional
         If `maxsplit` is given, at most `maxsplit` splits are done,
@@ -1417,7 +1417,7 @@ def split(a, sep=None, maxsplit=None):
     a : array_like of str or unicode
 
     sep : str or unicode, optional
-       If `sep` is not specified or `None`, any whitespace string is a
+       If `sep` is not specified or None, any whitespace string is a
        separator.
 
     maxsplit : int, optional
@@ -2659,7 +2659,7 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
     unicode : bool, optional
         When true, the resulting `chararray` can contain Unicode
         characters, when false only 8-bit characters.  If unicode is
-        `None` and `obj` is one of the following:
+        None and `obj` is one of the following:
 
           - a `chararray`,
           - an ndarray of type `str` or `unicode`
@@ -2799,7 +2799,7 @@ def asarray(obj, itemsize=None, unicode=None, order=None):
     unicode : bool, optional
         When true, the resulting `chararray` can contain Unicode
         characters, when false only 8-bit characters.  If unicode is
-        `None` and `obj` is one of the following:
+        None and `obj` is one of the following:
 
           - a `chararray`,
           - an ndarray of type `str` or 'unicode`
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 6c0b9cde9..5f7716455 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -1409,7 +1409,7 @@ def squeeze(a, axis=None):
     Raises
     ------
     ValueError
-        If `axis` is not `None`, and an axis being squeezed is not of length 1
+        If `axis` is not None, and an axis being squeezed is not of length 1
 
     See Also
     --------
@@ -1945,7 +1945,7 @@ def compress(condition, a, axis=None, out=None):
     take, choose, diag, diagonal, select
     ndarray.compress : Equivalent method in ndarray
     np.extract: Equivalent method when working on 1-D arrays
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Examples
     --------
@@ -1995,14 +1995,14 @@ def clip(a, a_min, a_max, out=None, **kwargs):
     ----------
     a : array_like
         Array containing elements to clip.
-    a_min : scalar or array_like or `None`
-        Minimum value. If `None`, clipping is not performed on lower
+    a_min : scalar or array_like or None
+        Minimum value. If None, clipping is not performed on lower
         interval edge. Not more than one of `a_min` and `a_max` may be
-        `None`.
-    a_max : scalar or array_like or `None`
-        Maximum value. If `None`, clipping is not performed on upper
+        None.
+    a_max : scalar or array_like or None
+        Maximum value. If None, clipping is not performed on upper
         interval edge. Not more than one of `a_min` and `a_max` may be
-        `None`. If `a_min` or `a_max` are array_like, then the three
+        None. If `a_min` or `a_max` are array_like, then the three
         arrays will be broadcasted to match their shapes.
     out : ndarray, optional
         The results will be placed in this array. It may be the input
@@ -2023,7 +2023,7 @@ def clip(a, a_min, a_max, out=None, **kwargs):
 
     See Also
     --------
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Examples
     --------
@@ -2206,7 +2206,7 @@ def any(a, axis=None, out=None, keepdims=np._NoValue):
         Input array or object that can be converted to an array.
     axis : None or int or tuple of ints, optional
         Axis or axes along which a logical OR reduction is performed.
-        The default (`axis` = `None`) is to perform a logical OR over all
+        The default (``axis=None``) is to perform a logical OR over all
         the dimensions of the input array. `axis` may be negative, in
         which case it counts from the last to the first axis.
 
@@ -2219,7 +2219,7 @@ def any(a, axis=None, out=None, keepdims=np._NoValue):
         the same shape as the expected output and its type is preserved
         (e.g., if it is of type float, then it will remain so, returning
         1.0 for True and 0.0 for False, regardless of the type of `a`).
-        See `doc.ufuncs` (Section "Output arguments") for details.
+        See `ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2292,7 +2292,7 @@ def all(a, axis=None, out=None, keepdims=np._NoValue):
         Input array or object that can be converted to an array.
     axis : None or int or tuple of ints, optional
         Axis or axes along which a logical AND reduction is performed.
-        The default (`axis` = `None`) is to perform a logical AND over all
+        The default (``axis=None``) is to perform a logical AND over all
         the dimensions of the input array. `axis` may be negative, in
         which case it counts from the last to the first axis.
 
@@ -2304,8 +2304,8 @@ def all(a, axis=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.
         It must have the same shape as the expected output and its
         type is preserved (e.g., if ``dtype(out)`` is float, the result
-        will consist of 0.0's and 1.0's).  See `doc.ufuncs` (Section
-        "Output arguments") for more details.
+        will consist of 0.0's and 1.0's). See `ufuncs-output-type` for more
+        details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2383,8 +2383,8 @@ def cumsum(a, axis=None, dtype=None, out=None):
     out : ndarray, optional
         Alternative output array in which to place the result. It must
         have the same shape and buffer length as the expected output
-        but the type will be cast if necessary. See `doc.ufuncs`
-        (Section "Output arguments") for more details.
+        but the type will be cast if necessary. See `ufuncs-output-type` for
+        more details.
 
     Returns
     -------
@@ -2529,7 +2529,7 @@ def amax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
     out : ndarray, optional
         Alternative output array in which to place the result.  Must
         be of the same shape and buffer length as the expected output.
-        See `doc.ufuncs` (Section "Output arguments") for more details.
+        See `ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2654,7 +2654,7 @@ def amin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
     out : ndarray, optional
         Alternative output array in which to place the result.  Must
         be of the same shape and buffer length as the expected output.
-        See `doc.ufuncs` (Section "Output arguments") for more details.
+        See `ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2861,7 +2861,7 @@ def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
     See Also
     --------
     ndarray.prod : equivalent method
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
@@ -2957,7 +2957,7 @@ def cumprod(a, axis=None, dtype=None, out=None):
 
     See Also
     --------
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
@@ -3103,8 +3103,8 @@ def around(a, decimals=0, out=None):
     out : ndarray, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output, but the type of the output
-        values will be cast if necessary. See `doc.ufuncs` (Section
-        "Output arguments") for details.
+        values will be cast if necessary. See `ufuncs-output-type` for more
+        details.
 
     Returns
     -------
@@ -3218,7 +3218,7 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
         expected output, but the type will be cast if necessary.
-        See `doc.ufuncs` for details.
+        See `ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -3353,7 +3353,7 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     See Also
     --------
     var, mean, nanmean, nanstd, nanvar
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
@@ -3478,7 +3478,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     See Also
     --------
     std, mean, nanmean, nanstd, nanvar
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index 750f69db8..1e011e2e7 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -292,7 +292,7 @@ def full(shape, fill_value, dtype=None, order='C'):
     fill_value : scalar
         Fill value.
     dtype : data-type, optional
-        The desired data-type for the array  The default, `None`, means
+        The desired data-type for the array.  The default, None, means
          `np.array(fill_value).dtype`.
     order : {'C', 'F'}, optional
         Whether to store multidimensional data in C- or Fortran-contiguous
@@ -2098,9 +2098,9 @@ def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
     `atol` are added together to compare against the absolute difference
     between `a` and `b`.
 
-    If either array contains one or more NaNs, False is returned.
-    Infs are treated as equal if they are in the same place and of the same
-    sign in both arrays.
+    NaNs are treated as equal if they are in the same place and if
+    ``equal_nan=True``.  Infs are treated as equal if they are in the same
+    place and of the same sign in both arrays.
 
     Parameters
     ----------
@@ -2112,7 +2112,7 @@ def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
         The absolute tolerance parameter (see Notes).
     equal_nan : bool
         Whether to compare NaN's as equal.  If True, NaN's in `a` will be
-        considered equal to NaN's in `b`.
+        considered equal to NaN's in `b` in the output array.
 
         .. versionadded:: 1.10.0
 
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 5f2f4a7b2..a33318472 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -655,6 +655,9 @@ def configuration(parent_package='',top_path=None):
         # compiler does not work).
         st = config_cmd.try_link('int main(void) { return 0;}')
         if not st:
+            # rerun the failing command in verbose mode
+            config_cmd.compiler.verbose = True
+            config_cmd.try_link('int main(void) { return 0;}')
             raise RuntimeError("Broken toolchain: cannot link a simple C program")
         mlibs = check_mathlib(config_cmd)
 
diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index d7e769e62..369d956fb 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -472,7 +472,7 @@ def _block_check_depths_match(arrays, parent_index=[]):
     first_index : list of int
         The full index of an element from the bottom of the nesting in
         `arrays`. If any element at the bottom is an empty list, this will
-        refer to it, and the last index along the empty axis will be `None`.
+        refer to it, and the last index along the empty axis will be None.
     max_arr_ndim : int
         The maximum of the ndims of the arrays nested in `arrays`.
     final_size: int
diff --git a/numpy/core/src/common/binop_override.h b/numpy/core/src/common/binop_override.h
index 47df63e38..c5e7ab808 100644
--- a/numpy/core/src/common/binop_override.h
+++ b/numpy/core/src/common/binop_override.h
@@ -129,11 +129,14 @@ binop_should_defer(PyObject *self, PyObject *other, int inplace)
      * check whether __array_ufunc__ equals None.
      */
     attr = PyArray_LookupSpecial(other, "__array_ufunc__");
-    if (attr) {
+    if (attr != NULL) {
         defer = !inplace && (attr == Py_None);
         Py_DECREF(attr);
         return defer;
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
     /*
      * Otherwise, we need to check for the legacy __array_priority__. But if
      * other.__class__ is a subtype of self.__class__, then it's already had
diff --git a/numpy/core/src/common/get_attr_string.h b/numpy/core/src/common/get_attr_string.h
index d458d9550..d3401aea6 100644
--- a/numpy/core/src/common/get_attr_string.h
+++ b/numpy/core/src/common/get_attr_string.h
@@ -40,18 +40,14 @@ _is_basic_python_type(PyTypeObject *tp)
 }
 
 /*
- * Stripped down version of PyObject_GetAttrString,
- * avoids lookups for None, tuple, and List objects,
- * and doesn't create a PyErr since this code ignores it.
+ * Stripped down version of PyObject_GetAttrString(obj, name) that does not
+ * raise PyExc_AttributeError.
  *
- * This can be much faster then PyObject_GetAttrString where
- * exceptions are not used by caller.
+ * This allows it to avoid creating then discarding exception objects when
+ * performing lookups on objects without any attributes.
  *
- * 'obj' is the object to search for attribute.
- *
- * 'name' is the attribute to search for.
- *
- * Returns attribute value on success, NULL on failure.
+ * Returns attribute value on success, NULL without an exception set if
+ * there is no such attribute, and NULL with an exception on failure.
  */
 static NPY_INLINE PyObject *
 maybe_get_attr(PyObject *obj, char *name)
@@ -62,7 +58,7 @@ maybe_get_attr(PyObject *obj, char *name)
     /* Attribute referenced by (char *)name */
     if (tp->tp_getattr != NULL) {
         res = (*tp->tp_getattr)(obj, name);
-        if (res == NULL) {
+        if (res == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
             PyErr_Clear();
         }
     }
@@ -78,7 +74,7 @@ maybe_get_attr(PyObject *obj, char *name)
         }
         res = (*tp->tp_getattro)(obj, w);
         Py_DECREF(w);
-        if (res == NULL) {
+        if (res == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
             PyErr_Clear();
         }
     }
diff --git a/numpy/core/src/common/ufunc_override.c b/numpy/core/src/common/ufunc_override.c
index 89f08a9cb..3f699bcdd 100644
--- a/numpy/core/src/common/ufunc_override.c
+++ b/numpy/core/src/common/ufunc_override.c
@@ -36,6 +36,9 @@ PyUFuncOverride_GetNonDefaultArrayUfunc(PyObject *obj)
      */
     cls_array_ufunc = PyArray_LookupSpecial(obj, "__array_ufunc__");
     if (cls_array_ufunc == NULL) {
+        if (PyErr_Occurred()) {
+            PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+        }
         return NULL;
     }
     /* Ignore if the same as ndarray.__array_ufunc__ */
diff --git a/numpy/core/src/multiarray/arrayfunction_override.c b/numpy/core/src/multiarray/arrayfunction_override.c
index 62e597764..9ea8efdd9 100644
--- a/numpy/core/src/multiarray/arrayfunction_override.c
+++ b/numpy/core/src/multiarray/arrayfunction_override.c
@@ -26,6 +26,7 @@ static PyObject *
 get_array_function(PyObject *obj)
 {
     static PyObject *ndarray_array_function = NULL;
+    PyObject *array_function;
 
     if (ndarray_array_function == NULL) {
         ndarray_array_function = get_ndarray_array_function();
@@ -37,7 +38,12 @@ get_array_function(PyObject *obj)
         return ndarray_array_function;
     }
 
-    return PyArray_LookupSpecial(obj, "__array_function__");
+    array_function = PyArray_LookupSpecial(obj, "__array_function__");
+    if (array_function == NULL && PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
+
+    return array_function;
 }
 
 
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 4e229e321..5ed5b7635 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -557,7 +557,7 @@ PyArray_DebugPrint(PyArrayObject *obj)
     printf(" ndim   : %d\n", fobj->nd);
     printf(" shape  :");
     for (i = 0; i < fobj->nd; ++i) {
-        printf(" %d", (int)fobj->dimensions[i]);
+        printf(" %" NPY_INTP_FMT, fobj->dimensions[i]);
     }
     printf("\n");
 
@@ -567,7 +567,7 @@ PyArray_DebugPrint(PyArrayObject *obj)
     printf(" data   : %p\n", fobj->data);
     printf(" strides:");
     for (i = 0; i < fobj->nd; ++i) {
-        printf(" %d", (int)fobj->strides[i]);
+        printf(" %" NPY_INTP_FMT, fobj->strides[i]);
     }
     printf("\n");
 
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index fca7ce591..2b6f0361d 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -1081,6 +1081,7 @@ TIMEDELTA_setitem(PyObject *op, void *ov, void *vap)
  *           npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *           npy_float, npy_double, npy_longdouble,
  *           npy_datetime, npy_timedelta#
+ * #supports_nat = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1#
  */
 
 /**begin repeat1
@@ -1092,6 +1093,7 @@ TIMEDELTA_setitem(PyObject *op, void *ov, void *vap)
  *             npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *             npy_float, npy_double, npy_longdouble,
  *             npy_datetime, npy_timedelta#
+ * #floatingpoint = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0#
  */
 static void
 @FROMTYPE@_to_@TOTYPE@(void *input, void *output, npy_intp n,
@@ -1101,7 +1103,15 @@ static void
     @totype@ *op = output;
 
     while (n--) {
-        *op++ = (@totype@)*ip++;
+        @fromtype@ f = *ip++;
+        @totype@ t = (@totype@)f;
+#if @supports_nat@ && @floatingpoint@
+        /* Avoid undefined behaviour for NaN -> NaT */
+        if (npy_isnan(f)) {
+            t = (@totype@)NPY_DATETIME_NAT;
+        }
+#endif
+        *op++ = t;
     }
 }
 /**end repeat1**/
@@ -1119,7 +1129,15 @@ static void
     @totype@ *op = output;
 
     while (n--) {
-        *op++ = (@totype@)*ip;
+        @fromtype@ f = *ip;
+        @totype@ t = (@totype@)f;
+#if @supports_nat@
+        /* Avoid undefined behaviour for NaN -> NaT */
+        if (npy_isnan(f)) {
+            t = (@totype@)NPY_DATETIME_NAT;
+        }
+#endif
+        *op++ = t;
         ip += 2;
     }
 }
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 3270bc20d..a71b0818c 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -367,6 +367,10 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
         }
         Py_DECREF(ip);
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
+
 
     /* The array struct interface */
     ip = PyArray_LookupSpecial_OnInstance(obj, "__array_struct__");
@@ -389,6 +393,9 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
         }
         Py_DECREF(ip);
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
 
     /* The old buffer interface */
 #if !defined(NPY_PY3K)
@@ -419,6 +426,9 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
             goto fail;
         }
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
 
     /*
      * If we reached the maximum recursion depth without hitting one
diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index 4baa02052..5f0ad5817 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -667,8 +667,8 @@ PyArray_ConvertClipmodeSequence(PyObject *object, NPY_CLIPMODE *modes, int n)
     if (object && (PyTuple_Check(object) || PyList_Check(object))) {
         if (PySequence_Size(object) != n) {
             PyErr_Format(PyExc_ValueError,
-                    "list of clipmodes has wrong length (%d instead of %d)",
-                    (int)PySequence_Size(object), n);
+                    "list of clipmodes has wrong length (%zd instead of %d)",
+                    PySequence_Size(object), n);
             return NPY_FAIL;
         }
 
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 5174bd889..62804b979 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -544,8 +544,8 @@ setArrayFromSequence(PyArrayObject *a, PyObject *s,
      */
     if (slen != PyArray_DIMS(a)[dim] && slen != 1) {
         PyErr_Format(PyExc_ValueError,
-                 "cannot copy sequence with size %d to array axis "
-                 "with dimension %d", (int)slen, (int)PyArray_DIMS(a)[dim]);
+                 "cannot copy sequence with size %zd to array axis "
+                 "with dimension %" NPY_INTP_FMT, slen, PyArray_DIMS(a)[dim]);
         goto fail;
     }
 
@@ -852,6 +852,10 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
             return 0;
         }
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
+
 
     /* obj has the __array_interface__ interface */
     e = PyArray_LookupSpecial_OnInstance(obj, "__array_interface__");
@@ -881,6 +885,9 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
             return 0;
         }
     }
+    else if (PyErr_Occurred()) {
+        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+    }
 
     seq = PySequence_Fast(obj, "Could not convert object to sequence");
     if (seq == NULL) {
@@ -2351,7 +2358,11 @@ PyArray_FromStructInterface(PyObject *input)
 
     attr = PyArray_LookupSpecial_OnInstance(input, "__array_struct__");
     if (attr == NULL) {
-        return Py_NotImplemented;
+        if (PyErr_Occurred()) {
+            return NULL;
+        } else {
+            return Py_NotImplemented;
+        }
     }
     if (!NpyCapsule_Check(attr)) {
         goto fail;
@@ -2463,6 +2474,9 @@ PyArray_FromInterface(PyObject *origin)
     iface = PyArray_LookupSpecial_OnInstance(origin,
                                                     "__array_interface__");
     if (iface == NULL) {
+        if (PyErr_Occurred()) {
+            PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+        }
         return Py_NotImplemented;
     }
     if (!PyDict_Check(iface)) {
@@ -2716,6 +2730,9 @@ PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context)
 
     array_meth = PyArray_LookupSpecial_OnInstance(op, "__array__");
     if (array_meth == NULL) {
+        if (PyErr_Occurred()) {
+            PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+        }
         return Py_NotImplemented;
     }
     if (context == NULL) {
@@ -2894,8 +2911,8 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
     src_size = PyArray_SIZE(src);
     if (dst_size != src_size) {
         PyErr_Format(PyExc_ValueError,
-                "cannot copy from array of size %d into an array "
-                "of size %d", (int)src_size, (int)dst_size);
+                "cannot copy from array of size %" NPY_INTP_FMT " into an array "
+                "of size %" NPY_INTP_FMT, src_size, dst_size);
         return -1;
     }
 
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index de81bcea1..72a3df89c 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -758,8 +758,8 @@ parse_datetime_extended_unit_from_string(char *str, Py_ssize_t len,
 bad_input:
     if (metastr != NULL) {
         PyErr_Format(PyExc_TypeError,
-                "Invalid datetime metadata string \"%s\" at position %d",
-                metastr, (int)(substr-metastr));
+                "Invalid datetime metadata string \"%s\" at position %zd",
+                metastr, substr-metastr);
     }
     else {
         PyErr_Format(PyExc_TypeError,
@@ -820,8 +820,8 @@ parse_datetime_metadata_from_metastr(char *metastr, Py_ssize_t len,
 bad_input:
     if (substr != metastr) {
         PyErr_Format(PyExc_TypeError,
-                "Invalid datetime metadata string \"%s\" at position %d",
-                metastr, (int)(substr-metastr));
+                "Invalid datetime metadata string \"%s\" at position %zd",
+                metastr, substr - metastr);
     }
     else {
         PyErr_Format(PyExc_TypeError,
@@ -2273,15 +2273,15 @@ convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out,
 
 invalid_date:
     PyErr_Format(PyExc_ValueError,
-            "Invalid date (%d,%d,%d) when converting to NumPy datetime",
-            (int)out->year, (int)out->month, (int)out->day);
+            "Invalid date (%" NPY_INT64_FMT ",%" NPY_INT32_FMT ",%" NPY_INT32_FMT ") when converting to NumPy datetime",
+            out->year, out->month, out->day);
     return -1;
 
 invalid_time:
     PyErr_Format(PyExc_ValueError,
-            "Invalid time (%d,%d,%d,%d) when converting "
+            "Invalid time (%" NPY_INT32_FMT ",%" NPY_INT32_FMT ",%" NPY_INT32_FMT ",%" NPY_INT32_FMT ") when converting "
             "to NumPy datetime",
-            (int)out->hour, (int)out->min, (int)out->sec, (int)out->us);
+            out->hour, out->min, out->sec, out->us);
     return -1;
 }
 
diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c
index 95b7bb3dc..dfc01494f 100644
--- a/numpy/core/src/multiarray/datetime_strings.c
+++ b/numpy/core/src/multiarray/datetime_strings.c
@@ -743,8 +743,8 @@ finish:
 
 parse_error:
     PyErr_Format(PyExc_ValueError,
-            "Error parsing datetime string \"%s\" at position %d",
-            str, (int)(substr-str));
+            "Error parsing datetime string \"%s\" at position %zd",
+            str, substr - str);
     return -1;
 
 error:
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 23d140cf6..522b69307 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -1149,8 +1149,8 @@ _convert_from_dict(PyObject *obj, int align)
             }
             Py_DECREF(off);
             if (offset < 0) {
-                PyErr_Format(PyExc_ValueError, "offset %d cannot be negative",
-                             (int)offset);
+                PyErr_Format(PyExc_ValueError, "offset %ld cannot be negative",
+                             offset);
                 Py_DECREF(tup);
                 Py_DECREF(ind);
                 goto fail;
@@ -1164,10 +1164,10 @@ _convert_from_dict(PyObject *obj, int align)
             /* If align=True, enforce field alignment */
             if (align && offset % newdescr->alignment != 0) {
                 PyErr_Format(PyExc_ValueError,
-                        "offset %d for NumPy dtype with fields is "
+                        "offset %ld for NumPy dtype with fields is "
                         "not divisible by the field alignment %d "
                         "with align=True",
-                        (int)offset, (int)newdescr->alignment);
+                        offset, newdescr->alignment);
                 ret = NPY_FAIL;
             }
             else if (offset + newdescr->elsize > totalsize) {
@@ -1286,7 +1286,7 @@ _convert_from_dict(PyObject *obj, int align)
             PyErr_Format(PyExc_ValueError,
                     "NumPy dtype descriptor requires %d bytes, "
                     "cannot override to smaller itemsize of %d",
-                    (int)new->elsize, (int)itemsize);
+                    new->elsize, itemsize);
             Py_DECREF(new);
             goto fail;
         }
@@ -1295,7 +1295,7 @@ _convert_from_dict(PyObject *obj, int align)
             PyErr_Format(PyExc_ValueError,
                     "NumPy dtype descriptor requires alignment of %d bytes, "
                     "which is not divisible into the specified itemsize %d",
-                    (int)new->alignment, (int)itemsize);
+                    new->alignment, itemsize);
             Py_DECREF(new);
             goto fail;
         }
diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src
index e7bbc3d0b..58af44091 100644
--- a/numpy/core/src/multiarray/einsum.c.src
+++ b/numpy/core/src/multiarray/einsum.c.src
@@ -1876,7 +1876,7 @@ parse_operand_subscripts(char *subscripts, int length,
      * later where it matters the char is cast to a signed char.
      */
     for (idim = 0; idim < ndim - 1; ++idim) {
-        int label = op_labels[idim];
+        int label = (signed char)op_labels[idim];
         /* If it is a proper label, find any duplicates of it. */
         if (label > 0) {
             /* Search for the next matching label. */
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 247864775..8dcd28c84 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1198,9 +1198,9 @@ array_assign_boolean_subscript(PyArrayObject *self,
         if (size != PyArray_DIMS(v)[0]) {
             PyErr_Format(PyExc_ValueError,
                     "NumPy boolean array indexing assignment "
-                    "cannot assign %d input values to "
-                    "the %d output values where the mask is true",
-                    (int)PyArray_DIMS(v)[0], (int)size);
+                    "cannot assign %" NPY_INTP_FMT " input values to "
+                    "the %" NPY_INTP_FMT " output values where the mask is true",
+                    PyArray_DIMS(v)[0], size);
             return -1;
         }
         v_stride = PyArray_STRIDES(v)[0];
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 441567049..ab70367c5 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -118,6 +118,9 @@ PyArray_GetPriority(PyObject *obj, double default_)
 
     ret = PyArray_LookupSpecial_OnInstance(obj, "__array_priority__");
     if (ret == NULL) {
+        if (PyErr_Occurred()) {
+            PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
+        }
         return default_;
     }
 
@@ -1562,8 +1565,7 @@ _array_fromobject(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kws)
     PyArrayObject *oparr = NULL, *ret = NULL;
     npy_bool subok = NPY_FALSE;
     npy_bool copy = NPY_TRUE;
-    int nd;
-    npy_intp ndmin = 0;
+    int ndmin = 0, nd;
     PyArray_Descr *type = NULL;
     PyArray_Descr *oldtype = NULL;
     NPY_ORDER order = NPY_KEEPORDER;
@@ -1625,13 +1627,14 @@ _array_fromobject(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kws)
 
             ndmin_obj = PyDict_GetItem(kws, npy_ma_str_ndmin);
             if (ndmin_obj) {
-                ndmin = PyLong_AsLong(ndmin_obj);
-                if (error_converting(ndmin)) {
+                long t = PyLong_AsLong(ndmin_obj);
+                if (error_converting(t)) {
                     goto clean_type;
                 }
-                else if (ndmin > NPY_MAXDIMS) {
+                else if (t > NPY_MAXDIMS) {
                     goto full_path;
                 }
+                ndmin = t;
             }
 
             /* copy=False with default dtype, order (any is OK) and ndim */
@@ -2063,7 +2066,7 @@ array_fromfile(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds)
     if (file == NULL) {
         return NULL;
     }
-    
+
     if (offset != 0 && strcmp(sep, "") != 0) {
         PyErr_SetString(PyExc_TypeError, "'offset' argument only permitted for binary files");
         Py_XDECREF(type);
@@ -3265,7 +3268,7 @@ array_datetime_data(PyObject *NPY_UNUSED(dummy), PyObject *args)
     }
 
     meta = get_datetime_metadata_from_dtype(dtype);
-    Py_DECREF(dtype);    
+    Py_DECREF(dtype);
     if (meta == NULL) {
         return NULL;
     }
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index db0bfcece..e7fe0fa50 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -371,8 +371,8 @@ NpyIter_ResetToIterIndexRange(NpyIter *iter,
         }
         if (errmsg == NULL) {
             PyErr_Format(PyExc_ValueError,
-                    "Out-of-bounds range [%d, %d) passed to "
-                    "ResetToIterIndexRange", (int)istart, (int)iend);
+                    "Out-of-bounds range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to "
+                    "ResetToIterIndexRange", istart, iend);
         }
         else {
             *errmsg = "Out-of-bounds range passed to ResetToIterIndexRange";
@@ -382,8 +382,8 @@ NpyIter_ResetToIterIndexRange(NpyIter *iter,
     else if (iend < istart) {
         if (errmsg == NULL) {
             PyErr_Format(PyExc_ValueError,
-                    "Invalid range [%d, %d) passed to ResetToIterIndexRange",
-                    (int)istart, (int)iend);
+                    "Invalid range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to ResetToIterIndexRange",
+                    istart, iend);
         }
         else {
             *errmsg = "Invalid range passed to ResetToIterIndexRange";
@@ -1429,8 +1429,8 @@ NpyIter_DebugPrint(NpyIter *iter)
         printf("REUSE_REDUCE_LOOPS ");
 
     printf("\n");
-    printf("| NDim: %d\n", (int)ndim);
-    printf("| NOp: %d\n", (int)nop);
+    printf("| NDim: %d\n", ndim);
+    printf("| NOp: %d\n", nop);
     if (NIT_MASKOP(iter) >= 0) {
         printf("| MaskOp: %d\n", (int)NIT_MASKOP(iter));
     }
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index d40836dc2..5e770338d 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -154,7 +154,7 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
     if (nop > NPY_MAXARGS) {
         PyErr_Format(PyExc_ValueError,
             "Cannot construct an iterator with more than %d operands "
-            "(%d were requested)", (int)NPY_MAXARGS, (int)nop);
+            "(%d were requested)", NPY_MAXARGS, nop);
         return NULL;
     }
 
@@ -810,7 +810,7 @@ npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
         PyErr_Format(PyExc_ValueError,
                 "Cannot construct an iterator with more than %d dimensions "
                 "(%d were requested for op_axes)",
-                (int)NPY_MAXDIMS, oa_ndim);
+                NPY_MAXDIMS, oa_ndim);
         return 0;
     }
     if (op_axes == NULL) {
@@ -826,14 +826,14 @@ npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
         if (axes != NULL) {
             memset(axes_dupcheck, 0, NPY_MAXDIMS);
             for (idim = 0; idim < oa_ndim; ++idim) {
-                npy_intp i = axes[idim];
+                int i = axes[idim];
                 if (i >= 0) {
                     if (i >= NPY_MAXDIMS) {
                         PyErr_Format(PyExc_ValueError,
                                 "The 'op_axes' provided to the iterator "
                                 "constructor for operand %d "
                                 "contained invalid "
-                                "values %d", (int)iop, (int)i);
+                                "values %d", iop, i);
                         return 0;
                     }
                     else if (axes_dupcheck[i] == 1) {
@@ -841,7 +841,7 @@ npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
                                 "The 'op_axes' provided to the iterator "
                                 "constructor for operand %d "
                                 "contained duplicate "
-                                "value %d", (int)iop, (int)i);
+                                "value %d", iop, i);
                         return 0;
                     }
                     else {
@@ -1311,7 +1311,7 @@ npyiter_check_casting(int nop, PyArrayObject **op,
                 PyObject *errmsg;
                 errmsg = PyUString_FromFormat(
                         "Iterator operand %d dtype could not be cast from ",
-                        (int)iop);
+                        iop);
                 PyUString_ConcatAndDel(&errmsg,
                         PyObject_Repr((PyObject *)PyArray_DESCR(op[iop])));
                 PyUString_ConcatAndDel(&errmsg,
@@ -1342,7 +1342,7 @@ npyiter_check_casting(int nop, PyArrayObject **op,
                 PyUString_ConcatAndDel(&errmsg,
                         PyUString_FromFormat(", the operand %d dtype, "
                                 "according to the rule %s",
-                                (int)iop,
+                                iop,
                                 npyiter_casting_to_string(casting)));
                 PyErr_SetObject(PyExc_TypeError, errmsg);
                 Py_DECREF(errmsg);
@@ -1500,8 +1500,8 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
                                     "Iterator input op_axes[%d][%d] (==%d) "
                                     "is not a valid axis of op[%d], which "
                                     "has %d dimensions ",
-                                    (int)iop, (int)(ndim-idim-1), (int)i,
-                                    (int)iop, (int)ondim);
+                                    iop, (ndim-idim-1), i,
+                                    iop, ondim);
                             return 0;
                         }
                     }
diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c
index 4b9d41aa4..246f9d382 100644
--- a/numpy/core/src/multiarray/nditer_pywrap.c
+++ b/numpy/core/src/multiarray/nditer_pywrap.c
@@ -2016,7 +2016,7 @@ npyiter_seq_item(NewNpyArrayIterObject *self, Py_ssize_t i)
 
     if (i < 0 || i >= nop) {
         PyErr_Format(PyExc_IndexError,
-                "Iterator operand index %d is out of bounds", (int)i_orig);
+                "Iterator operand index %zd is out of bounds", i_orig);
         return NULL;
     }
 
@@ -2030,7 +2030,7 @@ npyiter_seq_item(NewNpyArrayIterObject *self, Py_ssize_t i)
      */
     if (!self->readflags[i]) {
         PyErr_Format(PyExc_RuntimeError,
-                "Iterator operand %d is write-only", (int)i);
+                "Iterator operand %zd is write-only", i);
         return NULL;
     }
 #endif
@@ -2147,12 +2147,12 @@ npyiter_seq_ass_item(NewNpyArrayIterObject *self, Py_ssize_t i, PyObject *v)
 
     if (i < 0 || i >= nop) {
         PyErr_Format(PyExc_IndexError,
-                "Iterator operand index %d is out of bounds", (int)i_orig);
+                "Iterator operand index %zd is out of bounds", i_orig);
         return -1;
     }
     if (!self->writeflags[i]) {
         PyErr_Format(PyExc_RuntimeError,
-                "Iterator operand %d is not writeable", (int)i_orig);
+                "Iterator operand %zd is not writeable", i_orig);
         return -1;
     }
 
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index e98a1ac3c..0ef14a809 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -7,14 +7,12 @@
 #define _NPY_UMATH_LOOPS_H_
 
 #define BOOL_invert BOOL_logical_not
-#define BOOL_negative BOOL_logical_not
 #define BOOL_add BOOL_logical_or
 #define BOOL_bitwise_and BOOL_logical_and
 #define BOOL_bitwise_or BOOL_logical_or
 #define BOOL_logical_xor BOOL_not_equal
 #define BOOL_bitwise_xor BOOL_logical_xor
 #define BOOL_multiply BOOL_logical_and
-#define BOOL_subtract BOOL_logical_xor
 #define BOOL_maximum BOOL_logical_or
 #define BOOL_minimum BOOL_logical_and
 #define BOOL_fmax BOOL_maximum
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index e4ad3dc84..1dc581977 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -4058,8 +4058,8 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
     for (i = 0; i < ind_size; ++i) {
         if (reduceat_ind[i] < 0 || reduceat_ind[i] >= red_axis_size) {
             PyErr_Format(PyExc_IndexError,
-                "index %d out-of-bounds in %s.%s [0, %d)",
-                (int)reduceat_ind[i], ufunc_name, opname, (int)red_axis_size);
+                "index %" NPY_INTP_FMT " out-of-bounds in %s.%s [0, %" NPY_INTP_FMT ")",
+                reduceat_ind[i], ufunc_name, opname, red_axis_size);
             return NULL;
         }
     }
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 9be7b63a0..f93d8229e 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -883,7 +883,7 @@ PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
         /* The type resolver would have upcast already */
         if (out_dtypes[0]->type_num == NPY_BOOL) {
             PyErr_Format(PyExc_TypeError,
-                "numpy boolean subtract, the `-` operator, is deprecated, "
+                "numpy boolean subtract, the `-` operator, is not supported, "
                 "use the bitwise_xor, the `^` operator, or the logical_xor "
                 "function instead.");
             return -1;
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index 11f900c5f..a756dc7e7 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -483,6 +483,30 @@ class TestDateTime(object):
         assert_equal(np.datetime64(a, '[Y]'), np.datetime64('NaT', '[Y]'))
         assert_equal(np.datetime64(a, '[W]'), np.datetime64('NaT', '[W]'))
 
+        # NaN -> NaT
+        nan = np.array([np.nan] * 8)
+        fnan = nan.astype('f')
+        lnan = nan.astype('g')
+        cnan = nan.astype('D')
+        cfnan = nan.astype('F')
+        clnan = nan.astype('G')
+
+        nat = np.array([np.datetime64('NaT')] * 8)
+        assert_equal(nan.astype('M8[ns]'), nat)
+        assert_equal(fnan.astype('M8[ns]'), nat)
+        assert_equal(lnan.astype('M8[ns]'), nat)
+        assert_equal(cnan.astype('M8[ns]'), nat)
+        assert_equal(cfnan.astype('M8[ns]'), nat)
+        assert_equal(clnan.astype('M8[ns]'), nat)
+
+        nat = np.array([np.timedelta64('NaT')] * 8)
+        assert_equal(nan.astype('timedelta64[ns]'), nat)
+        assert_equal(fnan.astype('timedelta64[ns]'), nat)
+        assert_equal(lnan.astype('timedelta64[ns]'), nat)
+        assert_equal(cnan.astype('timedelta64[ns]'), nat)
+        assert_equal(cfnan.astype('timedelta64[ns]'), nat)
+        assert_equal(clnan.astype('timedelta64[ns]'), nat)
+
     def test_days_creation(self):
         assert_equal(np.array('1599', dtype='M8[D]').astype('i8'),
                 (1600-1970)*365 - (1972-1600)/4 + 3 - 365)
diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py
index cfeeb8a90..1b5b4cb26 100644
--- a/numpy/core/tests/test_einsum.py
+++ b/numpy/core/tests/test_einsum.py
@@ -5,7 +5,7 @@ import itertools
 import numpy as np
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_almost_equal,
-    assert_raises, suppress_warnings, assert_raises_regex
+    assert_raises, suppress_warnings, assert_raises_regex, assert_allclose
     )
 
 # Setup for optimize einsum
@@ -700,6 +700,14 @@ class TestEinsum(object):
         y2 = x[idx[:, None], idx[:, None], idx, idx]
         assert_equal(y1, y2)
 
+    def test_einsum_failed_on_p9_and_s390x(self):
+        # Regression test for gh-14692 and gh-12689: a signed vs unsigned
+        # char bug made this einsum call fail on power9 and s390x Linux.
+        operand = np.random.random_sample((10, 10, 10, 10))
+        expected = operand.trace(axis1=0, axis2=2).trace()
+        actual = np.einsum('ijij->', operand)
+        assert_allclose(actual, expected)
+
     def test_einsum_all_contig_non_contig_output(self):
         # Issue gh-5907, tests that the all contiguous special case
         # actually checks the contiguity of the output
diff --git a/numpy/core/tests/test_issue14735.py b/numpy/core/tests/test_issue14735.py
new file mode 100644
index 000000000..6105c8e6a
--- /dev/null
+++ b/numpy/core/tests/test_issue14735.py
@@ -0,0 +1,44 @@
+import pytest
+import warnings
+import numpy as np
+
+
+class Wrapper:
+    """Array stand-in that forwards unknown attribute lookups to ``array``.
+
+    Any lookup of a name starting with ``__array_`` emits a UserWarning
+    before it is forwarded, so the test below can observe that lookup.
+    """
+
+    def __init__(self, array):
+        self.array = array
+
+    def __len__(self):
+        return len(self.array)
+
+    def __getitem__(self, item):
+        # Re-wrap so that indexing yields another Wrapper.
+        return type(self)(self.array[item])
+
+    def __getattr__(self, name):
+        # Only reached when normal lookup fails.  If ``array`` itself is
+        # missing (instance created without __init__, e.g. by copy/pickle),
+        # fail cleanly instead of recursing through ``self.array`` forever.
+        if name == "array":
+            raise AttributeError(name)
+        if name.startswith("__array_"):
+            warnings.warn("object got converted", UserWarning, stacklevel=1)
+
+        return getattr(self.array, name)
+
+    def __repr__(self):
+        return "<Wrapper({self.array})>".format(self=self)
+
+
+@pytest.mark.filterwarnings("error")
+def test_getattr_warning():
+    # Warnings are turned into errors here, so the UserWarning emitted while
+    # np.asarray looks up an ``__array_*`` attribute must reach the caller.
+    array = Wrapper(np.arange(10))
+    with pytest.raises(UserWarning, match="object got converted"):
+        np.asarray(array)
diff --git a/numpy/distutils/ccompiler.py b/numpy/distutils/ccompiler.py
index 643879023..684c7535b 100644
--- a/numpy/distutils/ccompiler.py
+++ b/numpy/distutils/ccompiler.py
@@ -532,6 +532,11 @@ def CCompiler_customize(self, dist, need_cxx=0):
                                       'g++' in self.compiler[0] or
                                       'clang' in self.compiler[0]):
         self._auto_depends = True
+        if 'gcc' in self.compiler[0]:
+            # add std=c99 flag for gcc
+            # TODO: does this need to be more specific?
+            self.compiler.append('-std=c99')
+            self.compiler_so.append('-std=c99')
     elif os.name == 'posix':
         import tempfile
         import shutil
diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py
index 5fd1003ab..c2b3e118b 100644
--- a/numpy/distutils/system_info.py
+++ b/numpy/distutils/system_info.py
@@ -156,7 +156,7 @@ from numpy.distutils.misc_util import (is_sequence, is_string,
                                        get_shared_lib_extension)
 from numpy.distutils.command.config import config as cmd_config
 from numpy.distutils.compat import get_exception
-from numpy.distutils import customized_ccompiler
+from numpy.distutils import customized_ccompiler as _customized_ccompiler
 from numpy.distutils import _shell_utils
 import distutils.ccompiler
 import tempfile
@@ -169,6 +169,15 @@ _bits = {'32bit': 32, '64bit': 64}
 platform_bits = _bits[platform.architecture()[0]]
 
 
+global_compiler = None  # set by the first customized_ccompiler() call
+
+def customized_ccompiler():
+    # Build the compiler on first use; a truthy cached object is reused.
+    global global_compiler
+    global_compiler = global_compiler or _customized_ccompiler()
+    return global_compiler
+
+
 def _c_string_literal(s):
     """
     Convert a python string into a literal suitable for inclusion into C code
@@ -1580,7 +1589,7 @@ def get_atlas_version(**config):
             log.info('Status: %d', s)
             log.info('Output: %s', o)
 
-    if atlas_version == '3.2.1_pre3.3.6':
+    elif atlas_version == '3.2.1_pre3.3.6':
         dict_append(info, define_macros=[('NO_ATLAS_INFO', -2)])
     else:
         dict_append(info, define_macros=[(
diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py
index d72384e99..a011e52a9 100644
--- a/numpy/lib/financial.py
+++ b/numpy/lib/financial.py
@@ -12,6 +12,7 @@ otherwise stated.
 """
 from __future__ import division, absolute_import, print_function
 
+import warnings
 from decimal import Decimal
 import functools
 
@@ -19,6 +20,10 @@ import numpy as np
 from numpy.core import overrides
 
 
+_depmsg = ("numpy.{name} is deprecated and will be removed from NumPy 1.20. "
+           "Use numpy_financial.{name} instead "
+           "(https://pypi.org/project/numpy-financial/).")
+
 array_function_dispatch = functools.partial(
     overrides.array_function_dispatch, module='numpy')
 
@@ -45,6 +50,8 @@ def _convert_when(when):
 
 
 def _fv_dispatcher(rate, nper, pmt, pv, when=None):
+    notice = _depmsg.format(name='fv')  # deprecation text for numpy.fv
+    warnings.warn(notice, DeprecationWarning, stacklevel=3)
     return (rate, nper, pmt, pv)
 
 
@@ -53,6 +60,12 @@ def fv(rate, nper, pmt, pv, when='end'):
     """
     Compute the future value.
 
+    .. deprecated:: 1.18
+
+       `fv` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Given:
      * a present value, `pv`
      * an interest `rate` compounded once per period, of which
@@ -100,7 +113,9 @@ def fv(rate, nper, pmt, pv, when='end'):
 
     References
     ----------
-    .. [WRW] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
        Open Document Format for Office Applications (OpenDocument)v1.2,
        Part 2: Recalculated Formula (OpenFormula) Format - Annotated Version,
        Pre-Draft 12. Organization for the Advancement of Structured Information
@@ -109,6 +124,7 @@ def fv(rate, nper, pmt, pv, when='end'):
        http://www.oasis-open.org/committees/documents.php?wg_abbrev=office-formula
        OpenDocument-formula-20090508.odt
 
+
     Examples
     --------
     What is the future value after 10 years of saving $100 now, with
@@ -139,6 +155,8 @@ def fv(rate, nper, pmt, pv, when='end'):
 
 
 def _pmt_dispatcher(rate, nper, pv, fv=None, when=None):
+    notice = _depmsg.format(name='pmt')  # deprecation text for numpy.pmt
+    warnings.warn(notice, DeprecationWarning, stacklevel=3)
     return (rate, nper, pv, fv)
 
 
@@ -147,6 +165,12 @@ def pmt(rate, nper, pv, fv=0, when='end'):
     """
     Compute the payment against loan principal plus interest.
 
+    .. deprecated:: 1.18
+
+       `pmt` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Given:
      * a present value, `pv` (e.g., an amount borrowed)
      * a future value, `fv` (e.g., 0)
@@ -204,7 +228,9 @@ def pmt(rate, nper, pv, fv=0, when='end'):
 
     References
     ----------
-    .. [WRW] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
        Open Document Format for Office Applications (OpenDocument)v1.2,
        Part 2: Recalculated Formula (OpenFormula) Format - Annotated Version,
        Pre-Draft 12. Organization for the Advancement of Structured Information
@@ -237,6 +263,8 @@ def pmt(rate, nper, pv, fv=0, when='end'):
 
 
 def _nper_dispatcher(rate, pmt, pv, fv=None, when=None):
+    warnings.warn(_depmsg.format(name='nper'),
+                  DeprecationWarning, stacklevel=3)
     return (rate, pmt, pv, fv)
 
 
@@ -245,6 +273,12 @@ def nper(rate, pmt, pv, fv=0, when='end'):
     """
     Compute the number of periodic payments.
 
+    .. deprecated:: 1.18
+
+       `nper` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     :class:`decimal.Decimal` type is not supported.
 
     Parameters
@@ -270,6 +304,11 @@ def nper(rate, pmt, pv, fv=0, when='end'):
 
      fv + pv + pmt*nper = 0
 
+    References
+    ----------
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+
     Examples
     --------
     If you only had $150/month to pay towards the loan, how long would it take
@@ -311,6 +350,8 @@ def nper(rate, pmt, pv, fv=0, when='end'):
 
 
 def _ipmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+    warnings.warn(_depmsg.format(name='ipmt'),
+                  DeprecationWarning, stacklevel=3)
     return (rate, per, nper, pv, fv)
 
 
@@ -319,6 +360,12 @@ def ipmt(rate, per, nper, pv, fv=0, when='end'):
     """
     Compute the interest portion of a payment.
 
+    .. deprecated:: 1.18
+
+       `ipmt` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     rate : scalar or array_like of shape(M, )
@@ -354,6 +401,11 @@ def ipmt(rate, per, nper, pv, fv=0, when='end'):
 
     ``pmt = ppmt + ipmt``
 
+    References
+    ----------
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+
     Examples
     --------
     What is the amortization schedule for a 1 year loan of $2500 at
@@ -422,6 +474,8 @@ def _rbl(rate, per, pmt, pv, when):
 
 
 def _ppmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+    warnings.warn(_depmsg.format(name='ppmt'),
+                  DeprecationWarning, stacklevel=3)
     return (rate, per, nper, pv, fv)
 
 
@@ -430,6 +484,12 @@ def ppmt(rate, per, nper, pv, fv=0, when='end'):
     """
     Compute the payment against loan principal.
 
+    .. deprecated:: 1.18
+
+       `ppmt` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     rate : array_like
@@ -450,12 +510,19 @@ def ppmt(rate, per, nper, pv, fv=0, when='end'):
     --------
     pmt, pv, ipmt
 
+    References
+    ----------
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+
     """
     total = pmt(rate, nper, pv, fv, when)
     return total - ipmt(rate, per, nper, pv, fv, when)
 
 
 def _pv_dispatcher(rate, nper, pmt, fv=None, when=None):
+    warnings.warn(_depmsg.format(name='pv'),
+                  DeprecationWarning, stacklevel=3)
-    return (rate, nper, nper, pv, fv)
+    return (rate, nper, pmt, fv)  # forward pmt; `pv` is not a parameter here
 
 
@@ -464,6 +531,12 @@ def pv(rate, nper, pmt, fv=0, when='end'):
     """
     Compute the present value.
 
+    .. deprecated:: 1.18
+
+       `pv` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Given:
      * a future value, `fv`
      * an interest `rate` compounded once per period, of which
@@ -510,7 +583,9 @@ def pv(rate, nper, pmt, fv=0, when='end'):
 
     References
     ----------
-    .. [WRW] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
        Open Document Format for Office Applications (OpenDocument)v1.2,
        Part 2: Recalculated Formula (OpenFormula) Format - Annotated Version,
        Pre-Draft 12. Organization for the Advancement of Structured Information
@@ -567,6 +642,8 @@ def _g_div_gp(r, n, p, x, y, w):
 
 def _rate_dispatcher(nper, pmt, pv, fv, when=None, guess=None, tol=None,
                      maxiter=None):
+    warnings.warn(_depmsg.format(name='rate'),
+                  DeprecationWarning, stacklevel=3)
     return (nper, pmt, pv, fv)
 
 
@@ -582,6 +659,12 @@ def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
     """
     Compute the rate of interest per period.
 
+    .. deprecated:: 1.18
+
+       `rate` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     nper : array_like
@@ -612,13 +695,16 @@ def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
 
     References
     ----------
-    Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May). Open Document
-    Format for Office Applications (OpenDocument)v1.2, Part 2: Recalculated
-    Formula (OpenFormula) Format - Annotated Version, Pre-Draft 12.
-    Organization for the Advancement of Structured Information Standards
-    (OASIS). Billerica, MA, USA. [ODT Document]. Available:
-    http://www.oasis-open.org/committees/documents.php?wg_abbrev=office-formula
-    OpenDocument-formula-20090508.odt
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] Wheeler, D. A., E. Rathke, and R. Weir (Eds.) (2009, May).
+       Open Document Format for Office Applications (OpenDocument)v1.2,
+       Part 2: Recalculated Formula (OpenFormula) Format - Annotated Version,
+       Pre-Draft 12. Organization for the Advancement of Structured Information
+       Standards (OASIS). Billerica, MA, USA. [ODT Document].
+       Available:
+       http://www.oasis-open.org/committees/documents.php?wg_abbrev=office-formula
+       OpenDocument-formula-20090508.odt
 
     """
     when = _convert_when(when)
@@ -651,6 +737,8 @@ def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
 
 
 def _irr_dispatcher(values):
+    warnings.warn(_depmsg.format(name='irr'),
+                  DeprecationWarning, stacklevel=3)
     return (values,)
 
 
@@ -659,6 +747,12 @@ def irr(values):
     """
     Return the Internal Rate of Return (IRR).
 
+    .. deprecated:: 1.18
+
+       `irr` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     This is the "average" periodically compounded rate of return
     that gives a net present value of 0.0; for a more complete explanation,
     see Notes below.
@@ -693,13 +787,15 @@ def irr(values):
      + \\frac{55}{(1+r)^3} + \\frac{20}{(1+r)^4} = 0
 
     In general, for `values` :math:`= [v_0, v_1, ... v_M]`,
-    irr is the solution of the equation: [G]_
+    irr is the solution of the equation: [2]_
 
     .. math:: \\sum_{t=0}^M{\\frac{v_t}{(1+irr)^{t}}} = 0
 
     References
     ----------
-    .. [G] L. J. Gitman, "Principles of Managerial Finance, Brief," 3rd ed.,
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] L. J. Gitman, "Principles of Managerial Finance, Brief," 3rd ed.,
        Addison-Wesley, 2003, pg. 348.
 
     Examples
@@ -734,6 +830,8 @@ def irr(values):
 
 
 def _npv_dispatcher(rate, values):
+    warnings.warn(_depmsg.format(name='npv'),
+                  DeprecationWarning, stacklevel=3)
     return (values,)
 
 
@@ -742,6 +840,12 @@ def npv(rate, values):
     """
     Returns the NPV (Net Present Value) of a cash flow series.
 
+    .. deprecated:: 1.18
+
+       `npv` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     rate : scalar
@@ -772,13 +876,15 @@ def npv(rate, values):
 
     Notes
     -----
-    Returns the result of: [G]_
+    Returns the result of: [2]_
 
     .. math :: \\sum_{t=0}^{M-1}{\\frac{values_t}{(1+rate)^{t}}}
 
     References
     ----------
-    .. [G] L. J. Gitman, "Principles of Managerial Finance, Brief," 3rd ed.,
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
+    .. [2] L. J. Gitman, "Principles of Managerial Finance, Brief," 3rd ed.,
        Addison-Wesley, 2003, pg. 346.
 
     Examples
@@ -808,6 +914,8 @@ def npv(rate, values):
 
 
 def _mirr_dispatcher(values, finance_rate, reinvest_rate):
+    warnings.warn(_depmsg.format(name='mirr'),
+                  DeprecationWarning, stacklevel=3)
     return (values,)
 
 
@@ -816,6 +924,12 @@ def mirr(values, finance_rate, reinvest_rate):
     """
     Modified internal rate of return.
 
+    .. deprecated:: 1.18
+
+       `mirr` is deprecated; for details, see NEP 32 [1]_.
+       Use the corresponding function in the numpy-financial library,
+       https://pypi.org/project/numpy-financial.
+
     Parameters
     ----------
     values : array_like
@@ -832,6 +946,10 @@ def mirr(values, finance_rate, reinvest_rate):
     out : float
         Modified internal rate of return
 
+    References
+    ----------
+    .. [1] NumPy Enhancement Proposal (NEP) 32,
+       https://numpy.org/neps/nep-0032-remove-financial-functions.html
     """
     values = np.asarray(values)
     n = values.size
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index c39c2eea1..3ad630a7d 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1893,7 +1893,7 @@ class vectorize(object):
         typecode characters or a list of data type specifiers. There should
         be one data type specifier for each output.
     doc : str, optional
-        The docstring for the function. If `None`, the docstring will be the
+        The docstring for the function. If None, the docstring will be the
         ``pyfunc.__doc__``.
     excluded : set, optional
         Set of strings or integers representing the positional or keyword
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 6cffab6ac..18ccab3b8 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -244,8 +244,8 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
     out : ndarray, optional
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
-        expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details.
+        expected output, but the type will be cast if necessary. See
+        `ufuncs-output-type` for more details.
 
         .. versionadded:: 1.8.0
     keepdims : bool, optional
@@ -359,8 +359,8 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
     out : ndarray, optional
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
-        expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details.
+        expected output, but the type will be cast if necessary. See
+        `ufuncs-output-type` for more details.
 
         .. versionadded:: 1.8.0
     keepdims : bool, optional
@@ -585,8 +585,8 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``. If provided, it must have the same shape as the
         expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details. The casting of NaN to integer can yield
-        unexpected results.
+        `ufuncs-output-type` for more details. The casting of NaN to integer
+        can yield unexpected results.
 
         .. versionadded:: 1.8.0
     keepdims : bool, optional
@@ -681,9 +681,9 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     out : ndarray, optional
         Alternate output array in which to place the result.  The default
         is ``None``. If provided, it must have the same shape as the
-        expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details. The casting of NaN to integer can yield
-        unexpected results.
+        expected output, but the type will be cast if necessary. See
+        `ufuncs-output-type` for more details. The casting of NaN to integer
+        can yield unexpected results.
     keepdims : bool, optional
         If True, the axes which are reduced are left in the result as
         dimensions with size one. With this option, the result will
@@ -750,8 +750,8 @@ def nancumsum(a, axis=None, dtype=None, out=None):
     out : ndarray, optional
         Alternative output array in which to place the result. It must
         have the same shape and buffer length as the expected output
-        but the type will be cast if necessary. See `doc.ufuncs`
-        (Section "Output arguments") for more details.
+        but the type will be cast if necessary. See `ufuncs-output-type` for
+        more details.
 
     Returns
     -------
@@ -888,8 +888,8 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     out : ndarray, optional
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
-        expected output, but the type will be cast if necessary.  See
-        `doc.ufuncs` for details.
+        expected output, but the type will be cast if necessary. See
+        `ufuncs-output-type` for more details.
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
         in the result as dimensions with size one. With this option,
@@ -1473,7 +1473,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     mean : Average
     var : Variance while not ignoring NaNs
     nanstd, nanmean
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
@@ -1625,7 +1625,7 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     --------
     var, mean, std
     nanvar, nanmean
-    numpy.doc.ufuncs : Section "Output arguments"
+    ufuncs-output-type
 
     Notes
     -----
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 2c72f623c..3d07a0de4 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -479,10 +479,10 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
         coefficients for `k`-th data set are in ``p[:,k]``.
 
     residuals, rank, singular_values, rcond
-        Present only if `full` = True.  Residuals of the least-squares fit,
-        the effective rank of the scaled Vandermonde coefficient matrix,
-        its singular values, and the specified value of `rcond`. For more
-        details, see `linalg.lstsq`.
+        Present only if `full` = True.  The sum of squared residuals of the
+        least-squares fit, the effective rank of the scaled Vandermonde
+        coefficient matrix, its singular values, and the specified value of
+        `rcond`. For more details, see `linalg.lstsq`.
 
     V : ndarray, shape (M,M) or (M,M,K)
         Present only if `full` = False and `cov`=True.  The covariance
diff --git a/numpy/lib/tests/test_financial.py b/numpy/lib/tests/test_financial.py
index 21088765f..cb67f7c0f 100644
--- a/numpy/lib/tests/test_financial.py
+++ b/numpy/lib/tests/test_financial.py
@@ -1,5 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
+import warnings
 from decimal import Decimal
 
 import numpy as np
@@ -8,22 +9,35 @@ from numpy.testing import (
     )
 
 
+def filter_deprecation(func):
+    def newfunc(*args, **kwargs):
+        with warnings.catch_warnings(record=True) as ws:
+            warnings.filterwarnings('always', category=DeprecationWarning)
+            func(*args, **kwargs)
+            assert_(all(w.category is DeprecationWarning for w in ws))
+    return newfunc
+
+
 class TestFinancial(object):
+    @filter_deprecation
     def test_npv_irr_congruence(self):
         # IRR is defined as the rate required for the present value of a
         # a series of cashflows to be zero i.e. NPV(IRR(x), x) = 0
         cashflows = np.array([-40000, 5000, 8000, 12000, 30000])
         assert_allclose(np.npv(np.irr(cashflows), cashflows), 0, atol=1e-10, rtol=0)
 
+    @filter_deprecation
     def test_rate(self):
         assert_almost_equal(
             np.rate(10, 0, -3500, 10000),
             0.1107, 4)
 
+    @filter_deprecation
     def test_rate_decimal(self):
         rate = np.rate(Decimal('10'), Decimal('0'), Decimal('-3500'), Decimal('10000'))
         assert_equal(Decimal('0.1106908537142689284704528100'), rate)
 
+    @filter_deprecation
     def test_irr(self):
         v = [-150000, 15000, 25000, 35000, 45000, 60000]
         assert_almost_equal(np.irr(v), 0.0524, 2)
@@ -43,20 +57,25 @@ class TestFinancial(object):
         v = [-1, -2, -3]
         assert_equal(np.irr(v), np.nan)
 
+    @filter_deprecation
     def test_pv(self):
         assert_almost_equal(np.pv(0.07, 20, 12000, 0), -127128.17, 2)
 
+    @filter_deprecation
     def test_pv_decimal(self):
         assert_equal(np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0')),
                      Decimal('-127128.1709461939327295222005'))
 
+    @filter_deprecation
     def test_fv(self):
         assert_equal(np.fv(0.075, 20, -2000, 0, 0), 86609.362673042924)
 
+    @filter_deprecation
     def test_fv_decimal(self):
         assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), 0, 0),
                      Decimal('86609.36267304300040536731624'))
 
+    @filter_deprecation
     def test_pmt(self):
         res = np.pmt(0.08 / 12, 5 * 12, 15000)
         tgt = -304.145914
@@ -71,6 +90,7 @@ class TestFinancial(object):
         tgt = np.array([[-166.66667, -19311.258], [-626.90814, -19311.258]])
         assert_allclose(res, tgt)
 
+    @filter_deprecation
     def test_pmt_decimal(self):
         res = np.pmt(Decimal('0.08') / Decimal('12'), 5 * 12, 15000)
         tgt = Decimal('-304.1459143262052370338701494')
@@ -94,18 +114,22 @@ class TestFinancial(object):
         assert_equal(res[1][0], tgt[1][0])
         assert_equal(res[1][1], tgt[1][1])
 
+    @filter_deprecation
     def test_ppmt(self):
         assert_equal(np.round(np.ppmt(0.1 / 12, 1, 60, 55000), 2), -710.25)
 
+    @filter_deprecation
     def test_ppmt_decimal(self):
         assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000')),
                      Decimal('-710.2541257864217612489830917'))
 
     # Two tests showing how Decimal is actually getting at a more exact result
     # .23 / 12 does not come out nicely as a float but does as a decimal
+    @filter_deprecation
     def test_ppmt_special_rate(self):
         assert_equal(np.round(np.ppmt(0.23 / 12, 1, 60, 10000000000), 8), -90238044.232277036)
 
+    @filter_deprecation
     def test_ppmt_special_rate_decimal(self):
         # When rounded out to 8 decimal places like the float based test, this should not equal the same value
         # as the float, substituted for the decimal
@@ -118,31 +142,38 @@ class TestFinancial(object):
         assert_equal(np.ppmt(Decimal('0.23') / Decimal('12'), 1, 60, Decimal('10000000000')),
                      Decimal('-90238044.2322778884413969909'))
 
+    @filter_deprecation
     def test_ipmt(self):
         assert_almost_equal(np.round(np.ipmt(0.1 / 12, 1, 24, 2000), 2), -16.67)
 
+    @filter_deprecation
     def test_ipmt_decimal(self):
         result = np.ipmt(Decimal('0.1') / Decimal('12'), 1, 24, 2000)
         assert_equal(result.flat[0], Decimal('-16.66666666666666666666666667'))
 
+    @filter_deprecation
     def test_nper(self):
         assert_almost_equal(np.nper(0.075, -2000, 0, 100000.),
                             21.54, 2)
 
+    @filter_deprecation
     def test_nper2(self):
         assert_almost_equal(np.nper(0.0, -2000, 0, 100000.),
                             50.0, 1)
 
+    @filter_deprecation
     def test_npv(self):
         assert_almost_equal(
             np.npv(0.05, [-15000, 1500, 2500, 3500, 4500, 6000]),
             122.89, 2)
 
+    @filter_deprecation
     def test_npv_decimal(self):
         assert_equal(
             np.npv(Decimal('0.05'), [-15000, 1500, 2500, 3500, 4500, 6000]),
             Decimal('122.894854950942692161628715'))
 
+    @filter_deprecation
     def test_mirr(self):
         val = [-4500, -800, 800, 800, 600, 600, 800, 800, 700, 3000]
         assert_almost_equal(np.mirr(val, 0.08, 0.055), 0.0666, 4)
@@ -156,6 +187,7 @@ class TestFinancial(object):
         val = [39000, 30000, 21000, 37000, 46000]
         assert_(np.isnan(np.mirr(val, 0.10, 0.12)))
 
+    @filter_deprecation
     def test_mirr_decimal(self):
         val = [Decimal('-4500'), Decimal('-800'), Decimal('800'), Decimal('800'),
                Decimal('600'), Decimal('600'), Decimal('800'), Decimal('800'),
@@ -174,6 +206,7 @@ class TestFinancial(object):
         val = [Decimal('39000'), Decimal('30000'), Decimal('21000'), Decimal('37000'), Decimal('46000')]
         assert_(np.isnan(np.mirr(val, Decimal('0.10'), Decimal('0.12'))))
 
+    @filter_deprecation
     def test_when(self):
         # begin
         assert_equal(np.rate(10, 20, -3500, 10000, 1),
@@ -238,6 +271,7 @@ class TestFinancial(object):
         assert_equal(np.nper(0.075, -2000, 0, 100000., 0),
                      np.nper(0.075, -2000, 0, 100000., 'end'))
 
+    @filter_deprecation
     def test_decimal_with_when(self):
         """Test that decimals are still supported if the when argument is passed"""
         # begin
@@ -312,6 +346,7 @@ class TestFinancial(object):
                      np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'),
                              Decimal('0'), 'end').flat[0])
 
+    @filter_deprecation
     def test_broadcast(self):
         assert_almost_equal(np.nper(0.075, -2000, 0, 100000., [0, 1]),
                             [21.5449442, 20.76156441], 4)
@@ -329,6 +364,7 @@ class TestFinancial(object):
                             [-74.998201, -75.62318601, -75.62318601,
                              -76.88882405, -76.88882405], 4)
 
+    @filter_deprecation
     def test_broadcast_decimal(self):
         # Use almost equal because precision is tested in the explicit tests, this test is to ensure
         # broadcast with Decimal is not broken.
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index bb3788c9a..bb0d8d412 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -4394,7 +4394,7 @@ class MaskedArray(ndarray):
         ----------
         axis : None or int or tuple of ints, optional
             Axis or axes along which the count is performed.
-            The default (`axis` = `None`) performs the count over all
+            The default, None, performs the count over all
             the dimensions of the input array. `axis` may be negative, in
             which case it counts from the last to the first axis.
 
@@ -4774,7 +4774,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.all : corresponding function for ndarrays
+        numpy.ndarray.all : corresponding function for ndarrays
         numpy.all : equivalent function
 
         Examples
@@ -4812,7 +4812,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.any : corresponding function for ndarrays
+        numpy.ndarray.any : corresponding function for ndarrays
         numpy.any : equivalent function
 
         """
@@ -4866,7 +4866,7 @@ class MaskedArray(ndarray):
         flatnonzero :
             Return indices that are non-zero in the flattened version of the input
             array.
-        ndarray.nonzero :
+        numpy.ndarray.nonzero :
             Equivalent ndarray method.
         count_nonzero :
             Counts the number of non-zero elements in the input array.
@@ -4994,7 +4994,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.sum : corresponding function for ndarrays
+        numpy.ndarray.sum : corresponding function for ndarrays
         numpy.sum : equivalent function
 
         Examples
@@ -5065,7 +5065,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.cumsum : corresponding function for ndarrays
+        numpy.ndarray.cumsum : corresponding function for ndarrays
         numpy.cumsum : equivalent function
 
         Examples
@@ -5102,7 +5102,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.prod : corresponding function for ndarrays
+        numpy.ndarray.prod : corresponding function for ndarrays
         numpy.prod : equivalent function
         """
         kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
@@ -5148,7 +5148,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.cumprod : corresponding function for ndarrays
+        numpy.ndarray.cumprod : corresponding function for ndarrays
         numpy.cumprod : equivalent function
         """
         result = self.filled(1).cumprod(axis=axis, dtype=dtype, out=out)
@@ -5171,7 +5171,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.mean : corresponding function for ndarrays
+        numpy.ndarray.mean : corresponding function for ndarrays
         numpy.mean : Equivalent function
         numpy.ma.average: Weighted average.
 
@@ -5260,7 +5260,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.var : corresponding function for ndarrays
+        numpy.ndarray.var : corresponding function for ndarrays
         numpy.var : Equivalent function
         """
         kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
@@ -5323,7 +5323,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.std : corresponding function for ndarrays
+        numpy.ndarray.std : corresponding function for ndarrays
         numpy.std : Equivalent function
         """
         kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
@@ -5344,7 +5344,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.around : corresponding function for ndarrays
+        numpy.ndarray.round : corresponding function for ndarrays
         numpy.around : equivalent function
         """
         result = self._data.round(decimals=decimals, out=out).view(type(self))
@@ -5406,7 +5406,7 @@ class MaskedArray(ndarray):
         --------
         MaskedArray.sort : Describes sorting algorithms used.
         lexsort : Indirect stable sort with multiple keys.
-        ndarray.sort : Inplace sort.
+        numpy.ndarray.sort : Inplace sort.
 
         Notes
         -----
@@ -5558,7 +5558,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.sort : Method to sort an array in-place.
+        numpy.ndarray.sort : Method to sort an array in-place.
         argsort : Indirect sort.
         lexsort : Indirect stable sort on multiple keys.
         searchsorted : Find elements in a sorted array.
@@ -5978,7 +5978,7 @@ class MaskedArray(ndarray):
 
         See Also
         --------
-        ndarray.tobytes
+        numpy.ndarray.tobytes
         tolist, tofile
 
         Notes
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index de1aa3af8..4a83ac781 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -542,7 +542,7 @@ def average(a, axis=None, weights=None, returned=False):
         Data to be averaged.
         Masked entries are not taken into account in the computation.
     axis : int, optional
-        Axis along which to average `a`. If `None`, averaging is done over
+        Axis along which to average `a`. If None, averaging is done over
         the flattened array.
     weights : array_like, optional
         The importance that each element has in the computation of the average.
diff --git a/numpy/matlib.py b/numpy/matlib.py
index 604ef470b..b1b155586 100644
--- a/numpy/matlib.py
+++ b/numpy/matlib.py
@@ -239,7 +239,7 @@ def rand(*args):
 
     See Also
     --------
-    randn, numpy.random.rand
+    randn, numpy.random.RandomState.rand
 
     Examples
     --------
@@ -285,7 +285,7 @@ def randn(*args):
 
     See Also
     --------
-    rand, random.randn
+    rand, numpy.random.RandomState.randn
 
     Notes
     -----
diff --git a/numpy/matrixlib/defmatrix.py b/numpy/matrixlib/defmatrix.py
index 3c7e8ffc2..cabd41367 100644
--- a/numpy/matrixlib/defmatrix.py
+++ b/numpy/matrixlib/defmatrix.py
@@ -1046,7 +1046,7 @@ def bmat(obj, ldict=None, gdict=None):
         referenced by name.
     ldict : dict, optional
         A dictionary that replaces local operands in current frame.
-        Ignored if `obj` is not a string or `gdict` is `None`.
+        Ignored if `obj` is not a string or `gdict` is None.
     gdict : dict, optional
         A dictionary that replaces global operands in current frame.
         Ignored if `obj` is not a string.
diff --git a/numpy/polynomial/chebyshev.py b/numpy/polynomial/chebyshev.py
index 093eb0048..0cd9c4d23 100644
--- a/numpy/polynomial/chebyshev.py
+++ b/numpy/polynomial/chebyshev.py
@@ -1468,7 +1468,7 @@ def chebvander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(chebvander, x, y, deg)
+    return pu._vander_nd_flat((chebvander, chebvander), (x, y), deg)
 
 
 def chebvander3d(x, y, z, deg):
@@ -1522,7 +1522,7 @@ def chebvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(chebvander, x, y, z, deg)
+    return pu._vander_nd_flat((chebvander, chebvander, chebvander), (x, y, z), deg)
 
 
 def chebfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/hermite.py b/numpy/polynomial/hermite.py
index 0011fa3b7..9b1aea239 100644
--- a/numpy/polynomial/hermite.py
+++ b/numpy/polynomial/hermite.py
@@ -1193,7 +1193,7 @@ def hermvander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(hermvander, x, y, deg)
+    return pu._vander_nd_flat((hermvander, hermvander), (x, y), deg)
 
 
 def hermvander3d(x, y, z, deg):
@@ -1247,7 +1247,7 @@ def hermvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(hermvander, x, y, z, deg)
+    return pu._vander_nd_flat((hermvander, hermvander, hermvander), (x, y, z), deg)
 
 
 def hermfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/hermite_e.py b/numpy/polynomial/hermite_e.py
index b1cc2d3ab..c5a0a05a2 100644
--- a/numpy/polynomial/hermite_e.py
+++ b/numpy/polynomial/hermite_e.py
@@ -1186,7 +1186,7 @@ def hermevander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(hermevander, x, y, deg)
+    return pu._vander_nd_flat((hermevander, hermevander), (x, y), deg)
 
 
 def hermevander3d(x, y, z, deg):
@@ -1240,7 +1240,7 @@ def hermevander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(hermevander, x, y, z, deg)
+    return pu._vander_nd_flat((hermevander, hermevander, hermevander), (x, y, z), deg)
 
 
 def hermefit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/laguerre.py b/numpy/polynomial/laguerre.py
index 7e7e45ca1..538a1d449 100644
--- a/numpy/polynomial/laguerre.py
+++ b/numpy/polynomial/laguerre.py
@@ -1193,7 +1193,7 @@ def lagvander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(lagvander, x, y, deg)
+    return pu._vander_nd_flat((lagvander, lagvander), (x, y), deg)
 
 
 def lagvander3d(x, y, z, deg):
@@ -1247,7 +1247,7 @@ def lagvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(lagvander, x, y, z, deg)
+    return pu._vander_nd_flat((lagvander, lagvander, lagvander), (x, y, z), deg)
 
 
 def lagfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/legendre.py b/numpy/polynomial/legendre.py
index 281982d0b..c11824761 100644
--- a/numpy/polynomial/legendre.py
+++ b/numpy/polynomial/legendre.py
@@ -1229,7 +1229,7 @@ def legvander2d(x, y, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander2d(legvander, x, y, deg)
+    return pu._vander_nd_flat((legvander, legvander), (x, y), deg)
 
 
 def legvander3d(x, y, z, deg):
@@ -1283,7 +1283,7 @@ def legvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(legvander, x, y, z, deg)
+    return pu._vander_nd_flat((legvander, legvander, legvander), (x, y, z), deg)
 
 
 def legfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/polynomial.py b/numpy/polynomial/polynomial.py
index 3f0a902cf..315ea1495 100644
--- a/numpy/polynomial/polynomial.py
+++ b/numpy/polynomial/polynomial.py
@@ -1133,7 +1133,7 @@ def polyvander2d(x, y, deg):
     polyvander, polyvander3d, polyval2d, polyval3d
 
     """
-    return pu._vander2d(polyvander, x, y, deg)
+    return pu._vander_nd_flat((polyvander, polyvander), (x, y), deg)
 
 
 def polyvander3d(x, y, z, deg):
@@ -1187,7 +1187,7 @@ def polyvander3d(x, y, z, deg):
     .. versionadded:: 1.7.0
 
     """
-    return pu._vander3d(polyvander, x, y, z, deg)
+    return pu._vander_nd_flat((polyvander, polyvander, polyvander), (x, y, z), deg)
 
 
 def polyfit(x, y, deg, rcond=None, full=False, w=None):
diff --git a/numpy/polynomial/polyutils.py b/numpy/polynomial/polyutils.py
index 35b24d1ab..5dcfa7a7a 100644
--- a/numpy/polynomial/polyutils.py
+++ b/numpy/polynomial/polyutils.py
@@ -46,6 +46,7 @@ Functions
 from __future__ import division, absolute_import, print_function
 
 import operator
+import functools
 import warnings
 
 import numpy as np
@@ -415,45 +416,89 @@ def mapdomain(x, old, new):
     return off + scl*x
 
 
-def _vander2d(vander_f, x, y, deg):
-    """
-    Helper function used to implement the ``<type>vander2d`` functions.
+def _nth_slice(i, ndim):  # index tuple: a full slice at position i, np.newaxis everywhere else
+    sl = [np.newaxis] * ndim  # start with a new length-1 axis in each of the ndim positions
+    sl[i] = slice(None)  # position i keeps the existing axis of the indexed array
+    return tuple(sl)
+
+
+def _vander_nd(vander_fs, points, degrees):
+    r"""
+    A generalization of the Vandermonde matrix for N dimensions.
+
+    The result is built by combining the results of 1d Vandermonde matrices,
+
+    .. math::
+        W[i_0, \ldots, i_{M-1}, j_0, \ldots, j_{N-1}] = \prod_{k=0}^{N-1}{V_k(x_k)[i_0, \ldots, i_{M-1}, j_k]}
+
+    where
+
+    .. math::
+        N &= \texttt{len(points)} = \texttt{len(degrees)} = \texttt{len(vander\_fs)} \\
+        M &= \texttt{points[k].ndim} \\
+        V_k &= \texttt{vander\_fs[k]} \\
+        x_k &= \texttt{points[k]} \\
+        0 \le j_k &\le \texttt{degrees[k]}
+
+    Expanding the one-dimensional :math:`V_k` functions gives:
+
+    .. math::
+        W[i_0, \ldots, i_{M-1}, j_0, \ldots, j_{N-1}] = \prod_{k=0}^{N-1}{B_{k, j_k}(x_k[i_0, \ldots, i_{M-1}])}
+
+    where :math:`B_{k,m}` is the m'th basis of the polynomial construction used along
+    dimension :math:`k`. For a regular polynomial, :math:`B_{k, m}(x) = P_m(x) = x^m`.
 
     Parameters
     ----------
-    vander_f : function(array_like, int) -> ndarray
-        The 1d vander function, such as ``polyvander``
-    x, y, deg :
-        See the ``<type>vander2d`` functions for more detail
+    vander_fs : Sequence[function(array_like, int) -> ndarray]
+        The 1d vander function to use for each axis, such as ``polyvander``
+    points : Sequence[array_like]
+        Arrays of point coordinates, all of the same shape. The dtypes
+        will be converted to either float64 or complex128 depending on
+        whether any of the elements are complex. Scalars are converted to
+        1-D arrays.
+        This must be the same length as `vander_fs`.
+    degrees : Sequence[int]
+        The maximum degree (inclusive) to use for each axis.
+        This must be the same length as `vander_fs`.
+
+    Returns
+    -------
+    vander_nd : ndarray
+        An array of shape ``points[0].shape + tuple(d + 1 for d in degrees)``.
     """
-    degx, degy = deg
-    x, y = np.array((x, y), copy=False) + 0.0
+    n_dims = len(vander_fs)
+    if n_dims != len(points):
+        raise ValueError(
+            "Expected {} dimensions of sample points, got {}".format(n_dims, len(points)))
+    if n_dims != len(degrees):
+        raise ValueError(
+            "Expected {} dimensions of degrees, got {}".format(n_dims, len(degrees)))
+    if n_dims == 0:
+        raise ValueError("Unable to guess a dtype or shape when no points are given")
+
+    # convert to the same shape and type: stack into one array; adding 0.0 promotes integer input to float
+    points = tuple(np.array(tuple(points), copy=False) + 0.0)
 
-    vx = vander_f(x, degx)
-    vy = vander_f(y, degy)
-    v = vx[..., None]*vy[..., None,:]
-    return v.reshape(v.shape[:-2] + (-1,))
+    # produce the vandermonde matrix for each dimension, placing the last
+    # axis of each in an independent trailing axis of the output
+    vander_arrays = (
+        vander_fs[i](points[i], degrees[i])[(...,) + _nth_slice(i, n_dims)]
+        for i in range(n_dims)
+    )
 
+    # we checked this wasn't empty already, so no `initial` needed; the product broadcasts the per-axis arrays
+    return functools.reduce(operator.mul, vander_arrays)
 
-def _vander3d(vander_f, x, y, z, deg):
+
+def _vander_nd_flat(vander_fs, points, degrees):
     """
-    Helper function used to implement the ``<type>vander3d`` functions.
+    Like `_vander_nd`, but flattens the last ``len(degrees)`` axes into a single axis.
 
-    Parameters
-    ----------
-    vander_f : function(array_like, int) -> ndarray
-        The 1d vander function, such as ``polyvander``
-    x, y, z, deg :
-        See the ``<type>vander3d`` functions for more detail
+    Used to implement the public ``<type>vander<n>d`` functions.
     """
-    degx, degy, degz = deg
-    x, y, z = np.array((x, y, z), copy=False) + 0.0
-
-    vx = vander_f(x, degx)
-    vy = vander_f(y, degy)
-    vz = vander_f(z, degz)
-    v = vx[..., None, None]*vy[..., None,:, None]*vz[..., None, None,:]
-    return v.reshape(v.shape[:-3] + (-1,))
+    v = _vander_nd(vander_fs, points, degrees)
+    return v.reshape(v.shape[:-len(degrees)] + (-1,))  # keep the leading axes, merge the trailing len(degrees) axes into one
 
 
 def _fromroots(line_f, mul_f, roots):
diff --git a/numpy/random/__init__.py b/numpy/random/__init__.py
index f7c248451..1ceb5c4dd 100644
--- a/numpy/random/__init__.py
+++ b/numpy/random/__init__.py
@@ -179,20 +179,19 @@ __all__ = [
 
 # add these for module-freeze analysis (like PyInstaller)
 from . import _pickle
-from . import common
-from . import bounded_integers
-
+from . import _common
+from . import _bounded_integers
+
+from ._generator import Generator, default_rng
+from ._bit_generator import SeedSequence, BitGenerator
+from ._mt19937 import MT19937
+from ._pcg64 import PCG64
+from ._philox import Philox
+from ._sfc64 import SFC64
 from .mtrand import *
-from .generator import Generator, default_rng
-from .bit_generator import SeedSequence
-from .mt19937 import MT19937
-from .pcg64 import PCG64
-from .philox import Philox
-from .sfc64 import SFC64
-from .mtrand import RandomState
 
 __all__ += ['Generator', 'RandomState', 'SeedSequence', 'MT19937',
-            'Philox', 'PCG64', 'SFC64', 'default_rng']
+            'Philox', 'PCG64', 'SFC64', 'default_rng', 'BitGenerator']
 
 
 def __RandomState_ctor():
diff --git a/numpy/random/bit_generator.pxd b/numpy/random/_bit_generator.pxd
index 984033f17..30fa4a27d 100644
--- a/numpy/random/bit_generator.pxd
+++ b/numpy/random/_bit_generator.pxd
@@ -1,6 +1,15 @@
-
-from .common cimport bitgen_t, uint32_t
 cimport numpy as np
+from libc.stdint cimport uint32_t, uint64_t
+
+cdef extern from "include/bitgen.h":
+    struct bitgen:
+        void *state
+        uint64_t (*next_uint64)(void *st) nogil
+        uint32_t (*next_uint32)(void *st) nogil
+        double (*next_double)(void *st) nogil
+        uint64_t (*next_raw)(void *st) nogil
+
+    ctypedef bitgen bitgen_t
 
 cdef class BitGenerator():
     cdef readonly object _seed_seq
diff --git a/numpy/random/bit_generator.pyx b/numpy/random/_bit_generator.pyx
index eb608af6c..21d21e6bb 100644
--- a/numpy/random/bit_generator.pyx
+++ b/numpy/random/_bit_generator.pyx
@@ -53,9 +53,7 @@ from cpython.pycapsule cimport PyCapsule_New
 import numpy as np
 cimport numpy as np
 
-from libc.stdint cimport uint32_t
-from .common cimport (random_raw, benchmark, prepare_ctypes, prepare_cffi)
-from .distributions cimport bitgen_t
+from ._common cimport (random_raw, benchmark, prepare_ctypes, prepare_cffi)
 
 __all__ = ['SeedSequence', 'BitGenerator']
 
@@ -116,7 +114,7 @@ def _coerce_to_uint32_array(x):
     Examples
     --------
     >>> import numpy as np
-    >>> from numpy.random.bit_generator import _coerce_to_uint32_array
+    >>> from numpy.random._bit_generator import _coerce_to_uint32_array
     >>> _coerce_to_uint32_array(12345)
     array([12345], dtype=uint32)
     >>> _coerce_to_uint32_array('12345')
@@ -484,13 +482,12 @@ cdef class BitGenerator():
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
-        `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        `~numpy.random.SeedSequence` to derive the initial `BitGenerator` state.
+        One may also pass in a `SeedSequence` instance.
 
     Attributes
     ----------
diff --git a/numpy/random/_bounded_integers.pxd b/numpy/random/_bounded_integers.pxd
new file mode 100644
index 000000000..d3ee97a70
--- /dev/null
+++ b/numpy/random/_bounded_integers.pxd
@@ -0,0 +1,29 @@
+from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t,
+                          int8_t, int16_t, int32_t, int64_t, intptr_t)
+import numpy as np
+cimport numpy as np
+ctypedef np.npy_bool bool_t
+
+from ._bit_generator cimport bitgen_t
+
+cdef inline uint64_t _gen_mask(uint64_t max_val) nogil:
+    """Mask generator for use in bounded random numbers"""
+    # Smallest bit mask >= max_val: OR-ing in right shifts of 1, 2, 4, ..., 32 sets every bit below the highest set bit
+    cdef uint64_t mask = max_val
+    mask |= mask >> 1
+    mask |= mask >> 2
+    mask |= mask >> 4
+    mask |= mask >> 8
+    mask |= mask >> 16
+    mask |= mask >> 32  # after this step all 64 bit positions below the top set bit are covered
+    return mask
+
+cdef object _rand_uint64(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock)
+cdef object _rand_uint32(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock)
+cdef object _rand_uint16(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock)
+cdef object _rand_uint8(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock)
+cdef object _rand_bool(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock)
+cdef object _rand_int64(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock)
+cdef object _rand_int32(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock)
+cdef object _rand_int16(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock)
+cdef object _rand_int8(object low, object high, object size, bint use_masked, bint closed, bitgen_t *state, object lock)
diff --git a/numpy/random/bounded_integers.pxd.in b/numpy/random/_bounded_integers.pxd.in
index 7a3f224dc..320d35774 100644
--- a/numpy/random/bounded_integers.pxd.in
+++ b/numpy/random/_bounded_integers.pxd.in
@@ -4,7 +4,7 @@ import numpy as np
 cimport numpy as np
 ctypedef np.npy_bool bool_t
 
-from .common cimport bitgen_t
+from ._bit_generator cimport bitgen_t
 
 cdef inline uint64_t _gen_mask(uint64_t max_val) nogil:
     """Mask generator for use in bounded random numbers"""
diff --git a/numpy/random/_bounded_integers.pyx b/numpy/random/_bounded_integers.pyx
new file mode 100644
index 000000000..d6a534b43
--- /dev/null
+++ b/numpy/random/_bounded_integers.pyx
@@ -0,0 +1,1564 @@
+#!python
+#cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True
+
+import numpy as np
+cimport numpy as np
+
+__all__ = []
+
+np.import_array()
+
+cdef extern from "include/distributions.h":
+    # Generate random numbers in closed interval [off, off + rng].
+    uint64_t random_bounded_uint64(bitgen_t *bitgen_state,
+                                   uint64_t off, uint64_t rng,
+                                   uint64_t mask, bint use_masked) nogil
+    uint32_t random_buffered_bounded_uint32(bitgen_t *bitgen_state,
+                                            uint32_t off, uint32_t rng,
+                                            uint32_t mask, bint use_masked,
+                                            int *bcnt, uint32_t *buf) nogil
+    uint16_t random_buffered_bounded_uint16(bitgen_t *bitgen_state,
+                                            uint16_t off, uint16_t rng,
+                                            uint16_t mask, bint use_masked,
+                                            int *bcnt, uint32_t *buf) nogil
+    uint8_t random_buffered_bounded_uint8(bitgen_t *bitgen_state,
+                                          uint8_t off, uint8_t rng,
+                                          uint8_t mask, bint use_masked,
+                                          int *bcnt, uint32_t *buf) nogil
+    np.npy_bool random_buffered_bounded_bool(bitgen_t *bitgen_state,
+                                             np.npy_bool off, np.npy_bool rng,
+                                             np.npy_bool mask, bint use_masked,
+                                             int *bcnt, uint32_t *buf) nogil
+    void random_bounded_uint64_fill(bitgen_t *bitgen_state,
+                                    uint64_t off, uint64_t rng, np.npy_intp cnt,
+                                    bint use_masked,
+                                    uint64_t *out) nogil
+    void random_bounded_uint32_fill(bitgen_t *bitgen_state,
+                                    uint32_t off, uint32_t rng, np.npy_intp cnt,
+                                    bint use_masked,
+                                    uint32_t *out) nogil
+    void random_bounded_uint16_fill(bitgen_t *bitgen_state,
+                                    uint16_t off, uint16_t rng, np.npy_intp cnt,
+                                    bint use_masked,
+                                    uint16_t *out) nogil
+    void random_bounded_uint8_fill(bitgen_t *bitgen_state,
+                                   uint8_t off, uint8_t rng, np.npy_intp cnt,
+                                   bint use_masked,
+                                   uint8_t *out) nogil
+    void random_bounded_bool_fill(bitgen_t *bitgen_state,
+                                  np.npy_bool off, np.npy_bool rng, np.npy_intp cnt,
+                                  bint use_masked,
+                                  np.npy_bool *out) nogil
+
+
+
+_integers_types = {'bool': (0, 2),  # dtype name -> (low, high) pair; presumably inclusive low / exclusive high -- TODO confirm
+                 'int8': (-2**7, 2**7),
+                 'int16': (-2**15, 2**15),
+                 'int32': (-2**31, 2**31),
+                 'int64': (-2**63, 2**63),
+                 'uint8': (0, 2**8),
+                 'uint16': (0, 2**16),
+                 'uint32': (0, 2**32),
+                 'uint64': (0, 2**64)}
+
+
+cdef object _rand_uint32_broadcast(np.ndarray low, np.ndarray high, object size,
+                                       bint use_masked, bint closed,
+                                       bitgen_t *state, object lock):
+    """
+    Array path for smaller integer types
+
+    This path is simpler since the high value in the open interval [low, high)
+    must be in-range for the next larger type, uint64. The bounds are checked,
+    cast to this type, and then recast to uint32 when producing the
+    random integers.
+    """
+    cdef uint32_t rng, last_rng, off, val, mask, out_val, is_open  # NOTE(review): val and out_val are unused below
+    cdef uint32_t buf
+    cdef uint32_t *out_data
+    cdef uint64_t low_v, high_v
+    cdef np.ndarray low_arr, high_arr, out_arr
+    cdef np.npy_intp i, cnt
+    cdef np.broadcast it
+    cdef int buf_rem = 0
+
+    # Array path: validate the bounds on the original arrays before they are force-cast
+    is_open = not closed
+    low_arr = <np.ndarray>low
+    high_arr = <np.ndarray>high
+    if np.any(np.less(low_arr, 0)):
+        raise ValueError('low is out of bounds for uint32')
+    if closed:
+        high_comp = np.greater_equal
+        low_high_comp = np.greater
+    else:
+        high_comp = np.greater
+        low_high_comp = np.greater_equal
+
+    if np.any(high_comp(high_arr, 0X100000000ULL)):
+        raise ValueError('high is out of bounds for uint32')
+    if np.any(low_high_comp(low_arr, high_arr)):
+        comp = '>' if closed else '>='
+        raise ValueError('low {comp} high'.format(comp=comp))
+
+    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_UINT64, np.NPY_ALIGNED | np.NPY_FORCECAST)
+    high_arr = <np.ndarray>np.PyArray_FROM_OTF(high, np.NPY_UINT64, np.NPY_ALIGNED | np.NPY_FORCECAST)
+
+    if size is not None:
+        out_arr = <np.ndarray>np.empty(size, np.uint32)
+    else:
+        it = np.PyArray_MultiIterNew2(low_arr, high_arr)  # no size given: use the broadcast shape of low and high
+        out_arr = <np.ndarray>np.empty(it.shape, np.uint32)
+
+    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
+    out_data = <uint32_t *>np.PyArray_DATA(out_arr)
+    cnt = np.PyArray_SIZE(out_arr)
+    mask = last_rng = 0
+    with lock, nogil:
+        for i in range(cnt):
+            low_v = (<uint64_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+            high_v = (<uint64_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
+            # Subtract 1 for open intervals since the generator produces values on the closed interval [off, off+rng]
+            rng = <uint32_t>((high_v - is_open) - low_v)
+            off = <uint32_t>(<uint64_t>low_v)
+
+            if rng != last_rng:  # NOTE(review): last_rng is never reassigned after its initial 0
+                # Smallest bit mask >= rng
+                mask = <uint32_t>_gen_mask(rng)
+
+            out_data[i] = random_buffered_bounded_uint32(state, off, rng, mask, use_masked, &buf_rem, &buf)
+
+            np.PyArray_MultiIter_NEXT(it)
+    return out_arr
+
+cdef object _rand_uint16_broadcast(np.ndarray low, np.ndarray high, object size,
+                                       bint use_masked, bint closed,
+                                       bitgen_t *state, object lock):
+    """
+    Array path for smaller integer types
+
+    This path is simpler since the high value in the open interval [low, high)
+    must be in-range for the next larger type, uint32. The bounds are checked,
+    cast to this type, and then recast to uint16 when producing the
+    random integers.
+    """
+    cdef uint16_t rng, last_rng, off, val, mask, out_val, is_open  # NOTE(review): val and out_val are unused below
+    cdef uint32_t buf
+    cdef uint16_t *out_data
+    cdef uint32_t low_v, high_v
+    cdef np.ndarray low_arr, high_arr, out_arr
+    cdef np.npy_intp i, cnt
+    cdef np.broadcast it
+    cdef int buf_rem = 0
+
+    # Array path: validate the bounds on the original arrays before they are force-cast
+    is_open = not closed
+    low_arr = <np.ndarray>low
+    high_arr = <np.ndarray>high
+    if np.any(np.less(low_arr, 0)):
+        raise ValueError('low is out of bounds for uint16')
+    if closed:
+        high_comp = np.greater_equal
+        low_high_comp = np.greater
+    else:
+        high_comp = np.greater
+        low_high_comp = np.greater_equal
+
+    if np.any(high_comp(high_arr, 0X10000UL)):
+        raise ValueError('high is out of bounds for uint16')
+    if np.any(low_high_comp(low_arr, high_arr)):
+        comp = '>' if closed else '>='
+        raise ValueError('low {comp} high'.format(comp=comp))
+
+    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_UINT32, np.NPY_ALIGNED | np.NPY_FORCECAST)
+    high_arr = <np.ndarray>np.PyArray_FROM_OTF(high, np.NPY_UINT32, np.NPY_ALIGNED | np.NPY_FORCECAST)
+
+    if size is not None:
+        out_arr = <np.ndarray>np.empty(size, np.uint16)
+    else:
+        it = np.PyArray_MultiIterNew2(low_arr, high_arr)  # no size given: use the broadcast shape of low and high
+        out_arr = <np.ndarray>np.empty(it.shape, np.uint16)
+
+    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
+    out_data = <uint16_t *>np.PyArray_DATA(out_arr)
+    cnt = np.PyArray_SIZE(out_arr)
+    mask = last_rng = 0
+    with lock, nogil:
+        for i in range(cnt):
+            low_v = (<uint32_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+            high_v = (<uint32_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
+            # Subtract 1 for open intervals since the generator produces values on the closed interval [off, off+rng]
+            rng = <uint16_t>((high_v - is_open) - low_v)
+            off = <uint16_t>(<uint32_t>low_v)
+
+            if rng != last_rng:  # NOTE(review): last_rng is never reassigned after its initial 0
+                # Smallest bit mask >= rng
+                mask = <uint16_t>_gen_mask(rng)
+
+            out_data[i] = random_buffered_bounded_uint16(state, off, rng, mask, use_masked, &buf_rem, &buf)
+
+            np.PyArray_MultiIter_NEXT(it)
+    return out_arr
+
+cdef object _rand_uint8_broadcast(np.ndarray low, np.ndarray high, object size,
+                                       bint use_masked, bint closed,
+                                       bitgen_t *state, object lock):
+    """
+    Array path for smaller integer types
+
+    This path is simpler since the high value in the open interval [low, high)
+    must be in-range for the next larger type, uint16. The bounds are checked,
+    cast to this type, and then recast to uint8 when producing the
+    random integers.
+    """
+    cdef uint8_t rng, last_rng, off, val, mask, out_val, is_open  # NOTE(review): val and out_val are unused below
+    cdef uint32_t buf
+    cdef uint8_t *out_data
+    cdef uint16_t low_v, high_v
+    cdef np.ndarray low_arr, high_arr, out_arr
+    cdef np.npy_intp i, cnt
+    cdef np.broadcast it
+    cdef int buf_rem = 0
+
+    # Array path: validate the bounds on the original arrays before they are force-cast
+    is_open = not closed
+    low_arr = <np.ndarray>low
+    high_arr = <np.ndarray>high
+    if np.any(np.less(low_arr, 0)):
+        raise ValueError('low is out of bounds for uint8')
+    if closed:
+        high_comp = np.greater_equal
+        low_high_comp = np.greater
+    else:
+        high_comp = np.greater
+        low_high_comp = np.greater_equal
+
+    if np.any(high_comp(high_arr, 0X100UL)):
+        raise ValueError('high is out of bounds for uint8')
+    if np.any(low_high_comp(low_arr, high_arr)):
+        comp = '>' if closed else '>='
+        raise ValueError('low {comp} high'.format(comp=comp))
+
+    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_UINT16, np.NPY_ALIGNED | np.NPY_FORCECAST)
+    high_arr = <np.ndarray>np.PyArray_FROM_OTF(high, np.NPY_UINT16, np.NPY_ALIGNED | np.NPY_FORCECAST)
+
+    if size is not None:
+        out_arr = <np.ndarray>np.empty(size, np.uint8)
+    else:
+        it = np.PyArray_MultiIterNew2(low_arr, high_arr)  # no size given: use the broadcast shape of low and high
+        out_arr = <np.ndarray>np.empty(it.shape, np.uint8)
+
+    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
+    out_data = <uint8_t *>np.PyArray_DATA(out_arr)
+    cnt = np.PyArray_SIZE(out_arr)
+    mask = last_rng = 0
+    with lock, nogil:
+        for i in range(cnt):
+            low_v = (<uint16_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+            high_v = (<uint16_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
+            # Subtract 1 for open intervals since the generator produces values on the closed interval [off, off+rng]
+            rng = <uint8_t>((high_v - is_open) - low_v)
+            off = <uint8_t>(<uint16_t>low_v)
+
+            if rng != last_rng:  # NOTE(review): last_rng is never reassigned after its initial 0
+                # Smallest bit mask >= rng
+                mask = <uint8_t>_gen_mask(rng)
+
+            out_data[i] = random_buffered_bounded_uint8(state, off, rng, mask, use_masked, &buf_rem, &buf)
+
+            np.PyArray_MultiIter_NEXT(it)
+    return out_arr
+
+cdef object _rand_bool_broadcast(np.ndarray low, np.ndarray high, object size,
+                                       bint use_masked, bint closed,
+                                       bitgen_t *state, object lock):
+    """
+    Array path for smaller integer types
+
+    This path is simpler since the high value in the open interval [low, high)
+    must be in-range for the next larger type, uint8. The bounds are checked,
+    cast to this type, and then recast to bool when producing the
+    random integers.
+    """
+    cdef bool_t rng, last_rng, off, val, mask, out_val, is_open  # NOTE(review): val and out_val are unused below
+    cdef uint32_t buf
+    cdef bool_t *out_data
+    cdef uint8_t low_v, high_v
+    cdef np.ndarray low_arr, high_arr, out_arr
+    cdef np.npy_intp i, cnt
+    cdef np.broadcast it
+    cdef int buf_rem = 0
+
+    # Array path: validate the bounds on the original arrays before they are force-cast
+    is_open = not closed
+    low_arr = <np.ndarray>low
+    high_arr = <np.ndarray>high
+    if np.any(np.less(low_arr, 0)):
+        raise ValueError('low is out of bounds for bool')
+    if closed:
+        high_comp = np.greater_equal
+        low_high_comp = np.greater
+    else:
+        high_comp = np.greater
+        low_high_comp = np.greater_equal
+
+    if np.any(high_comp(high_arr, 0x2UL)):
+        raise ValueError('high is out of bounds for bool')
+    if np.any(low_high_comp(low_arr, high_arr)):
+        comp = '>' if closed else '>='
+        raise ValueError('low {comp} high'.format(comp=comp))
+
+    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_UINT8, np.NPY_ALIGNED | np.NPY_FORCECAST)
+    high_arr = <np.ndarray>np.PyArray_FROM_OTF(high, np.NPY_UINT8, np.NPY_ALIGNED | np.NPY_FORCECAST)
+
+    if size is not None:
+        out_arr = <np.ndarray>np.empty(size, np.bool_)
+    else:
+        it = np.PyArray_MultiIterNew2(low_arr, high_arr)  # no size given: use the broadcast shape of low and high
+        out_arr = <np.ndarray>np.empty(it.shape, np.bool_)
+
+    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
+    out_data = <bool_t *>np.PyArray_DATA(out_arr)
+    cnt = np.PyArray_SIZE(out_arr)
+    mask = last_rng = 0
+    with lock, nogil:
+        for i in range(cnt):
+            low_v = (<uint8_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+            high_v = (<uint8_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
+            # Subtract 1 for open intervals since the generator produces values on the closed interval [off, off+rng]
+            rng = <bool_t>((high_v - is_open) - low_v)
+            off = <bool_t>(<uint8_t>low_v)
+
+            if rng != last_rng:  # NOTE(review): last_rng is never reassigned after its initial 0
+                # Smallest bit mask >= rng
+                mask = <bool_t>_gen_mask(rng)
+
+            out_data[i] = random_buffered_bounded_bool(state, off, rng, mask, use_masked, &buf_rem, &buf)
+
+            np.PyArray_MultiIter_NEXT(it)
+    return out_arr
+
+cdef object _rand_int32_broadcast(np.ndarray low, np.ndarray high, object size,
+                                       bint use_masked, bint closed,
+                                       bitgen_t *state, object lock):
+    """
+    Array path for smaller integer types
+
+    This path is simpler since the high value in the open interval [low, high)
+    must be in-range for the next larger type, int64. The bounds are checked,
+    cast to this type (read back as uint64), and then recast to uint32 when
+    producing the random integers for the int32 output.
+    """
+    cdef uint32_t rng, last_rng, off, val, mask, out_val, is_open  # NOTE(review): val and out_val are unused below
+    cdef uint32_t buf
+    cdef uint32_t *out_data
+    cdef uint64_t low_v, high_v
+    cdef np.ndarray low_arr, high_arr, out_arr
+    cdef np.npy_intp i, cnt
+    cdef np.broadcast it
+    cdef int buf_rem = 0
+
+    # Array path: validate the bounds on the original arrays before they are force-cast
+    is_open = not closed
+    low_arr = <np.ndarray>low
+    high_arr = <np.ndarray>high
+    if np.any(np.less(low_arr, -0x80000000LL)):
+        raise ValueError('low is out of bounds for int32')
+    if closed:
+        high_comp = np.greater_equal
+        low_high_comp = np.greater
+    else:
+        high_comp = np.greater
+        low_high_comp = np.greater_equal
+
+    if np.any(high_comp(high_arr, 0x80000000LL)):
+        raise ValueError('high is out of bounds for int32')
+    if np.any(low_high_comp(low_arr, high_arr)):
+        comp = '>' if closed else '>='
+        raise ValueError('low {comp} high'.format(comp=comp))
+
+    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_INT64, np.NPY_ALIGNED | np.NPY_FORCECAST)
+    high_arr = <np.ndarray>np.PyArray_FROM_OTF(high, np.NPY_INT64, np.NPY_ALIGNED | np.NPY_FORCECAST)
+
+    if size is not None:
+        out_arr = <np.ndarray>np.empty(size, np.int32)
+    else:
+        it = np.PyArray_MultiIterNew2(low_arr, high_arr)  # no size given: use the broadcast shape of low and high
+        out_arr = <np.ndarray>np.empty(it.shape, np.int32)
+
+    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
+    out_data = <uint32_t *>np.PyArray_DATA(out_arr)  # the int32 output buffer is written through a uint32_t pointer
+    cnt = np.PyArray_SIZE(out_arr)
+    mask = last_rng = 0
+    with lock, nogil:
+        for i in range(cnt):
+            low_v = (<uint64_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+            high_v = (<uint64_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
+            # Subtract 1 for open intervals since the generator produces values on the closed interval [off, off+rng]
+            rng = <uint32_t>((high_v - is_open) - low_v)
+            off = <uint32_t>(<uint64_t>low_v)
+
+            if rng != last_rng:  # NOTE(review): last_rng is never reassigned after its initial 0
+                # Smallest bit mask >= rng
+                mask = <uint32_t>_gen_mask(rng)
+
+            out_data[i] = random_buffered_bounded_uint32(state, off, rng, mask, use_masked, &buf_rem, &buf)
+
+            np.PyArray_MultiIter_NEXT(it)
+    return out_arr
+
+cdef object _rand_int16_broadcast(np.ndarray low, np.ndarray high, object size,
+                                       bint use_masked, bint closed,
+                                       bitgen_t *state, object lock):
+    """
+    Array path for smaller integer types
+
+    This path is simpler since the high value in the open interval [low, high)
+    must be in-range for the next larger type, int32. The bounds are checked,
+    cast to this type (read back as uint32), and then recast to uint16 when
+    producing the random integers for the int16 output.
+    """
+    cdef uint16_t rng, last_rng, off, val, mask, out_val, is_open  # NOTE(review): val and out_val are unused below
+    cdef uint32_t buf
+    cdef uint16_t *out_data
+    cdef uint32_t low_v, high_v
+    cdef np.ndarray low_arr, high_arr, out_arr
+    cdef np.npy_intp i, cnt
+    cdef np.broadcast it
+    cdef int buf_rem = 0
+
+    # Array path: validate the bounds on the original arrays before they are force-cast
+    is_open = not closed
+    low_arr = <np.ndarray>low
+    high_arr = <np.ndarray>high
+    if np.any(np.less(low_arr, -0x8000LL)):
+        raise ValueError('low is out of bounds for int16')
+    if closed:
+        high_comp = np.greater_equal
+        low_high_comp = np.greater
+    else:
+        high_comp = np.greater
+        low_high_comp = np.greater_equal
+
+    if np.any(high_comp(high_arr, 0x8000LL)):
+        raise ValueError('high is out of bounds for int16')
+    if np.any(low_high_comp(low_arr, high_arr)):
+        comp = '>' if closed else '>='
+        raise ValueError('low {comp} high'.format(comp=comp))
+
+    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_INT32, np.NPY_ALIGNED | np.NPY_FORCECAST)
+    high_arr = <np.ndarray>np.PyArray_FROM_OTF(high, np.NPY_INT32, np.NPY_ALIGNED | np.NPY_FORCECAST)
+
+    if size is not None:
+        out_arr = <np.ndarray>np.empty(size, np.int16)
+    else:
+        it = np.PyArray_MultiIterNew2(low_arr, high_arr)  # no size given: use the broadcast shape of low and high
+        out_arr = <np.ndarray>np.empty(it.shape, np.int16)
+
+    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
+    out_data = <uint16_t *>np.PyArray_DATA(out_arr)  # the int16 output buffer is written through a uint16_t pointer
+    cnt = np.PyArray_SIZE(out_arr)
+    mask = last_rng = 0
+    with lock, nogil:
+        for i in range(cnt):
+            low_v = (<uint32_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+            high_v = (<uint32_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
+            # Subtract 1 for open intervals since the generator produces values on the closed interval [off, off+rng]
+            rng = <uint16_t>((high_v - is_open) - low_v)
+            off = <uint16_t>(<uint32_t>low_v)
+
+            if rng != last_rng:  # NOTE(review): last_rng is never reassigned after its initial 0
+                # Smallest bit mask >= rng
+                mask = <uint16_t>_gen_mask(rng)
+
+            out_data[i] = random_buffered_bounded_uint16(state, off, rng, mask, use_masked, &buf_rem, &buf)
+
+            np.PyArray_MultiIter_NEXT(it)
+    return out_arr
+
+cdef object _rand_int8_broadcast(np.ndarray low, np.ndarray high, object size,
+                                       bint use_masked, bint closed,
+                                       bitgen_t *state, object lock):
+    """
+    Array path for smaller integer types
+
+    This path is simpler since the high value in the open interval [low, high)
+    must be in-range for the next larger type, int16. Here we cast to
+    this type for checking and then recast to int8 when producing the
+    random integers.
+
+    Parameters
+    ----------
+    low : ndarray
+        Lower bounds (inclusive); broadcast against `high`.
+    high : ndarray
+        Upper bounds; exclusive unless `closed` is True.
+    size : int, tuple of ints or None
+        Output shape. If None, the broadcast shape of `low` and `high` is
+        used.
+    use_masked : bool
+        Forwarded to the bounded generator to select the sampling algorithm.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bitgen_t *
+        Bit generator state handed to the core random number generator.
+    lock : object
+        Context manager held while the values are drawn.
+
+    Returns
+    -------
+    out : ndarray of np.int8
+
+    Raises
+    ------
+    ValueError
+        If any `low` or `high` is out of bounds for int8, or if any
+        ``low >= high`` (``low > high`` when `closed` is True).
+    """
+    cdef uint8_t rng, last_rng, off, mask, is_open
+    cdef uint32_t buf
+    cdef uint8_t *out_data
+    cdef uint16_t low_v, high_v
+    cdef np.ndarray low_arr, high_arr, out_arr
+    cdef np.npy_intp i, cnt
+    cdef np.broadcast it
+    cdef int buf_rem = 0
+
+    # Array path
+    is_open = not closed
+    low_arr = <np.ndarray>low
+    high_arr = <np.ndarray>high
+    if np.any(np.less(low_arr, -0x80LL)):
+        raise ValueError('low is out of bounds for int8')
+    # The admissible upper bound shifts by one between the closed and the
+    # half-open interval, so the comparison operators are chosen up front.
+    if closed:
+        high_comp = np.greater_equal
+        low_high_comp = np.greater
+    else:
+        high_comp = np.greater
+        low_high_comp = np.greater_equal
+
+    if np.any(high_comp(high_arr, 0x80LL)):
+        raise ValueError('high is out of bounds for int8')
+    if np.any(low_high_comp(low_arr, high_arr)):
+        comp = '>' if closed else '>='
+        raise ValueError('low {comp} high'.format(comp=comp))
+
+    # Bounds were validated above, so a forced cast to the wider type is safe.
+    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_INT16, np.NPY_ALIGNED | np.NPY_FORCECAST)
+    high_arr = <np.ndarray>np.PyArray_FROM_OTF(high, np.NPY_INT16, np.NPY_ALIGNED | np.NPY_FORCECAST)
+
+    if size is not None:
+        out_arr = <np.ndarray>np.empty(size, np.int8)
+    else:
+        it = np.PyArray_MultiIterNew2(low_arr, high_arr)
+        out_arr = <np.ndarray>np.empty(it.shape, np.int8)
+
+    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
+    # The int8 output buffer is filled through an unsigned pointer; only the
+    # bit pattern matters.
+    out_data = <uint8_t *>np.PyArray_DATA(out_arr)
+    cnt = np.PyArray_SIZE(out_arr)
+    mask = last_rng = 0
+    with lock, nogil:
+        for i in range(cnt):
+            # Stored as int16, read as uint16: the unsigned wrap-around
+            # arithmetic below still yields the correct 8-bit range/offset.
+            low_v = (<uint16_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+            high_v = (<uint16_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
+            # Subtract 1 since generator produces values on the closed int [off, off+rng]
+            rng = <uint8_t>((high_v - is_open) - low_v)
+            off = <uint8_t>(<uint16_t>low_v)
+
+            if rng != last_rng:
+                # Smallest bit mask >= max
+                mask = <uint8_t>_gen_mask(rng)
+                # Remember the range so the mask is reused for repeated values
+                last_rng = rng
+
+            out_data[i] = random_buffered_bounded_uint8(state, off, rng, mask, use_masked, &buf_rem, &buf)
+
+            np.PyArray_MultiIter_NEXT(it)
+    return out_arr
+
+
+cdef object _rand_uint64_broadcast(object low, object high, object size,
+                                       bint use_masked, bint closed,
+                                       bitgen_t *state, object lock):
+    """
+    Array path for 64-bit integer types
+
+    Requires special treatment since the high value can be out-of-range for
+    the largest (64 bit) integer type since the generator is specified on the
+    interval [low,high).
+
+    The internal generator does not have this issue since it generates from
+    the closed interval [low, high-1] and high-1 is always in range for the
+    64 bit integer type.
+
+    Parameters
+    ----------
+    low : ndarray
+        Lower bounds (inclusive); broadcast against `high`.
+    high : ndarray
+        Upper bounds; exclusive unless `closed` is True.
+    size : int, tuple of ints or None
+        Output shape. If None, the broadcast shape of `low` and `high` is
+        used.
+    use_masked : bool
+        Forwarded to the bounded generator to select the sampling algorithm.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bitgen_t *
+        Bit generator state handed to the core random number generator.
+    lock : object
+        Context manager held while the values are drawn.
+
+    Returns
+    -------
+    out : ndarray of np.uint64
+
+    Raises
+    ------
+    ValueError
+        If any `low` or `high` is out of bounds for uint64, or if any
+        ``low >= high`` (``low > high`` when `closed` is True).
+    """
+
+    cdef np.ndarray low_arr, high_arr, out_arr, highm1_arr
+    cdef np.npy_intp i, cnt, n
+    cdef np.broadcast it
+    cdef object closed_upper
+    cdef uint64_t *out_data
+    cdef uint64_t *highm1_data
+    cdef uint64_t low_v, high_v
+    cdef uint64_t rng, last_rng, mask, off
+
+    low_arr = <np.ndarray>low
+    high_arr = <np.ndarray>high
+
+    if np.any(np.less(low_arr, 0x0ULL)):
+        raise ValueError('low is out of bounds for uint64')
+    dt = high_arr.dtype
+    if closed or np.issubdtype(dt, np.integer):
+        # Avoid object dtype path if already an integer
+        high_lower_comp = np.less if closed else np.less_equal
+        if np.any(high_lower_comp(high_arr, 0x0ULL)):
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+        high_m1 = high_arr if closed else high_arr - dt.type(1)
+        if np.any(np.greater(high_m1, 0xFFFFFFFFFFFFFFFFULL)):
+            raise ValueError('high is out of bounds for uint64')
+        highm1_arr = <np.ndarray>np.PyArray_FROM_OTF(high_m1, np.NPY_UINT64, np.NPY_ALIGNED | np.NPY_FORCECAST)
+    else:
+        # If input is object or a floating type
+        # Force C order: the buffer is filled linearly below while `flat`
+        # walks `high_arr` in C order, so the two layouts must agree even
+        # when `high_arr` itself is Fortran-ordered or otherwise strided.
+        highm1_arr = <np.ndarray>np.empty_like(high_arr, dtype=np.uint64, order='C')
+        highm1_data = <uint64_t *>np.PyArray_DATA(highm1_arr)
+        cnt = np.PyArray_SIZE(high_arr)
+        flat = high_arr.flat
+        for i in range(cnt):
+            # Subtract 1 since generator produces values on the closed int [off, off+rng]
+            # Done with Python integers so that high == 2**64 does not overflow
+            closed_upper = int(flat[i]) - 1
+            if closed_upper > 0xFFFFFFFFFFFFFFFFULL:
+                raise ValueError('high is out of bounds for uint64')
+            if closed_upper < 0x0ULL:
+                comp = '>' if closed else '>='
+                raise ValueError('low {comp} high'.format(comp=comp))
+            highm1_data[i] = <uint64_t>closed_upper
+
+    if np.any(np.greater(low_arr, highm1_arr)):
+        comp = '>' if closed else '>='
+        raise ValueError('low {comp} high'.format(comp=comp))
+
+    high_arr = highm1_arr
+    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_UINT64, np.NPY_ALIGNED | np.NPY_FORCECAST)
+
+    if size is not None:
+        out_arr = <np.ndarray>np.empty(size, np.uint64)
+    else:
+        it = np.PyArray_MultiIterNew2(low_arr, high_arr)
+        out_arr = <np.ndarray>np.empty(it.shape, np.uint64)
+
+    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
+    out_data = <uint64_t *>np.PyArray_DATA(out_arr)
+    n = np.PyArray_SIZE(out_arr)
+    mask = last_rng = 0
+    with lock, nogil:
+        for i in range(n):
+            low_v = (<uint64_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+            high_v = (<uint64_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
+            # Generator produces values on the closed int [off, off+rng], -1 subtracted above
+            rng = <uint64_t>(high_v - low_v)
+            off = <uint64_t>(<uint64_t>low_v)
+
+            if rng != last_rng:
+                mask = _gen_mask(rng)
+                # Remember the range so the mask is reused for repeated values
+                last_rng = rng
+            out_data[i] = random_bounded_uint64(state, off, rng, mask, use_masked)
+
+            np.PyArray_MultiIter_NEXT(it)
+
+    return out_arr
+
+cdef object _rand_int64_broadcast(object low, object high, object size,
+                                       bint use_masked, bint closed,
+                                       bitgen_t *state, object lock):
+    """
+    Array path for 64-bit integer types
+
+    Requires special treatment since the high value can be out-of-range for
+    the largest (64 bit) integer type since the generator is specified on the
+    interval [low,high).
+
+    The internal generator does not have this issue since it generates from
+    the closed interval [low, high-1] and high-1 is always in range for the
+    64 bit integer type.
+
+    Parameters
+    ----------
+    low : ndarray
+        Lower bounds (inclusive); broadcast against `high`.
+    high : ndarray
+        Upper bounds; exclusive unless `closed` is True.
+    size : int, tuple of ints or None
+        Output shape. If None, the broadcast shape of `low` and `high` is
+        used.
+    use_masked : bool
+        Forwarded to the bounded generator to select the sampling algorithm.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bitgen_t *
+        Bit generator state handed to the core random number generator.
+    lock : object
+        Context manager held while the values are drawn.
+
+    Returns
+    -------
+    out : ndarray of np.int64
+
+    Raises
+    ------
+    ValueError
+        If any `low` or `high` is out of bounds for int64, or if any
+        ``low >= high`` (``low > high`` when `closed` is True).
+    """
+
+    cdef np.ndarray low_arr, high_arr, out_arr, highm1_arr
+    cdef np.npy_intp i, cnt, n
+    cdef np.broadcast it
+    cdef object closed_upper
+    cdef uint64_t *out_data
+    cdef int64_t *highm1_data
+    cdef int64_t low_v, high_v
+    cdef uint64_t rng, last_rng, mask, off
+
+    low_arr = <np.ndarray>low
+    high_arr = <np.ndarray>high
+
+    if np.any(np.less(low_arr, -0x8000000000000000LL)):
+        raise ValueError('low is out of bounds for int64')
+    dt = high_arr.dtype
+    if closed or np.issubdtype(dt, np.integer):
+        # Avoid object dtype path if already an integer
+        high_lower_comp = np.less if closed else np.less_equal
+        if np.any(high_lower_comp(high_arr, -0x8000000000000000LL)):
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+        high_m1 = high_arr if closed else high_arr - dt.type(1)
+        if np.any(np.greater(high_m1, 0x7FFFFFFFFFFFFFFFLL)):
+            raise ValueError('high is out of bounds for int64')
+        highm1_arr = <np.ndarray>np.PyArray_FROM_OTF(high_m1, np.NPY_INT64, np.NPY_ALIGNED | np.NPY_FORCECAST)
+    else:
+        # If input is object or a floating type
+        # Force C order: the buffer is filled linearly below while `flat`
+        # walks `high_arr` in C order, so the two layouts must agree even
+        # when `high_arr` itself is Fortran-ordered or otherwise strided.
+        highm1_arr = <np.ndarray>np.empty_like(high_arr, dtype=np.int64, order='C')
+        highm1_data = <int64_t *>np.PyArray_DATA(highm1_arr)
+        cnt = np.PyArray_SIZE(high_arr)
+        flat = high_arr.flat
+        for i in range(cnt):
+            # Subtract 1 since generator produces values on the closed int [off, off+rng]
+            # Done with Python integers so that high == 2**63 does not overflow
+            closed_upper = int(flat[i]) - 1
+            if closed_upper > 0x7FFFFFFFFFFFFFFFLL:
+                raise ValueError('high is out of bounds for int64')
+            if closed_upper < -0x8000000000000000LL:
+                comp = '>' if closed else '>='
+                raise ValueError('low {comp} high'.format(comp=comp))
+            highm1_data[i] = <int64_t>closed_upper
+
+    if np.any(np.greater(low_arr, highm1_arr)):
+        comp = '>' if closed else '>='
+        raise ValueError('low {comp} high'.format(comp=comp))
+
+    high_arr = highm1_arr
+    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_INT64, np.NPY_ALIGNED | np.NPY_FORCECAST)
+
+    if size is not None:
+        out_arr = <np.ndarray>np.empty(size, np.int64)
+    else:
+        it = np.PyArray_MultiIterNew2(low_arr, high_arr)
+        out_arr = <np.ndarray>np.empty(it.shape, np.int64)
+
+    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
+    # The int64 output buffer is filled through an unsigned pointer; only the
+    # bit pattern matters.
+    out_data = <uint64_t *>np.PyArray_DATA(out_arr)
+    n = np.PyArray_SIZE(out_arr)
+    mask = last_rng = 0
+    with lock, nogil:
+        for i in range(n):
+            low_v = (<int64_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+            high_v = (<int64_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
+            # Generator produces values on the closed int [off, off+rng], -1 subtracted above
+            rng = <uint64_t>(high_v - low_v)
+            off = <uint64_t>(<int64_t>low_v)
+
+            if rng != last_rng:
+                mask = _gen_mask(rng)
+                # Remember the range so the mask is reused for repeated values
+                last_rng = rng
+            out_data[i] = random_bounded_uint64(state, off, rng, mask, use_masked)
+
+            np.PyArray_MultiIter_NEXT(it)
+
+    return out_arr
+
+
+cdef object _rand_uint64(object low, object high, object size,
+                             bint use_masked, bint closed,
+                             bitgen_t *state, object lock):
+    """
+    _rand_uint64(low, high, size, use_masked, closed, *state, lock)
+
+    Return random np.uint64 integers from `low` (inclusive) to `high`
+    (exclusive, or inclusive when `closed` is True).
+
+    Return random integers from the "discrete uniform" distribution in the
+    interval [`low`, `high`), or [`low`, `high`] when `closed` is True.
+    Both bounds are checked here against the range of np.uint64. The
+    ``high=None`` convention is not handled in this function; presumably
+    the caller resolves it beforehand.
+
+    Parameters
+    ----------
+    low : int or array-like
+        Lowest integer to be drawn from the distribution.
+    high : int or array-like
+        One above the largest integer to be drawn from the distribution,
+        or the largest such integer itself when `closed` is True.
+    size : int or tuple of ints
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  Default is None, in which case a
+        single value is returned.
+    use_masked : bool
+        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bit generator
+        Bit generator state to use in the core random number generators
+    lock : threading.Lock
+        Lock to prevent multiple threads from using a single generator
+        simultaneously
+
+    Returns
+    -------
+    out : np.uint64 scalar or ndarray of np.uint64
+          `size`-shaped array of random integers from the appropriate
+          distribution, or a single such random int if `size` not provided.
+
+    Raises
+    ------
+    ValueError
+        If `low` or `high` is out of bounds for uint64, or if the interval
+        is empty (``low >= high``, or ``low > high`` when `closed`).
+
+    Notes
+    -----
+    The internal integer generator produces values from the closed
+    interval [low, high-(not closed)].  This requires some care since
+    high can be out-of-range for uint64. The scalar path leaves
+    integers as Python integers until the 1 has been subtracted to
+    avoid needing to cast to a larger type.
+    """
+    cdef np.ndarray out_arr, low_arr, high_arr
+    cdef uint64_t rng, off, out_val
+    cdef uint64_t *out_data
+    cdef np.npy_intp cnt
+
+    # Nothing to draw: return the empty result before validating the bounds
+    if size is not None:
+        if (np.prod(size) == 0):
+            return np.empty(size, dtype=np.uint64)
+
+    low_arr = <np.ndarray>np.asarray(low)
+    high_arr = <np.ndarray>np.asarray(high)
+    low_ndim = np.PyArray_NDIM(low_arr)
+    high_ndim = np.PyArray_NDIM(high_arr)
+    # Scalar path: both bounds are 0-d, or 1-element 1-d arrays when an
+    # explicit size is given. Everything else is broadcast.
+    if ((low_ndim == 0 or (low_ndim == 1 and low_arr.size == 1 and size is not None)) and
+            (high_ndim == 0 or (high_ndim == 1 and high_arr.size == 1 and size is not None))):
+        low = int(low_arr)
+        high = int(high_arr)
+        # Subtract 1 since internal generator produces on closed interval [low, high]
+        if not closed:
+            high -= 1
+
+        if low < 0x0ULL:
+            raise ValueError("low is out of bounds for uint64")
+        if high > 0xFFFFFFFFFFFFFFFFULL:
+            raise ValueError("high is out of bounds for uint64")
+        if low > high:  # -1 already subtracted, closed interval
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+
+        rng = <uint64_t>(high - low)
+        off = <uint64_t>(<uint64_t>low)
+        if size is None:
+            with lock:
+                random_bounded_uint64_fill(state, off, rng, 1, use_masked, &out_val)
+            return np.uint64(<uint64_t>out_val)
+        else:
+            out_arr = <np.ndarray>np.empty(size, np.uint64)
+            cnt = np.PyArray_SIZE(out_arr)
+            out_data = <uint64_t *>np.PyArray_DATA(out_arr)
+            with lock, nogil:
+                random_bounded_uint64_fill(state, off, rng, cnt, use_masked, out_data)
+            return out_arr
+    return _rand_uint64_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
+
+cdef object _rand_uint32(object low, object high, object size,
+                             bint use_masked, bint closed,
+                             bitgen_t *state, object lock):
+    """
+    _rand_uint32(low, high, size, use_masked, closed, *state, lock)
+
+    Return random np.uint32 integers from `low` (inclusive) to `high`
+    (exclusive, or inclusive when `closed` is True).
+
+    Return random integers from the "discrete uniform" distribution in the
+    interval [`low`, `high`), or [`low`, `high`] when `closed` is True.
+    Both bounds are checked here against the range of np.uint32. The
+    ``high=None`` convention is not handled in this function; presumably
+    the caller resolves it beforehand.
+
+    Parameters
+    ----------
+    low : int or array-like
+        Lowest integer to be drawn from the distribution.
+    high : int or array-like
+        One above the largest integer to be drawn from the distribution,
+        or the largest such integer itself when `closed` is True.
+    size : int or tuple of ints
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  Default is None, in which case a
+        single value is returned.
+    use_masked : bool
+        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bit generator
+        Bit generator state to use in the core random number generators
+    lock : threading.Lock
+        Lock to prevent multiple threads from using a single generator
+        simultaneously
+
+    Returns
+    -------
+    out : np.uint32 scalar or ndarray of np.uint32
+          `size`-shaped array of random integers from the appropriate
+          distribution, or a single such random int if `size` not provided.
+
+    Raises
+    ------
+    ValueError
+        If `low` or `high` is out of bounds for uint32, or if the interval
+        is empty (``low >= high``, or ``low > high`` when `closed`).
+
+    Notes
+    -----
+    The internal integer generator produces values from the closed
+    interval [low, high-(not closed)].  This requires some care since
+    high can be out-of-range for uint32. The scalar path leaves
+    integers as Python integers until the 1 has been subtracted to
+    avoid needing to cast to a larger type.
+    """
+    cdef np.ndarray out_arr, low_arr, high_arr
+    cdef uint32_t rng, off, out_val
+    cdef uint32_t *out_data
+    cdef np.npy_intp cnt
+
+    # Nothing to draw: return the empty result before validating the bounds
+    if size is not None:
+        if (np.prod(size) == 0):
+            return np.empty(size, dtype=np.uint32)
+
+    low_arr = <np.ndarray>np.asarray(low)
+    high_arr = <np.ndarray>np.asarray(high)
+    low_ndim = np.PyArray_NDIM(low_arr)
+    high_ndim = np.PyArray_NDIM(high_arr)
+    # Scalar path: both bounds are 0-d, or 1-element 1-d arrays when an
+    # explicit size is given. Everything else is broadcast.
+    if ((low_ndim == 0 or (low_ndim == 1 and low_arr.size == 1 and size is not None)) and
+            (high_ndim == 0 or (high_ndim == 1 and high_arr.size == 1 and size is not None))):
+        low = int(low_arr)
+        high = int(high_arr)
+        # Subtract 1 since internal generator produces on closed interval [low, high]
+        if not closed:
+            high -= 1
+
+        if low < 0x0UL:
+            raise ValueError("low is out of bounds for uint32")
+        if high > 0XFFFFFFFFUL:
+            raise ValueError("high is out of bounds for uint32")
+        if low > high:  # -1 already subtracted, closed interval
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+
+        rng = <uint32_t>(high - low)
+        off = <uint32_t>(<uint32_t>low)
+        if size is None:
+            with lock:
+                random_bounded_uint32_fill(state, off, rng, 1, use_masked, &out_val)
+            return np.uint32(<uint32_t>out_val)
+        else:
+            out_arr = <np.ndarray>np.empty(size, np.uint32)
+            cnt = np.PyArray_SIZE(out_arr)
+            out_data = <uint32_t *>np.PyArray_DATA(out_arr)
+            with lock, nogil:
+                random_bounded_uint32_fill(state, off, rng, cnt, use_masked, out_data)
+            return out_arr
+    return _rand_uint32_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
+
+cdef object _rand_uint16(object low, object high, object size,
+                             bint use_masked, bint closed,
+                             bitgen_t *state, object lock):
+    """
+    _rand_uint16(low, high, size, use_masked, closed, *state, lock)
+
+    Return random np.uint16 integers from `low` (inclusive) to `high`
+    (exclusive, or inclusive when `closed` is True).
+
+    Return random integers from the "discrete uniform" distribution in the
+    interval [`low`, `high`), or [`low`, `high`] when `closed` is True.
+    Both bounds are checked here against the range of np.uint16. The
+    ``high=None`` convention is not handled in this function; presumably
+    the caller resolves it beforehand.
+
+    Parameters
+    ----------
+    low : int or array-like
+        Lowest integer to be drawn from the distribution.
+    high : int or array-like
+        One above the largest integer to be drawn from the distribution,
+        or the largest such integer itself when `closed` is True.
+    size : int or tuple of ints
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  Default is None, in which case a
+        single value is returned.
+    use_masked : bool
+        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bit generator
+        Bit generator state to use in the core random number generators
+    lock : threading.Lock
+        Lock to prevent multiple threads from using a single generator
+        simultaneously
+
+    Returns
+    -------
+    out : np.uint16 scalar or ndarray of np.uint16
+          `size`-shaped array of random integers from the appropriate
+          distribution, or a single such random int if `size` not provided.
+
+    Raises
+    ------
+    ValueError
+        If `low` or `high` is out of bounds for uint16, or if the interval
+        is empty (``low >= high``, or ``low > high`` when `closed`).
+
+    Notes
+    -----
+    The internal integer generator produces values from the closed
+    interval [low, high-(not closed)].  This requires some care since
+    high can be out-of-range for uint16. The scalar path leaves
+    integers as Python integers until the 1 has been subtracted to
+    avoid needing to cast to a larger type.
+    """
+    cdef np.ndarray out_arr, low_arr, high_arr
+    cdef uint16_t rng, off, out_val
+    cdef uint16_t *out_data
+    cdef np.npy_intp cnt
+
+    # Nothing to draw: return the empty result before validating the bounds
+    if size is not None:
+        if (np.prod(size) == 0):
+            return np.empty(size, dtype=np.uint16)
+
+    low_arr = <np.ndarray>np.asarray(low)
+    high_arr = <np.ndarray>np.asarray(high)
+    low_ndim = np.PyArray_NDIM(low_arr)
+    high_ndim = np.PyArray_NDIM(high_arr)
+    # Scalar path: both bounds are 0-d, or 1-element 1-d arrays when an
+    # explicit size is given. Everything else is broadcast.
+    if ((low_ndim == 0 or (low_ndim == 1 and low_arr.size == 1 and size is not None)) and
+            (high_ndim == 0 or (high_ndim == 1 and high_arr.size == 1 and size is not None))):
+        low = int(low_arr)
+        high = int(high_arr)
+        # Subtract 1 since internal generator produces on closed interval [low, high]
+        if not closed:
+            high -= 1
+
+        if low < 0x0UL:
+            raise ValueError("low is out of bounds for uint16")
+        if high > 0XFFFFUL:
+            raise ValueError("high is out of bounds for uint16")
+        if low > high:  # -1 already subtracted, closed interval
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+
+        rng = <uint16_t>(high - low)
+        off = <uint16_t>(<uint16_t>low)
+        if size is None:
+            with lock:
+                random_bounded_uint16_fill(state, off, rng, 1, use_masked, &out_val)
+            return np.uint16(<uint16_t>out_val)
+        else:
+            out_arr = <np.ndarray>np.empty(size, np.uint16)
+            cnt = np.PyArray_SIZE(out_arr)
+            out_data = <uint16_t *>np.PyArray_DATA(out_arr)
+            with lock, nogil:
+                random_bounded_uint16_fill(state, off, rng, cnt, use_masked, out_data)
+            return out_arr
+    return _rand_uint16_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
+
+cdef object _rand_uint8(object low, object high, object size,
+                             bint use_masked, bint closed,
+                             bitgen_t *state, object lock):
+    """
+    _rand_uint8(low, high, size, use_masked, closed, *state, lock)
+
+    Return random np.uint8 integers from `low` (inclusive) to `high`
+    (exclusive, or inclusive when `closed` is True).
+
+    Return random integers from the "discrete uniform" distribution in the
+    interval [`low`, `high`), or [`low`, `high`] when `closed` is True.
+    Both bounds are checked here against the range of np.uint8. The
+    ``high=None`` convention is not handled in this function; presumably
+    the caller resolves it beforehand.
+
+    Parameters
+    ----------
+    low : int or array-like
+        Lowest integer to be drawn from the distribution.
+    high : int or array-like
+        One above the largest integer to be drawn from the distribution,
+        or the largest such integer itself when `closed` is True.
+    size : int or tuple of ints
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  Default is None, in which case a
+        single value is returned.
+    use_masked : bool
+        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bit generator
+        Bit generator state to use in the core random number generators
+    lock : threading.Lock
+        Lock to prevent multiple threads from using a single generator
+        simultaneously
+
+    Returns
+    -------
+    out : np.uint8 scalar or ndarray of np.uint8
+          `size`-shaped array of random integers from the appropriate
+          distribution, or a single such random int if `size` not provided.
+
+    Raises
+    ------
+    ValueError
+        If `low` or `high` is out of bounds for uint8, or if the interval
+        is empty (``low >= high``, or ``low > high`` when `closed`).
+
+    Notes
+    -----
+    The internal integer generator produces values from the closed
+    interval [low, high-(not closed)].  This requires some care since
+    high can be out-of-range for uint8. The scalar path leaves
+    integers as Python integers until the 1 has been subtracted to
+    avoid needing to cast to a larger type.
+    """
+    cdef np.ndarray out_arr, low_arr, high_arr
+    cdef uint8_t rng, off, out_val
+    cdef uint8_t *out_data
+    cdef np.npy_intp cnt
+
+    # Nothing to draw: return the empty result before validating the bounds
+    if size is not None:
+        if (np.prod(size) == 0):
+            return np.empty(size, dtype=np.uint8)
+
+    low_arr = <np.ndarray>np.asarray(low)
+    high_arr = <np.ndarray>np.asarray(high)
+    low_ndim = np.PyArray_NDIM(low_arr)
+    high_ndim = np.PyArray_NDIM(high_arr)
+    # Scalar path: both bounds are 0-d, or 1-element 1-d arrays when an
+    # explicit size is given. Everything else is broadcast.
+    if ((low_ndim == 0 or (low_ndim == 1 and low_arr.size == 1 and size is not None)) and
+            (high_ndim == 0 or (high_ndim == 1 and high_arr.size == 1 and size is not None))):
+        low = int(low_arr)
+        high = int(high_arr)
+        # Subtract 1 since internal generator produces on closed interval [low, high]
+        if not closed:
+            high -= 1
+
+        if low < 0x0UL:
+            raise ValueError("low is out of bounds for uint8")
+        if high > 0XFFUL:
+            raise ValueError("high is out of bounds for uint8")
+        if low > high:  # -1 already subtracted, closed interval
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+
+        rng = <uint8_t>(high - low)
+        off = <uint8_t>(<uint8_t>low)
+        if size is None:
+            with lock:
+                random_bounded_uint8_fill(state, off, rng, 1, use_masked, &out_val)
+            return np.uint8(<uint8_t>out_val)
+        else:
+            out_arr = <np.ndarray>np.empty(size, np.uint8)
+            cnt = np.PyArray_SIZE(out_arr)
+            out_data = <uint8_t *>np.PyArray_DATA(out_arr)
+            with lock, nogil:
+                random_bounded_uint8_fill(state, off, rng, cnt, use_masked, out_data)
+            return out_arr
+    return _rand_uint8_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
+
+cdef object _rand_bool(object low, object high, object size,
+                             bint use_masked, bint closed,
+                             bitgen_t *state, object lock):
+    """
+    _rand_bool(low, high, size, use_masked, closed, *state, lock)
+
+    Return random np.bool_ values from `low` (inclusive) to `high`
+    (exclusive, or inclusive when `closed` is True).
+
+    Return random values from the "discrete uniform" distribution in the
+    interval [`low`, `high`), or [`low`, `high`] when `closed` is True.
+    Both bounds are checked here against the range of np.bool_. The
+    ``high=None`` convention is not handled in this function; presumably
+    the caller resolves it beforehand.
+
+    Parameters
+    ----------
+    low : int or array-like
+        Lowest integer to be drawn from the distribution.
+    high : int or array-like
+        One above the largest integer to be drawn from the distribution,
+        or the largest such integer itself when `closed` is True.
+    size : int or tuple of ints
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  Default is None, in which case a
+        single value is returned.
+    use_masked : bool
+        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bit generator
+        Bit generator state to use in the core random number generators
+    lock : threading.Lock
+        Lock to prevent multiple threads from using a single generator
+        simultaneously
+
+    Returns
+    -------
+    out : np.bool_ scalar or ndarray of np.bool_
+          `size`-shaped array of random values from the appropriate
+          distribution, or a single such random value if `size` not provided.
+
+    Raises
+    ------
+    ValueError
+        If `low` or `high` is out of bounds for bool, or if the interval
+        is empty (``low >= high``, or ``low > high`` when `closed`).
+
+    Notes
+    -----
+    The internal integer generator produces values from the closed
+    interval [low, high-(not closed)].  This requires some care since
+    high can be out-of-range for bool. The scalar path leaves
+    integers as Python integers until the 1 has been subtracted to
+    avoid needing to cast to a larger type.
+    """
+    cdef np.ndarray out_arr, low_arr, high_arr
+    cdef bool_t rng, off, out_val
+    cdef bool_t *out_data
+    cdef np.npy_intp cnt
+
+    # Nothing to draw: return the empty result before validating the bounds
+    if size is not None:
+        if (np.prod(size) == 0):
+            # np.bool_ (not the builtin alias np.bool) matches the scalar
+            # return below
+            return np.empty(size, dtype=np.bool_)
+
+    low_arr = <np.ndarray>np.asarray(low)
+    high_arr = <np.ndarray>np.asarray(high)
+    low_ndim = np.PyArray_NDIM(low_arr)
+    high_ndim = np.PyArray_NDIM(high_arr)
+    # Scalar path: both bounds are 0-d, or 1-element 1-d arrays when an
+    # explicit size is given. Everything else is broadcast.
+    if ((low_ndim == 0 or (low_ndim == 1 and low_arr.size == 1 and size is not None)) and
+            (high_ndim == 0 or (high_ndim == 1 and high_arr.size == 1 and size is not None))):
+        low = int(low_arr)
+        high = int(high_arr)
+        # Subtract 1 since internal generator produces on closed interval [low, high]
+        if not closed:
+            high -= 1
+
+        if low < 0x0UL:
+            raise ValueError("low is out of bounds for bool")
+        if high > 0x1UL:
+            raise ValueError("high is out of bounds for bool")
+        if low > high:  # -1 already subtracted, closed interval
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+
+        rng = <bool_t>(high - low)
+        off = <bool_t>(<bool_t>low)
+        if size is None:
+            with lock:
+                random_bounded_bool_fill(state, off, rng, 1, use_masked, &out_val)
+            return np.bool_(<bool_t>out_val)
+        else:
+            out_arr = <np.ndarray>np.empty(size, np.bool_)
+            cnt = np.PyArray_SIZE(out_arr)
+            out_data = <bool_t *>np.PyArray_DATA(out_arr)
+            with lock, nogil:
+                random_bounded_bool_fill(state, off, rng, cnt, use_masked, out_data)
+            return out_arr
+    return _rand_bool_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
+
+cdef object _rand_int64(object low, object high, object size,
+                             bint use_masked, bint closed,
+                             bitgen_t *state, object lock):
+    """
+    _rand_int64(low, high, size, use_masked, closed, *state, lock)
+
+    Return random np.int64 integers from `low` (inclusive) to `high` (exclusive).
+
+    Return random integers from the "discrete uniform" distribution in the
+    interval [`low`, `high`), or [`low`, `high`] when `closed` is True.
+    `high` is used as given; no ``high=None`` default is applied here.
+    Scalar bounds are checked below against the np.int64 range and for order.
+
+    Parameters
+    ----------
+    low : int or array-like
+        Lowest (signed) integer to be drawn from the distribution.
+    high : int or array-like
+        One above the largest (signed) integer to be drawn from the
+        distribution, or the largest such integer when `closed` is True.
+    size : int or tuple of ints
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  If None and the bounds are scalar,
+        a single value is returned.
+    use_masked : bool
+        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bit generator
+        Bit generator state to use in the core random number generators
+    lock : threading.Lock
+        Lock to prevent multiple threads from using a single generator simultaneously
+
+    Returns
+    -------
+    out : np.int64 scalar or ndarray of np.int64
+          `size`-shaped array of random integers from the appropriate
+          distribution, or a single such random int if `size` not provided.
+
+    Notes
+    -----
+    The internal integer generator produces values from the closed
+    interval [low, high-(not closed)].  This requires some care since
+    high can be out-of-range for int64. The scalar path leaves
+    integers as Python integers until the 1 has been subtracted to
+    avoid needing to cast to a larger type.
+    """
+    cdef np.ndarray out_arr, low_arr, high_arr
+    cdef uint64_t rng, off, out_val
+    cdef uint64_t *out_data
+    cdef np.npy_intp i, n, cnt
+
+    if size is not None:
+        if (np.prod(size) == 0):
+            return np.empty(size, dtype=np.int64)
+
+    low_arr = <np.ndarray>np.array(low, copy=False)
+    high_arr = <np.ndarray>np.array(high, copy=False)
+    low_ndim = np.PyArray_NDIM(low_arr)
+    high_ndim = np.PyArray_NDIM(high_arr)
+    # Scalar path: each bound is 0-d, or a 1-element 1-d array with `size` given
+    if ((low_ndim == 0 or (low_ndim == 1 and low_arr.size == 1 and size is not None)) and
+            (high_ndim == 0 or (high_ndim == 1 and high_arr.size == 1 and size is not None))):
+        low = int(low_arr)
+        high = int(high_arr)
+        # Subtract 1 since internal generator produces on closed interval [low, high]
+        if not closed:
+            high -= 1
+
+        if low < -0x8000000000000000LL:
+            raise ValueError("low is out of bounds for int64")
+        if high > 0x7FFFFFFFFFFFFFFFL:
+            raise ValueError("high is out of bounds for int64")
+        if low > high:  # -1 already subtracted, closed interval
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+
+        # Fill routine works on uint64: pass the span and low converted to unsigned
+        rng = <uint64_t>(high - low)
+        off = <uint64_t>(<int64_t>low)
+        if size is None:
+            with lock:
+                random_bounded_uint64_fill(state, off, rng, 1, use_masked, &out_val)
+            return np.int64(<int64_t>out_val)
+        else:
+            out_arr = <np.ndarray>np.empty(size, np.int64)
+            cnt = np.PyArray_SIZE(out_arr)
+            out_data = <uint64_t *>np.PyArray_DATA(out_arr)
+            with lock, nogil:
+                random_bounded_uint64_fill(state, off, rng, cnt, use_masked, out_data)
+            return out_arr
+    return _rand_int64_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
+
+cdef object _rand_int32(object low, object high, object size,
+                             bint use_masked, bint closed,
+                             bitgen_t *state, object lock):
+    """
+    _rand_int32(low, high, size, use_masked, closed, *state, lock)
+
+    Return random np.int32 integers from `low` (inclusive) to `high` (exclusive).
+
+    Return random integers from the "discrete uniform" distribution in the
+    interval [`low`, `high`), or [`low`, `high`] when `closed` is True.
+    `high` is used as given; no ``high=None`` default is applied here.
+    Scalar bounds are checked below against the np.int32 range and for order.
+
+    Parameters
+    ----------
+    low : int or array-like
+        Lowest (signed) integer to be drawn from the distribution.
+    high : int or array-like
+        One above the largest (signed) integer to be drawn from the
+        distribution, or the largest such integer when `closed` is True.
+    size : int or tuple of ints
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  If None and the bounds are scalar,
+        a single value is returned.
+    use_masked : bool
+        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bit generator
+        Bit generator state to use in the core random number generators
+    lock : threading.Lock
+        Lock to prevent multiple threads from using a single generator simultaneously
+
+    Returns
+    -------
+    out : np.int32 scalar or ndarray of np.int32
+          `size`-shaped array of random integers from the appropriate
+          distribution, or a single such random int if `size` not provided.
+
+    Notes
+    -----
+    The internal integer generator produces values from the closed
+    interval [low, high-(not closed)].  This requires some care since
+    high can be out-of-range for int32. The scalar path leaves
+    integers as Python integers until the 1 has been subtracted to
+    avoid needing to cast to a larger type.
+    """
+    cdef np.ndarray out_arr, low_arr, high_arr
+    cdef uint32_t rng, off, out_val
+    cdef uint32_t *out_data
+    cdef np.npy_intp i, n, cnt
+
+    if size is not None:
+        if (np.prod(size) == 0):
+            return np.empty(size, dtype=np.int32)
+
+    low_arr = <np.ndarray>np.array(low, copy=False)
+    high_arr = <np.ndarray>np.array(high, copy=False)
+    low_ndim = np.PyArray_NDIM(low_arr)
+    high_ndim = np.PyArray_NDIM(high_arr)
+    # Scalar path: each bound is 0-d, or a 1-element 1-d array with `size` given
+    if ((low_ndim == 0 or (low_ndim == 1 and low_arr.size == 1 and size is not None)) and
+            (high_ndim == 0 or (high_ndim == 1 and high_arr.size == 1 and size is not None))):
+        low = int(low_arr)
+        high = int(high_arr)
+        # Subtract 1 since internal generator produces on closed interval [low, high]
+        if not closed:
+            high -= 1
+
+        if low < -0x80000000L:
+            raise ValueError("low is out of bounds for int32")
+        if high > 0x7FFFFFFFL:
+            raise ValueError("high is out of bounds for int32")
+        if low > high:  # -1 already subtracted, closed interval
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+
+        # Fill routine works on uint32: pass the span and low converted to unsigned
+        rng = <uint32_t>(high - low)
+        off = <uint32_t>(<int32_t>low)
+        if size is None:
+            with lock:
+                random_bounded_uint32_fill(state, off, rng, 1, use_masked, &out_val)
+            return np.int32(<int32_t>out_val)
+        else:
+            out_arr = <np.ndarray>np.empty(size, np.int32)
+            cnt = np.PyArray_SIZE(out_arr)
+            out_data = <uint32_t *>np.PyArray_DATA(out_arr)
+            with lock, nogil:
+                random_bounded_uint32_fill(state, off, rng, cnt, use_masked, out_data)
+            return out_arr
+    return _rand_int32_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
+
+cdef object _rand_int16(object low, object high, object size,
+                             bint use_masked, bint closed,
+                             bitgen_t *state, object lock):
+    """
+    _rand_int16(low, high, size, use_masked, closed, *state, lock)
+
+    Return random np.int16 integers from `low` (inclusive) to `high` (exclusive).
+
+    Return random integers from the "discrete uniform" distribution in the
+    interval [`low`, `high`), or [`low`, `high`] when `closed` is True.
+    `high` is used as given; no ``high=None`` default is applied here.
+    Scalar bounds are checked below against the np.int16 range and for order.
+
+    Parameters
+    ----------
+    low : int or array-like
+        Lowest (signed) integer to be drawn from the distribution.
+    high : int or array-like
+        One above the largest (signed) integer to be drawn from the
+        distribution, or the largest such integer when `closed` is True.
+    size : int or tuple of ints
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  If None and the bounds are scalar,
+        a single value is returned.
+    use_masked : bool
+        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bit generator
+        Bit generator state to use in the core random number generators
+    lock : threading.Lock
+        Lock to prevent multiple threads from using a single generator simultaneously
+
+    Returns
+    -------
+    out : np.int16 scalar or ndarray of np.int16
+          `size`-shaped array of random integers from the appropriate
+          distribution, or a single such random int if `size` not provided.
+
+    Notes
+    -----
+    The internal integer generator produces values from the closed
+    interval [low, high-(not closed)].  This requires some care since
+    high can be out-of-range for int16. The scalar path leaves
+    integers as Python integers until the 1 has been subtracted to
+    avoid needing to cast to a larger type.
+    """
+    cdef np.ndarray out_arr, low_arr, high_arr
+    cdef uint16_t rng, off, out_val
+    cdef uint16_t *out_data
+    cdef np.npy_intp i, n, cnt
+
+    if size is not None:
+        if (np.prod(size) == 0):
+            return np.empty(size, dtype=np.int16)
+
+    low_arr = <np.ndarray>np.array(low, copy=False)
+    high_arr = <np.ndarray>np.array(high, copy=False)
+    low_ndim = np.PyArray_NDIM(low_arr)
+    high_ndim = np.PyArray_NDIM(high_arr)
+    # Scalar path: each bound is 0-d, or a 1-element 1-d array with `size` given
+    if ((low_ndim == 0 or (low_ndim == 1 and low_arr.size == 1 and size is not None)) and
+            (high_ndim == 0 or (high_ndim == 1 and high_arr.size == 1 and size is not None))):
+        low = int(low_arr)
+        high = int(high_arr)
+        # Subtract 1 since internal generator produces on closed interval [low, high]
+        if not closed:
+            high -= 1
+
+        if low < -0x8000L:
+            raise ValueError("low is out of bounds for int16")
+        if high > 0x7FFFL:
+            raise ValueError("high is out of bounds for int16")
+        if low > high:  # -1 already subtracted, closed interval
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+
+        # Fill routine works on uint16: pass the span and low converted to unsigned
+        rng = <uint16_t>(high - low)
+        off = <uint16_t>(<int16_t>low)
+        if size is None:
+            with lock:
+                random_bounded_uint16_fill(state, off, rng, 1, use_masked, &out_val)
+            return np.int16(<int16_t>out_val)
+        else:
+            out_arr = <np.ndarray>np.empty(size, np.int16)
+            cnt = np.PyArray_SIZE(out_arr)
+            out_data = <uint16_t *>np.PyArray_DATA(out_arr)
+            with lock, nogil:
+                random_bounded_uint16_fill(state, off, rng, cnt, use_masked, out_data)
+            return out_arr
+    return _rand_int16_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
+
+cdef object _rand_int8(object low, object high, object size,
+                             bint use_masked, bint closed,
+                             bitgen_t *state, object lock):
+    """
+    _rand_int8(low, high, size, use_masked, closed, *state, lock)
+
+    Return random np.int8 integers from `low` (inclusive) to `high` (exclusive).
+
+    Return random integers from the "discrete uniform" distribution in the
+    interval [`low`, `high`), or [`low`, `high`] when `closed` is True.
+    `high` is used as given; no ``high=None`` default is applied here.
+    Scalar bounds are checked below against the np.int8 range and for order.
+
+    Parameters
+    ----------
+    low : int or array-like
+        Lowest (signed) integer to be drawn from the distribution.
+    high : int or array-like
+        One above the largest (signed) integer to be drawn from the
+        distribution, or the largest such integer when `closed` is True.
+    size : int or tuple of ints
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  If None and the bounds are scalar,
+        a single value is returned.
+    use_masked : bool
+        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
+    closed : bool
+        If True then sample from [low, high].  If False, sample [low, high)
+    state : bit generator
+        Bit generator state to use in the core random number generators
+    lock : threading.Lock
+        Lock to prevent multiple threads from using a single generator simultaneously
+
+    Returns
+    -------
+    out : np.int8 scalar or ndarray of np.int8
+          `size`-shaped array of random integers from the appropriate
+          distribution, or a single such random int if `size` not provided.
+
+    Notes
+    -----
+    The internal integer generator produces values from the closed
+    interval [low, high-(not closed)].  This requires some care since
+    high can be out-of-range for int8. The scalar path leaves
+    integers as Python integers until the 1 has been subtracted to
+    avoid needing to cast to a larger type.
+    """
+    cdef np.ndarray out_arr, low_arr, high_arr
+    cdef uint8_t rng, off, out_val
+    cdef uint8_t *out_data
+    cdef np.npy_intp i, n, cnt
+
+    if size is not None:
+        if (np.prod(size) == 0):
+            return np.empty(size, dtype=np.int8)
+
+    low_arr = <np.ndarray>np.array(low, copy=False)
+    high_arr = <np.ndarray>np.array(high, copy=False)
+    low_ndim = np.PyArray_NDIM(low_arr)
+    high_ndim = np.PyArray_NDIM(high_arr)
+    # Scalar path: each bound is 0-d, or a 1-element 1-d array with `size` given
+    if ((low_ndim == 0 or (low_ndim == 1 and low_arr.size == 1 and size is not None)) and
+            (high_ndim == 0 or (high_ndim == 1 and high_arr.size == 1 and size is not None))):
+        low = int(low_arr)
+        high = int(high_arr)
+        # Subtract 1 since internal generator produces on closed interval [low, high]
+        if not closed:
+            high -= 1
+
+        if low < -0x80L:
+            raise ValueError("low is out of bounds for int8")
+        if high > 0x7FL:
+            raise ValueError("high is out of bounds for int8")
+        if low > high:  # -1 already subtracted, closed interval
+            comp = '>' if closed else '>='
+            raise ValueError('low {comp} high'.format(comp=comp))
+
+        # Fill routine works on uint8: pass the span and low converted to unsigned
+        rng = <uint8_t>(high - low)
+        off = <uint8_t>(<int8_t>low)
+        if size is None:
+            with lock:
+                random_bounded_uint8_fill(state, off, rng, 1, use_masked, &out_val)
+            return np.int8(<int8_t>out_val)
+        else:
+            out_arr = <np.ndarray>np.empty(size, np.int8)
+            cnt = np.PyArray_SIZE(out_arr)
+            out_data = <uint8_t *>np.PyArray_DATA(out_arr)
+            with lock, nogil:
+                random_bounded_uint8_fill(state, off, rng, cnt, use_masked, out_data)
+            return out_arr
+    return _rand_int8_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
diff --git a/numpy/random/bounded_integers.pyx.in b/numpy/random/_bounded_integers.pyx.in
index 411b65a37..47cb13b3a 100644
--- a/numpy/random/bounded_integers.pyx.in
+++ b/numpy/random/_bounded_integers.pyx.in
@@ -4,12 +4,54 @@
 import numpy as np
 cimport numpy as np
 
-from .distributions cimport *
-
 __all__ = []
 
 np.import_array()
 
+cdef extern from "include/distributions.h":
+    # Generate random numbers in closed interval [off, off + rng].
+    uint64_t random_bounded_uint64(bitgen_t *bitgen_state,
+                                   uint64_t off, uint64_t rng,
+                                   uint64_t mask, bint use_masked) nogil
+    uint32_t random_buffered_bounded_uint32(bitgen_t *bitgen_state,
+                                            uint32_t off, uint32_t rng,
+                                            uint32_t mask, bint use_masked,
+                                            int *bcnt, uint32_t *buf) nogil
+    uint16_t random_buffered_bounded_uint16(bitgen_t *bitgen_state,
+                                            uint16_t off, uint16_t rng,
+                                            uint16_t mask, bint use_masked,
+                                            int *bcnt, uint32_t *buf) nogil
+    uint8_t random_buffered_bounded_uint8(bitgen_t *bitgen_state,
+                                          uint8_t off, uint8_t rng,
+                                          uint8_t mask, bint use_masked,
+                                          int *bcnt, uint32_t *buf) nogil
+    np.npy_bool random_buffered_bounded_bool(bitgen_t *bitgen_state,
+                                             np.npy_bool off, np.npy_bool rng,
+                                             np.npy_bool mask, bint use_masked,
+                                             int *bcnt, uint32_t *buf) nogil
+    void random_bounded_uint64_fill(bitgen_t *bitgen_state,
+                                    uint64_t off, uint64_t rng, np.npy_intp cnt,
+                                    bint use_masked,
+                                    uint64_t *out) nogil
+    void random_bounded_uint32_fill(bitgen_t *bitgen_state,
+                                    uint32_t off, uint32_t rng, np.npy_intp cnt,
+                                    bint use_masked,
+                                    uint32_t *out) nogil
+    void random_bounded_uint16_fill(bitgen_t *bitgen_state,
+                                    uint16_t off, uint16_t rng, np.npy_intp cnt,
+                                    bint use_masked,
+                                    uint16_t *out) nogil
+    void random_bounded_uint8_fill(bitgen_t *bitgen_state,
+                                   uint8_t off, uint8_t rng, np.npy_intp cnt,
+                                   bint use_masked,
+                                   uint8_t *out) nogil
+    void random_bounded_bool_fill(bitgen_t *bitgen_state,
+                                  np.npy_bool off, np.npy_bool rng, np.npy_intp cnt,
+                                  bint use_masked,
+                                  np.npy_bool *out) nogil
+
+
+
 _integers_types = {'bool': (0, 2),
                  'int8': (-2**7, 2**7),
                  'int16': (-2**15, 2**15),
diff --git a/numpy/random/common.pxd b/numpy/random/_common.pxd
index ac0a94bb0..74bebca83 100644
--- a/numpy/random/common.pxd
+++ b/numpy/random/_common.pxd
@@ -1,23 +1,12 @@
 #cython: language_level=3
 
-from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t,
-                          int8_t, int16_t, int32_t, int64_t, intptr_t,
-                          uintptr_t)
-from libc.math cimport sqrt
-
-cdef extern from "src/bitgen.h":
-    struct bitgen:
-        void *state
-        uint64_t (*next_uint64)(void *st) nogil
-        uint32_t (*next_uint32)(void *st) nogil
-        double (*next_double)(void *st) nogil
-        uint64_t (*next_raw)(void *st) nogil
-
-    ctypedef bitgen bitgen_t
+from libc.stdint cimport uint32_t, uint64_t, int32_t, int64_t
 
 import numpy as np
 cimport numpy as np
 
+from ._bit_generator cimport bitgen_t
+
 cdef double POISSON_LAM_MAX
 cdef double LEGACY_POISSON_LAM_MAX
 cdef uint64_t MAXSIZE
@@ -44,7 +33,7 @@ cdef object prepare_ctypes(bitgen_t *bitgen)
 cdef int check_constraint(double val, object name, constraint_type cons) except -1
 cdef int check_array_constraint(np.ndarray val, object name, constraint_type cons) except -1
 
-cdef extern from "src/aligned_malloc/aligned_malloc.h":
+cdef extern from "include/aligned_malloc.h":
     cdef void *PyArray_realloc_aligned(void *p, size_t n)
     cdef void *PyArray_malloc_aligned(size_t n)
     cdef void *PyArray_calloc_aligned(size_t n, size_t s)
@@ -56,6 +45,7 @@ ctypedef double (*random_double_1)(void *state, double a) nogil
 ctypedef double (*random_double_2)(void *state, double a, double b) nogil
 ctypedef double (*random_double_3)(void *state, double a, double b, double c) nogil
 
+ctypedef void (*random_float_fill)(bitgen_t *state, np.npy_intp count, float* out) nogil  # results are written through `out`
 ctypedef float (*random_float_0)(bitgen_t *state) nogil
 ctypedef float (*random_float_1)(bitgen_t *state, float a) nogil
 
diff --git a/numpy/random/common.pyx b/numpy/random/_common.pyx
index 74cd5f033..ef1afac7c 100644
--- a/numpy/random/common.pyx
+++ b/numpy/random/_common.pyx
@@ -6,7 +6,7 @@ import sys
 import numpy as np
 cimport numpy as np
 
-from .common cimport *
+from libc.stdint cimport uintptr_t
 
 __all__ = ['interface']
 
@@ -262,14 +262,16 @@ cdef object double_fill(void *func, bitgen_t *state, object size, object lock, o
     return out_array
 
 cdef object float_fill(void *func, bitgen_t *state, object size, object lock, object out):
-    cdef random_float_0 random_func = (<random_float_0>func)
+    cdef random_float_fill random_func = (<random_float_fill>func)
+    cdef float out_val
     cdef float *out_array_data
     cdef np.ndarray out_array
     cdef np.npy_intp i, n
 
     if size is None and out is None:
         with lock:
-            return random_func(state)
+            random_func(state, 1, &out_val)
+            return out_val
 
     if out is not None:
         check_output(out, np.float32, size)
@@ -280,8 +282,7 @@ cdef object float_fill(void *func, bitgen_t *state, object size, object lock, ob
     n = np.PyArray_SIZE(out_array)
     out_array_data = <float *>np.PyArray_DATA(out_array)
     with lock, nogil:
-        for i in range(n):
-            out_array_data[i] = random_func(state)
+        random_func(state, n, out_array_data)
     return out_array
 
 cdef object float_fill_from_double(void *func, bitgen_t *state, object size, object lock, object out):
diff --git a/numpy/random/generator.pyx b/numpy/random/_generator.pyx
index df7485a97..b842d6a32 100644
--- a/numpy/random/generator.pyx
+++ b/numpy/random/_generator.pyx
@@ -3,36 +3,159 @@
 import operator
 import warnings
 
-import numpy as np
-from numpy.core.multiarray import normalize_axis_index
-
-from .bounded_integers import _integers_types
-from .pcg64 import PCG64
-
 from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
 from cpython cimport (Py_INCREF, PyFloat_AsDouble)
-from libc cimport string
 
 cimport cython
+import numpy as np
 cimport numpy as np
+from numpy.core.multiarray import normalize_axis_index
 
-from .bounded_integers cimport *
-from .common cimport *
-from .distributions cimport *
+from libc cimport string
+from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t,
+                          int32_t, int64_t, INT64_MAX, SIZE_MAX)
+from ._bounded_integers cimport (_rand_bool, _rand_int32, _rand_int64,
+         _rand_int16, _rand_int8, _rand_uint64, _rand_uint32, _rand_uint16,
+         _rand_uint8, _gen_mask)
+from ._bounded_integers import _integers_types
+from ._pcg64 import PCG64
+from ._bit_generator cimport bitgen_t
+from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE,
+            CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1,
+            CONS_GT_1, CONS_POSITIVE_NOT_NAN, CONS_POISSON,
+            double_fill, cont, kahan_sum, cont_broadcast_3, float_fill, cont_f,
+            check_array_constraint, check_constraint, disc, discrete_broadcast_iii,
+        )
+
+
+cdef extern from "include/distributions.h":
+
+    struct s_binomial_t:
+        int has_binomial
+        double psave
+        int64_t nsave
+        double r
+        double q
+        double fm
+        int64_t m
+        double p1
+        double xm
+        double xl
+        double xr
+        double c
+        double laml
+        double lamr
+        double p2
+        double p3
+        double p4
+
+    ctypedef s_binomial_t binomial_t
+
+    double random_standard_uniform(bitgen_t *bitgen_state) nogil
+    void random_standard_uniform_fill(bitgen_t* bitgen_state, np.npy_intp cnt, double *out) nogil
+    double random_standard_exponential(bitgen_t *bitgen_state) nogil
+    void random_standard_exponential_fill(bitgen_t *bitgen_state, np.npy_intp cnt, double *out) nogil
+    double random_standard_exponential_zig(bitgen_t *bitgen_state) nogil
+    void random_standard_exponential_zig_fill(bitgen_t *bitgen_state, np.npy_intp cnt, double *out) nogil
+    double random_standard_normal(bitgen_t* bitgen_state) nogil
+    void random_standard_normal_fill(bitgen_t *bitgen_state, np.npy_intp count, double *out) nogil
+    void random_standard_normal_fill_f(bitgen_t *bitgen_state, np.npy_intp count, float *out) nogil
+    double random_standard_gamma(bitgen_t *bitgen_state, double shape) nogil
+
+    float random_standard_uniform_f(bitgen_t *bitgen_state) nogil
+    void random_standard_uniform_fill_f(bitgen_t* bitgen_state, np.npy_intp cnt, float *out) nogil
+    float random_standard_exponential_f(bitgen_t *bitgen_state) nogil
+    float random_standard_exponential_zig_f(bitgen_t *bitgen_state) nogil
+    void random_standard_exponential_fill_f(bitgen_t *bitgen_state, np.npy_intp cnt, float *out) nogil
+    void random_standard_exponential_zig_fill_f(bitgen_t *bitgen_state, np.npy_intp cnt, float *out) nogil
+    float random_standard_normal_f(bitgen_t* bitgen_state) nogil
+    float random_standard_gamma_f(bitgen_t *bitgen_state, float shape) nogil
+
+    int64_t random_positive_int64(bitgen_t *bitgen_state) nogil
+    int32_t random_positive_int32(bitgen_t *bitgen_state) nogil
+    int64_t random_positive_int(bitgen_t *bitgen_state) nogil
+    uint64_t random_uint(bitgen_t *bitgen_state) nogil
+
+    double random_normal(bitgen_t *bitgen_state, double loc, double scale) nogil
+
+    double random_gamma(bitgen_t *bitgen_state, double shape, double scale) nogil
+    float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale) nogil
+
+    double random_exponential(bitgen_t *bitgen_state, double scale) nogil
+    double random_uniform(bitgen_t *bitgen_state, double lower, double range) nogil
+    double random_beta(bitgen_t *bitgen_state, double a, double b) nogil
+    double random_chisquare(bitgen_t *bitgen_state, double df) nogil
+    double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) nogil
+    double random_standard_cauchy(bitgen_t *bitgen_state) nogil
+    double random_pareto(bitgen_t *bitgen_state, double a) nogil
+    double random_weibull(bitgen_t *bitgen_state, double a) nogil
+    double random_power(bitgen_t *bitgen_state, double a) nogil
+    double random_laplace(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_gumbel(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_logistic(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma) nogil
+    double random_rayleigh(bitgen_t *bitgen_state, double mode) nogil
+    double random_standard_t(bitgen_t *bitgen_state, double df) nogil
+    double random_noncentral_chisquare(bitgen_t *bitgen_state, double df,
+                                       double nonc) nogil
+    double random_noncentral_f(bitgen_t *bitgen_state, double dfnum,
+                               double dfden, double nonc) nogil
+    double random_wald(bitgen_t *bitgen_state, double mean, double scale) nogil
+    double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) nogil
+    double random_triangular(bitgen_t *bitgen_state, double left, double mode,
+                             double right) nogil
+
+    int64_t random_poisson(bitgen_t *bitgen_state, double lam) nogil
+    int64_t random_negative_binomial(bitgen_t *bitgen_state, double n, double p) nogil
+    int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n, binomial_t *binomial) nogil
+    int64_t random_logseries(bitgen_t *bitgen_state, double p) nogil
+    int64_t random_geometric_search(bitgen_t *bitgen_state, double p) nogil
+    int64_t random_geometric_inversion(bitgen_t *bitgen_state, double p) nogil
+    int64_t random_geometric(bitgen_t *bitgen_state, double p) nogil
+    int64_t random_zipf(bitgen_t *bitgen_state, double a) nogil
+    int64_t random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad,
+                                    int64_t sample) nogil
+
+    uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max) nogil
+
+    # Generate random uint64 numbers in closed interval [off, off + rng].
+    uint64_t random_bounded_uint64(bitgen_t *bitgen_state,
+                                   uint64_t off, uint64_t rng,
+                                   uint64_t mask, bint use_masked) nogil
+
+    void random_multinomial(bitgen_t *bitgen_state, int64_t n, int64_t *mnix,
+                            double *pix, np.npy_intp d, binomial_t *binomial) nogil
+
+    int random_mvhg_count(bitgen_t *bitgen_state,
+                          int64_t total,
+                          size_t num_colors, int64_t *colors,
+                          int64_t nsample,
+                          size_t num_variates, int64_t *variates) nogil
+    void random_mvhg_marginals(bitgen_t *bitgen_state,
+                               int64_t total,
+                               size_t num_colors, int64_t *colors,
+                               int64_t nsample,
+                               size_t num_variates, int64_t *variates) nogil
 
+np.import_array()
 
-__all__ = ['Generator', 'beta', 'binomial', 'bytes', 'chisquare', 'choice',
-           'dirichlet', 'exponential', 'f', 'gamma',
-           'geometric', 'gumbel', 'hypergeometric', 'integers', 'laplace',
-           'logistic', 'lognormal', 'logseries', 'multinomial',
-           'multivariate_normal', 'negative_binomial', 'noncentral_chisquare',
-           'noncentral_f', 'normal', 'pareto', 'permutation',
-           'poisson', 'power', 'random',  'rayleigh', 'shuffle',
-           'standard_cauchy', 'standard_exponential', 'standard_gamma',
-           'standard_normal', 'standard_t', 'triangular',
-           'uniform', 'vonmises', 'wald', 'weibull', 'zipf']
 
-np.import_array()
+cdef int64_t _safe_sum_nonneg_int64(size_t num_colors, int64_t *colors):
+    """
+    Add up the first `num_colors` entries of the C array `colors`.
+
+    Returns the total, or -1 if the running total would exceed INT64_MAX.
+    Every entry of `colors` is assumed to be nonnegative.
+    """
+    cdef int64_t total = 0
+    cdef size_t idx
+
+    for idx in range(num_colors):
+        # Compare against the remaining headroom before adding the entry.
+        if colors[idx] > INT64_MAX - total:
+            return -1
+        total = total + colors[idx]
+    return total
 
 
 cdef bint _check_bit_generator(object bitgen):
@@ -193,9 +316,9 @@ cdef class Generator:
         cdef double temp
         key = np.dtype(dtype).name
         if key == 'float64':
-            return double_fill(&random_double_fill, &self._bitgen, size, self.lock, out)
+            return double_fill(&random_standard_uniform_fill, &self._bitgen, size, self.lock, out)
         elif key == 'float32':
-            return float_fill(&random_float, &self._bitgen, size, self.lock, out)
+            return float_fill(&random_standard_uniform_fill_f, &self._bitgen, size, self.lock, out)
         else:
             raise TypeError('Unsupported dtype "%s" for random' % key)
 
@@ -341,9 +464,9 @@ cdef class Generator:
                 return double_fill(&random_standard_exponential_fill, &self._bitgen, size, self.lock, out)
         elif key == 'float32':
             if method == u'zig':
-                return float_fill(&random_standard_exponential_zig_f, &self._bitgen, size, self.lock, out)
+                return float_fill(&random_standard_exponential_zig_fill_f, &self._bitgen, size, self.lock, out)
             else:
-                return float_fill(&random_standard_exponential_f, &self._bitgen, size, self.lock, out)
+                return float_fill(&random_standard_exponential_fill_f, &self._bitgen, size, self.lock, out)
         else:
             raise TypeError('Unsupported dtype "%s" for standard_exponential'
                             % key)
@@ -781,7 +904,6 @@ cdef class Generator:
         --------
         integers : Discrete uniform distribution, yielding integers.
         random : Floats uniformly distributed over ``[0, 1)``.
-        random : Alias for `random`.
 
         Notes
         -----
@@ -920,9 +1042,9 @@ cdef class Generator:
         """
         key = np.dtype(dtype).name
         if key == 'float64':
-            return double_fill(&random_gauss_zig_fill, &self._bitgen, size, self.lock, out)
+            return double_fill(&random_standard_normal_fill, &self._bitgen, size, self.lock, out)
         elif key == 'float32':
-            return float_fill(&random_gauss_zig_f, &self._bitgen, size, self.lock, out)
+            return float_fill(&random_standard_normal_fill_f, &self._bitgen, size, self.lock, out)
 
         else:
             raise TypeError('Unsupported dtype "%s" for standard_normal' % key)
@@ -1023,7 +1145,7 @@ cdef class Generator:
                [ 0.39924804,  4.68456316,  4.99394529,  4.84057254]])  # random
 
         """
-        return cont(&random_normal_zig, &self._bitgen, size, self.lock, 2,
+        return cont(&random_normal, &self._bitgen, size, self.lock, 2,
                     loc, '', CONS_NONE,
                     scale, 'scale', CONS_NON_NEGATIVE,
                     0.0, '', CONS_NONE,
@@ -1109,13 +1231,13 @@ cdef class Generator:
         cdef void *func
         key = np.dtype(dtype).name
         if key == 'float64':
-            return cont(&random_standard_gamma_zig, &self._bitgen, size, self.lock, 1,
+            return cont(&random_standard_gamma, &self._bitgen, size, self.lock, 1,
                         shape, 'shape', CONS_NON_NEGATIVE,
                         0.0, '', CONS_NONE,
                         0.0, '', CONS_NONE,
                         out)
         if key == 'float32':
-            return cont_f(&random_standard_gamma_zig_f, &self._bitgen, size, self.lock,
+            return cont_f(&random_standard_gamma_f, &self._bitgen, size, self.lock,
                           shape, 'shape', CONS_NON_NEGATIVE,
                           out)
         else:
@@ -3147,6 +3269,8 @@ cdef class Generator:
 
         See Also
         --------
+        multivariate_hypergeometric : Draw samples from the multivariate
+            hypergeometric distribution.
         scipy.stats.hypergeom : probability density function, distribution or
             cumulative density function, etc.
 
@@ -3645,6 +3769,222 @@ cdef class Generator:
 
         return multin
 
+    def multivariate_hypergeometric(self, object colors, object nsample,
+                                    size=None, method='marginals'):
+        """
+        multivariate_hypergeometric(colors, nsample, size=None,
+                                    method='marginals')
+
+        Generate variates from a multivariate hypergeometric distribution.
+
+        The multivariate hypergeometric distribution is a generalization
+        of the hypergeometric distribution.
+
+        Choose ``nsample`` items at random without replacement from a
+        collection with ``N`` distinct types.  ``N`` is the length of
+        ``colors``, and the values in ``colors`` are the number of occurrences
+        of that type in the collection.  The total number of items in the
+        collection is ``sum(colors)``.  Each random variate generated by this
+        function is a vector of length ``N`` holding the counts of the
+        different types that occurred in the ``nsample`` items.
+
+        The name ``colors`` comes from a common description of the
+        distribution: it is the probability distribution of the number of
+        marbles of each color selected without replacement from an urn
+        containing marbles of different colors; ``colors[i]`` is the number
+        of marbles in the urn with color ``i``.
+
+        Parameters
+        ----------
+        colors : sequence of integers
+            The number of each type of item in the collection from which
+            a sample is drawn.  The values in ``colors`` must be nonnegative.
+            To avoid loss of precision in the algorithm, ``sum(colors)``
+            must be less than ``10**9`` when `method` is "marginals".
+        nsample : int
+            The number of items selected.  ``nsample`` must not be greater
+            than ``sum(colors)``.
+        size : int or tuple of ints, optional
+            The number of variates to generate, either an integer or a tuple
+            holding the shape of the array of variates.  If the given size is,
+            e.g., ``(k, m)``, then ``k * m`` variates are drawn, where one
+            variate is a vector of length ``len(colors)``, and the return value
+            has shape ``(k, m, len(colors))``.  If `size` is an integer, the
+            output has shape ``(size, len(colors))``.  Default is None, in
+            which case a single variate is returned as an array with shape
+            ``(len(colors),)``.
+        method : string, optional
+            Specify the algorithm that is used to generate the variates.
+            Must be 'count' or 'marginals' (the default).  See the Notes
+            for a description of the methods.
+
+        Returns
+        -------
+        variates : ndarray
+            Array of variates drawn from the multivariate hypergeometric
+            distribution.
+
+        See Also
+        --------
+        hypergeometric : Draw samples from the (univariate) hypergeometric
+            distribution.
+
+        Notes
+        -----
+        The two methods do not return the same sequence of variates.
+
+        The "count" algorithm is roughly equivalent to the following numpy
+        code::
+
+            choices = np.repeat(np.arange(len(colors)), colors)
+            selection = np.random.choice(choices, nsample, replace=False)
+            variate = np.bincount(selection, minlength=len(colors))
+
+        The "count" algorithm uses a temporary array of integers with length
+        ``sum(colors)``.
+
+        The "marginals" algorithm generates a variate by using repeated
+        calls to the univariate hypergeometric sampler.  It is roughly
+        equivalent to::
+
+            variate = np.zeros(len(colors), dtype=np.int64)
+            # `remaining` is the cumulative sum of `colors` from the last
+            # element to the first; e.g. if `colors` is [3, 1, 5], then
+            # `remaining` is [9, 6, 5].
+            remaining = np.cumsum(colors[::-1])[::-1]
+            for i in range(len(colors)-1):
+                if nsample < 1:
+                    break
+                variate[i] = hypergeometric(colors[i], remaining[i+1],
+                                           nsample)
+                nsample -= variate[i]
+            variate[-1] = nsample
+
+        The default method is "marginals".  For some cases (e.g. when
+        `colors` contains relatively small integers), the "count" method
+        can be significantly faster than the "marginals" method.  If
+        performance of the algorithm is important, test the two methods
+        with typical inputs to decide which works best.
+
+        .. versionadded:: 1.18.0
+
+        Examples
+        --------
+        >>> colors = [16, 8, 4]
+        >>> seed = 4861946401452
+        >>> gen = np.random.Generator(np.random.PCG64(seed))
+        >>> gen.multivariate_hypergeometric(colors, 6)
+        array([5, 0, 1])
+        >>> gen.multivariate_hypergeometric(colors, 6, size=3)
+        array([[5, 0, 1],
+               [2, 2, 2],
+               [3, 3, 0]])
+        >>> gen.multivariate_hypergeometric(colors, 6, size=(2, 2))
+        array([[[3, 2, 1],
+                [3, 2, 1]],
+               [[4, 1, 1],
+                [3, 2, 1]]])
+        """
+        cdef int64_t nsamp
+        cdef size_t num_colors
+        cdef int64_t total
+        cdef int64_t *colors_ptr
+        cdef int64_t max_index
+        cdef size_t num_variates
+        cdef int64_t *variates_ptr
+        cdef int result
+
+        if method not in ['count', 'marginals']:
+            raise ValueError('method must be "count" or "marginals".')
+
+        try:
+            operator.index(nsample)
+        except TypeError:
+            raise ValueError('nsample must be an integer')
+
+        if nsample < 0:
+            raise ValueError("nsample must be nonnegative.")
+        if nsample > INT64_MAX:
+            raise ValueError("nsample must not exceed %d" % INT64_MAX)
+        nsamp = nsample
+
+        # Validation of colors, a 1-d sequence of nonnegative integers.
+        invalid_colors = False
+        try:
+            colors = np.asarray(colors)
+            if colors.ndim != 1:
+                invalid_colors = True
+            elif colors.size > 0 and not np.issubdtype(colors.dtype,
+                                                       np.integer):
+                invalid_colors = True
+            elif np.any((colors < 0) | (colors > INT64_MAX)):
+                invalid_colors = True
+        except ValueError:
+            invalid_colors = True
+        if invalid_colors:
+            raise ValueError('colors must be a one-dimensional sequence '
+                             'of nonnegative integers not exceeding %d.' %
+                             INT64_MAX)
+
+        colors = np.ascontiguousarray(colors, dtype=np.int64)
+        num_colors = colors.size
+
+        colors_ptr = <int64_t *> np.PyArray_DATA(colors)
+
+        total = _safe_sum_nonneg_int64(num_colors, colors_ptr)
+        if total == -1:
+            raise ValueError("sum(colors) must not exceed the maximum value "
+                             "of a 64 bit signed integer (%d)" % INT64_MAX)
+
+        if method == 'marginals' and total >= 1000000000:
+            raise ValueError('When method is "marginals", sum(colors) must '
+                             'be less than 1000000000.')
+
+        # The C code that implements the 'count' method will malloc an
+        # array of size total*sizeof(size_t). Here we ensure that that
+        # product does not overflow.
+        if SIZE_MAX > <uint64_t>INT64_MAX:
+            max_index = INT64_MAX // sizeof(size_t)
+        else:
+            max_index = SIZE_MAX // sizeof(size_t)
+        if method == 'count' and total > max_index:
+            raise ValueError("When method is 'count', sum(colors) must not "
+                             "exceed %d" % max_index)
+        if nsamp > total:
+            raise ValueError("nsample > sum(colors)")
+
+        # Figure out the shape of the return array.
+        if size is None:
+            shape = (num_colors,)
+        elif np.isscalar(size):
+            shape = (size, num_colors)
+        else:
+            shape = tuple(size) + (num_colors,)
+        variates = np.zeros(shape, dtype=np.int64)
+
+        if num_colors == 0:
+            return variates
+
+        # One variate is a vector of length num_colors.
+        num_variates = variates.size // num_colors
+        variates_ptr = <int64_t *> np.PyArray_DATA(variates)
+
+        if method == 'count':
+            with self.lock, nogil:
+                result = random_mvhg_count(&self._bitgen, total,
+                                           num_colors, colors_ptr, nsamp,
+                                           num_variates, variates_ptr)
+            if result == -1:
+                raise MemoryError("Insufficient memory for multivariate_"
+                                  "hypergeometric with method='count' and "
+                                  "sum(colors)=%d" % total)
+        else:
+            with self.lock, nogil:
+                random_mvhg_marginals(&self._bitgen, total,
+                                      num_colors, colors_ptr, nsamp,
+                                      num_variates, variates_ptr)
+        return variates
+
     def dirichlet(self, object alpha, size=None):
         """
         dirichlet(alpha, size=None)
@@ -3773,7 +4113,7 @@ cdef class Generator:
             while i < totsize:
                 acc = 0.0
                 for j in range(k):
-                    val_data[i+j] = random_standard_gamma_zig(&self._bitgen,
+                    val_data[i+j] = random_standard_gamma(&self._bitgen,
                                                               alpha_data[j])
                     acc = acc + val_data[i + j]
                 invacc = 1/acc
@@ -4003,19 +4343,18 @@ def default_rng(seed=None):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence, BitGenerator, Generator}, optional
+    seed : {None, int, array_like[ints], SeedSequence, BitGenerator, Generator}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
         Additionally, when passed a `BitGenerator`, it will be wrapped by
         `Generator`. If passed a `Generator`, it will be returned unaltered.
 
     Notes
     -----
-    When `seed` is omitted or ``None``, a new `BitGenerator` and `Generator` will
+    When ``seed`` is omitted or ``None``, a new `BitGenerator` and `Generator` will
     be instantiated each time. This function does not manage a default global
     instance.
     """
diff --git a/numpy/random/mt19937.pyx b/numpy/random/_mt19937.pyx
index 7d0f6cd22..e99652b73 100644
--- a/numpy/random/mt19937.pyx
+++ b/numpy/random/_mt19937.pyx
@@ -3,8 +3,8 @@ import operator
 import numpy as np
 cimport numpy as np
 
-from .common cimport *
-from .bit_generator cimport BitGenerator, SeedSequence
+from libc.stdint cimport uint32_t, uint64_t
+from ._bit_generator cimport BitGenerator, SeedSequence
 
 __all__ = ['MT19937']
 
@@ -48,13 +48,12 @@ cdef class MT19937(BitGenerator):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
 
     Attributes
     ----------
diff --git a/numpy/random/pcg64.pyx b/numpy/random/_pcg64.pyx
index 585520139..1a5d852a2 100644
--- a/numpy/random/pcg64.pyx
+++ b/numpy/random/_pcg64.pyx
@@ -1,8 +1,9 @@
 import numpy as np
 cimport numpy as np
 
-from .common cimport *
-from .bit_generator cimport BitGenerator
+from libc.stdint cimport uint32_t, uint64_t
+from ._common cimport uint64_to_double, wrap_int
+from ._bit_generator cimport BitGenerator
 
 __all__ = ['PCG64']
 
@@ -43,13 +44,12 @@ cdef class PCG64(BitGenerator):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
 
     Notes
     -----
diff --git a/numpy/random/philox.pyx b/numpy/random/_philox.pyx
index 8b7683017..9f136c32f 100644
--- a/numpy/random/philox.pyx
+++ b/numpy/random/_philox.pyx
@@ -6,9 +6,11 @@ except ImportError:
     from dummy_threading import Lock
 
 import numpy as np
+cimport numpy as np
 
-from .common cimport *
-from .bit_generator cimport BitGenerator
+from libc.stdint cimport uint32_t, uint64_t
+from ._common cimport uint64_to_double, int_to_array, wrap_int
+from ._bit_generator cimport BitGenerator
 
 __all__ = ['Philox']
 
@@ -62,21 +64,20 @@ cdef class Philox(BitGenerator):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
     counter : {None, int, array_like}, optional
         Counter to use in the Philox state. Can be either
         a Python int (long in 2.x) in [0, 2**256) or a 4-element uint64 array.
         If not provided, the RNG is initialized at 0.
     key : {None, int, array_like}, optional
-        Key to use in the Philox state.  Unlike seed, the value in key is
+        Key to use in the Philox state.  Unlike ``seed``, the value in key is
         directly set. Can be either a Python int in [0, 2**128) or a 2-element
-        uint64 array. `key` and `seed` cannot both be used.
+        uint64 array. `key` and ``seed`` cannot both be used.
 
     Attributes
     ----------
@@ -108,10 +109,10 @@ cdef class Philox(BitGenerator):
     randoms produced. The second is a key which determined the sequence
     produced. Using different keys produces independent sequences.
 
-    The input seed is processed by `SeedSequence` to generate the key. The
+    The input ``seed`` is processed by `SeedSequence` to generate the key. The
     counter is set to 0.
 
-    Alternately, one can omit the seed parameter and set the ``key`` and
+    Alternately, one can omit the ``seed`` parameter and set the ``key`` and
     ``counter`` directly.
 
     **Parallel Features**
@@ -146,7 +147,7 @@ cdef class Philox(BitGenerator):
 
     **Compatibility Guarantee**
 
-    ``Philox`` makes a guarantee that a fixed seed will always produce
+    ``Philox`` makes a guarantee that a fixed ``seed`` will always produce
     the same random integer stream.
 
     Examples
diff --git a/numpy/random/_pickle.py b/numpy/random/_pickle.py
index 3b58f21e8..29ff69644 100644
--- a/numpy/random/_pickle.py
+++ b/numpy/random/_pickle.py
@@ -1,10 +1,10 @@
 from .mtrand import RandomState
-from .philox import Philox
-from .pcg64 import PCG64
-from .sfc64 import SFC64
+from ._philox import Philox
+from ._pcg64 import PCG64
+from ._sfc64 import SFC64
 
-from .generator import Generator
-from .mt19937 import MT19937
+from ._generator import Generator
+from ._mt19937 import MT19937
 
 BitGenerators = {'MT19937': MT19937,
                  'PCG64': PCG64,
diff --git a/numpy/random/sfc64.pyx b/numpy/random/_sfc64.pyx
index a881096e9..1633669d5 100644
--- a/numpy/random/sfc64.pyx
+++ b/numpy/random/_sfc64.pyx
@@ -1,8 +1,9 @@
 import numpy as np
 cimport numpy as np
 
-from .common cimport *
-from .bit_generator cimport BitGenerator
+from libc.stdint cimport uint32_t, uint64_t
+from ._common cimport uint64_to_double
+from ._bit_generator cimport BitGenerator
 
 __all__ = ['SFC64']
 
@@ -38,13 +39,12 @@ cdef class SFC64(BitGenerator):
 
     Parameters
     ----------
-    seed : {None, int, array_like[ints], ISeedSequence}, optional
+    seed : {None, int, array_like[ints], SeedSequence}, optional
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
         `SeedSequence` to derive the initial `BitGenerator` state. One may also
-        pass in an implementor of the `ISeedSequence` interface like
-        `SeedSequence`.
+        pass in a `SeedSequence` instance.
 
     Notes
     -----
diff --git a/numpy/random/distributions.pxd b/numpy/random/distributions.pxd
deleted file mode 100644
index 75edaee9d..000000000
--- a/numpy/random/distributions.pxd
+++ /dev/null
@@ -1,140 +0,0 @@
-#cython: language_level=3
-
-from .common cimport (uint8_t, uint16_t, uint32_t, uint64_t,
-                          int32_t, int64_t, bitgen_t)
-import numpy as np
-cimport numpy as np
-
-cdef extern from "src/distributions/distributions.h":
-
-    struct s_binomial_t:
-        int has_binomial
-        double psave
-        int64_t nsave
-        double r
-        double q
-        double fm
-        int64_t m
-        double p1
-        double xm
-        double xl
-        double xr
-        double c
-        double laml
-        double lamr
-        double p2
-        double p3
-        double p4
-
-    ctypedef s_binomial_t binomial_t
-
-    double random_double(bitgen_t *bitgen_state) nogil
-    void random_double_fill(bitgen_t* bitgen_state, np.npy_intp cnt, double *out) nogil
-    double random_standard_exponential(bitgen_t *bitgen_state) nogil
-    void random_standard_exponential_fill(bitgen_t *bitgen_state, np.npy_intp cnt, double *out) nogil
-    double random_standard_exponential_zig(bitgen_t *bitgen_state) nogil
-    void random_standard_exponential_zig_fill(bitgen_t *bitgen_state, np.npy_intp cnt, double *out) nogil
-    double random_gauss_zig(bitgen_t* bitgen_state) nogil
-    void random_gauss_zig_fill(bitgen_t *bitgen_state, np.npy_intp count, double *out) nogil
-    double random_standard_gamma_zig(bitgen_t *bitgen_state, double shape) nogil
-
-    float random_float(bitgen_t *bitgen_state) nogil
-    float random_standard_exponential_f(bitgen_t *bitgen_state) nogil
-    float random_standard_exponential_zig_f(bitgen_t *bitgen_state) nogil
-    float random_gauss_zig_f(bitgen_t* bitgen_state) nogil
-    float random_standard_gamma_f(bitgen_t *bitgen_state, float shape) nogil
-    float random_standard_gamma_zig_f(bitgen_t *bitgen_state, float shape) nogil
-
-    int64_t random_positive_int64(bitgen_t *bitgen_state) nogil
-    int32_t random_positive_int32(bitgen_t *bitgen_state) nogil
-    int64_t random_positive_int(bitgen_t *bitgen_state) nogil
-    uint64_t random_uint(bitgen_t *bitgen_state) nogil
-
-    double random_normal_zig(bitgen_t *bitgen_state, double loc, double scale) nogil
-
-    double random_gamma(bitgen_t *bitgen_state, double shape, double scale) nogil
-    float random_gamma_float(bitgen_t *bitgen_state, float shape, float scale) nogil
-
-    double random_exponential(bitgen_t *bitgen_state, double scale) nogil
-    double random_uniform(bitgen_t *bitgen_state, double lower, double range) nogil
-    double random_beta(bitgen_t *bitgen_state, double a, double b) nogil
-    double random_chisquare(bitgen_t *bitgen_state, double df) nogil
-    double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) nogil
-    double random_standard_cauchy(bitgen_t *bitgen_state) nogil
-    double random_pareto(bitgen_t *bitgen_state, double a) nogil
-    double random_weibull(bitgen_t *bitgen_state, double a) nogil
-    double random_power(bitgen_t *bitgen_state, double a) nogil
-    double random_laplace(bitgen_t *bitgen_state, double loc, double scale) nogil
-    double random_gumbel(bitgen_t *bitgen_state, double loc, double scale) nogil
-    double random_logistic(bitgen_t *bitgen_state, double loc, double scale) nogil
-    double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma) nogil
-    double random_rayleigh(bitgen_t *bitgen_state, double mode) nogil
-    double random_standard_t(bitgen_t *bitgen_state, double df) nogil
-    double random_noncentral_chisquare(bitgen_t *bitgen_state, double df,
-                                       double nonc) nogil
-    double random_noncentral_f(bitgen_t *bitgen_state, double dfnum,
-                               double dfden, double nonc) nogil
-    double random_wald(bitgen_t *bitgen_state, double mean, double scale) nogil
-    double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) nogil
-    double random_triangular(bitgen_t *bitgen_state, double left, double mode,
-                             double right) nogil
-
-    int64_t random_poisson(bitgen_t *bitgen_state, double lam) nogil
-    int64_t random_negative_binomial(bitgen_t *bitgen_state, double n, double p) nogil
-    int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n, binomial_t *binomial) nogil
-    int64_t random_logseries(bitgen_t *bitgen_state, double p) nogil
-    int64_t random_geometric_search(bitgen_t *bitgen_state, double p) nogil
-    int64_t random_geometric_inversion(bitgen_t *bitgen_state, double p) nogil
-    int64_t random_geometric(bitgen_t *bitgen_state, double p) nogil
-    int64_t random_zipf(bitgen_t *bitgen_state, double a) nogil
-    int64_t random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad,
-                                    int64_t sample) nogil
-
-    uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max) nogil
-
-    # Generate random uint64 numbers in closed interval [off, off + rng].
-    uint64_t random_bounded_uint64(bitgen_t *bitgen_state,
-                                   uint64_t off, uint64_t rng,
-                                   uint64_t mask, bint use_masked) nogil
-
-    # Generate random uint32 numbers in closed interval [off, off + rng].
-    uint32_t random_buffered_bounded_uint32(bitgen_t *bitgen_state,
-                                            uint32_t off, uint32_t rng,
-                                            uint32_t mask, bint use_masked,
-                                            int *bcnt, uint32_t *buf) nogil
-    uint16_t random_buffered_bounded_uint16(bitgen_t *bitgen_state,
-                                            uint16_t off, uint16_t rng,
-                                            uint16_t mask, bint use_masked,
-                                            int *bcnt, uint32_t *buf) nogil
-    uint8_t random_buffered_bounded_uint8(bitgen_t *bitgen_state,
-                                          uint8_t off, uint8_t rng,
-                                          uint8_t mask, bint use_masked,
-                                          int *bcnt, uint32_t *buf) nogil
-    np.npy_bool random_buffered_bounded_bool(bitgen_t *bitgen_state,
-                                             np.npy_bool off, np.npy_bool rng,
-                                             np.npy_bool mask, bint use_masked,
-                                             int *bcnt, uint32_t *buf) nogil
-
-    void random_bounded_uint64_fill(bitgen_t *bitgen_state,
-                                    uint64_t off, uint64_t rng, np.npy_intp cnt,
-                                    bint use_masked,
-                                    uint64_t *out) nogil
-    void random_bounded_uint32_fill(bitgen_t *bitgen_state,
-                                    uint32_t off, uint32_t rng, np.npy_intp cnt,
-                                    bint use_masked,
-                                    uint32_t *out) nogil
-    void random_bounded_uint16_fill(bitgen_t *bitgen_state,
-                                    uint16_t off, uint16_t rng, np.npy_intp cnt,
-                                    bint use_masked,
-                                    uint16_t *out) nogil
-    void random_bounded_uint8_fill(bitgen_t *bitgen_state,
-                                   uint8_t off, uint8_t rng, np.npy_intp cnt,
-                                   bint use_masked,
-                                   uint8_t *out) nogil
-    void random_bounded_bool_fill(bitgen_t *bitgen_state,
-                                  np.npy_bool off, np.npy_bool rng, np.npy_intp cnt,
-                                  bint use_masked,
-                                  np.npy_bool *out) nogil
-
-    void random_multinomial(bitgen_t *bitgen_state, int64_t n, int64_t *mnix,
-                            double *pix, np.npy_intp d, binomial_t *binomial) nogil
diff --git a/numpy/random/src/aligned_malloc/aligned_malloc.h b/numpy/random/include/aligned_malloc.h
index ea24f6d23..ea24f6d23 100644
--- a/numpy/random/src/aligned_malloc/aligned_malloc.h
+++ b/numpy/random/include/aligned_malloc.h
diff --git a/numpy/random/src/bitgen.h b/numpy/random/include/bitgen.h
index 0adaaf2ee..83c2858dd 100644
--- a/numpy/random/src/bitgen.h
+++ b/numpy/random/include/bitgen.h
@@ -6,7 +6,7 @@
 #include <stdbool.h>
 #include <stdint.h>
 
-/* Must match the declaration in numpy/random/common.pxd */
+/* Must match the declaration in numpy/random/<any>.pxd */
 
 typedef struct bitgen {
   void *state;
diff --git a/numpy/random/src/distributions/distributions.h b/numpy/random/include/distributions.h
index 2a6b2a045..c02ea605e 100644
--- a/numpy/random/src/distributions/distributions.h
+++ b/numpy/random/include/distributions.h
@@ -8,7 +8,7 @@
 #include <stdint.h>
 
 #include "numpy/npy_math.h"
-#include "src/bitgen.h"
+#include "include/bitgen.h"
 
 /*
  * RAND_INT_TYPE is used to share integer generators with RandomState which
@@ -59,28 +59,10 @@ typedef struct s_binomial_t {
   double p4;
 } binomial_t;
 
-/* Inline generators for internal use */
-static NPY_INLINE uint32_t next_uint32(bitgen_t *bitgen_state) {
-  return bitgen_state->next_uint32(bitgen_state->state);
-}
-
-static NPY_INLINE uint64_t next_uint64(bitgen_t *bitgen_state) {
-  return bitgen_state->next_uint64(bitgen_state->state);
-}
-
-static NPY_INLINE float next_float(bitgen_t *bitgen_state) {
-  return (next_uint32(bitgen_state) >> 9) * (1.0f / 8388608.0f);
-}
-
-static NPY_INLINE double next_double(bitgen_t *bitgen_state) {
-  return bitgen_state->next_double(bitgen_state->state);
-}
-
-DECLDIR double loggam(double x);
-
-DECLDIR float random_float(bitgen_t *bitgen_state);
-DECLDIR double random_double(bitgen_t *bitgen_state);
-DECLDIR void random_double_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out);
+DECLDIR float random_standard_uniform_f(bitgen_t *bitgen_state);
+DECLDIR double random_standard_uniform(bitgen_t *bitgen_state);
+DECLDIR void random_standard_uniform_fill(bitgen_t *, npy_intp, double *);
+DECLDIR void random_standard_uniform_fill_f(bitgen_t *, npy_intp, float *);
 
 DECLDIR int64_t random_positive_int64(bitgen_t *bitgen_state);
 DECLDIR int32_t random_positive_int32(bitgen_t *bitgen_state);
@@ -88,37 +70,25 @@ DECLDIR int64_t random_positive_int(bitgen_t *bitgen_state);
 DECLDIR uint64_t random_uint(bitgen_t *bitgen_state);
 
 DECLDIR double random_standard_exponential(bitgen_t *bitgen_state);
-DECLDIR void random_standard_exponential_fill(bitgen_t *bitgen_state, npy_intp cnt,
-                                              double *out);
 DECLDIR float random_standard_exponential_f(bitgen_t *bitgen_state);
 DECLDIR double random_standard_exponential_zig(bitgen_t *bitgen_state);
-DECLDIR void random_standard_exponential_zig_fill(bitgen_t *bitgen_state,
-                                                  npy_intp cnt, double *out);
 DECLDIR float random_standard_exponential_zig_f(bitgen_t *bitgen_state);
-
-/*
-DECLDIR double random_gauss(bitgen_t *bitgen_state);
-DECLDIR float random_gauss_f(bitgen_t *bitgen_state);
-*/
-DECLDIR double random_gauss_zig(bitgen_t *bitgen_state);
-DECLDIR float random_gauss_zig_f(bitgen_t *bitgen_state);
-DECLDIR void random_gauss_zig_fill(bitgen_t *bitgen_state, npy_intp cnt,
-                                   double *out);
-
-/*
+DECLDIR void random_standard_exponential_fill(bitgen_t *, npy_intp, double *);
+DECLDIR void random_standard_exponential_fill_f(bitgen_t *, npy_intp, float *);
+DECLDIR void random_standard_exponential_zig_fill(bitgen_t *, npy_intp, double *);
+DECLDIR void random_standard_exponential_zig_fill_f(bitgen_t *, npy_intp, float *);
+
+DECLDIR double random_standard_normal(bitgen_t *bitgen_state);
+DECLDIR float random_standard_normal_f(bitgen_t *bitgen_state);
+DECLDIR void random_standard_normal_fill(bitgen_t *, npy_intp, double *);
+DECLDIR void random_standard_normal_fill_f(bitgen_t *, npy_intp, float *);
 DECLDIR double random_standard_gamma(bitgen_t *bitgen_state, double shape);
 DECLDIR float random_standard_gamma_f(bitgen_t *bitgen_state, float shape);
-*/
-DECLDIR double random_standard_gamma_zig(bitgen_t *bitgen_state, double shape);
-DECLDIR float random_standard_gamma_zig_f(bitgen_t *bitgen_state, float shape);
 
-/*
 DECLDIR double random_normal(bitgen_t *bitgen_state, double loc, double scale);
-*/
-DECLDIR double random_normal_zig(bitgen_t *bitgen_state, double loc, double scale);
 
 DECLDIR double random_gamma(bitgen_t *bitgen_state, double shape, double scale);
-DECLDIR float random_gamma_float(bitgen_t *bitgen_state, float shape, float scale);
+DECLDIR float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale);
 
 DECLDIR double random_exponential(bitgen_t *bitgen_state, double scale);
 DECLDIR double random_uniform(bitgen_t *bitgen_state, double lower, double range);
@@ -146,27 +116,16 @@ DECLDIR double random_triangular(bitgen_t *bitgen_state, double left, double mod
 
 DECLDIR RAND_INT_TYPE random_poisson(bitgen_t *bitgen_state, double lam);
 DECLDIR RAND_INT_TYPE random_negative_binomial(bitgen_t *bitgen_state, double n,
-                                         double p);
-
-DECLDIR RAND_INT_TYPE random_binomial_btpe(bitgen_t *bitgen_state,
-                                           RAND_INT_TYPE n,
-                                           double p,
-                                           binomial_t *binomial);
-DECLDIR RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state,
-                                                RAND_INT_TYPE n,
-                                                double p,
-                                                binomial_t *binomial);
+                                 double p);
+
 DECLDIR int64_t random_binomial(bitgen_t *bitgen_state, double p,
                                 int64_t n, binomial_t *binomial);
 
 DECLDIR RAND_INT_TYPE random_logseries(bitgen_t *bitgen_state, double p);
-DECLDIR RAND_INT_TYPE random_geometric_search(bitgen_t *bitgen_state, double p);
-DECLDIR RAND_INT_TYPE random_geometric_inversion(bitgen_t *bitgen_state, double p);
 DECLDIR RAND_INT_TYPE random_geometric(bitgen_t *bitgen_state, double p);
 DECLDIR RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a);
 DECLDIR int64_t random_hypergeometric(bitgen_t *bitgen_state,
                                       int64_t good, int64_t bad, int64_t sample);
-
 DECLDIR uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max);
 
 /* Generate random uint64 numbers in closed interval [off, off + rng]. */
@@ -211,4 +170,33 @@ DECLDIR void random_bounded_bool_fill(bitgen_t *bitgen_state, npy_bool off,
 DECLDIR void random_multinomial(bitgen_t *bitgen_state, RAND_INT_TYPE n, RAND_INT_TYPE *mnix,
                                 double *pix, npy_intp d, binomial_t *binomial);
 
+/* multivariate hypergeometric, "count" method */
+DECLDIR int random_mvhg_count(bitgen_t *bitgen_state,
+                              int64_t total,
+                              size_t num_colors, int64_t *colors,
+                              int64_t nsample,
+                              size_t num_variates, int64_t *variates);
+
+/* multivariate hypergeometric, "marginals" method */
+DECLDIR void random_mvhg_marginals(bitgen_t *bitgen_state,
+                                   int64_t total,
+                                   size_t num_colors, int64_t *colors,
+                                   int64_t nsample,
+                                   size_t num_variates, int64_t *variates);
+
+/* Common to legacy-distributions.c and distributions.c but not exported */
+
+RAND_INT_TYPE random_binomial_btpe(bitgen_t *bitgen_state,
+                                   RAND_INT_TYPE n,
+                                   double p,
+                                   binomial_t *binomial);
+RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state,
+                                        RAND_INT_TYPE n,
+                                        double p,
+                                        binomial_t *binomial);
+double random_loggam(double x);
+static NPY_INLINE double next_double(bitgen_t *bitgen_state) {
+    return bitgen_state->next_double(bitgen_state->state);
+}
+
 #endif
diff --git a/numpy/random/src/legacy/legacy-distributions.h b/numpy/random/include/legacy-distributions.h
index 4bc15d58e..6a0fc7dc4 100644
--- a/numpy/random/src/legacy/legacy-distributions.h
+++ b/numpy/random/include/legacy-distributions.h
@@ -2,7 +2,7 @@
 #define _RANDOMDGEN__DISTRIBUTIONS_LEGACY_H_
 
 
-#include "../distributions/distributions.h"
+#include "distributions.h"
 
 typedef struct aug_bitgen {
   bitgen_t *bit_generator;
diff --git a/numpy/random/legacy_distributions.pxd b/numpy/random/legacy_distributions.pxd
deleted file mode 100644
index c681388db..000000000
--- a/numpy/random/legacy_distributions.pxd
+++ /dev/null
@@ -1,50 +0,0 @@
-#cython: language_level=3
-
-from libc.stdint cimport int64_t
-
-import numpy as np
-cimport numpy as np
-
-from .distributions cimport bitgen_t, binomial_t
-
-cdef extern from "legacy-distributions.h":
-
-    struct aug_bitgen:
-        bitgen_t *bit_generator
-        int has_gauss
-        double gauss
-
-    ctypedef aug_bitgen aug_bitgen_t
-
-    double legacy_gauss(aug_bitgen_t *aug_state) nogil
-    double legacy_pareto(aug_bitgen_t *aug_state, double a) nogil
-    double legacy_weibull(aug_bitgen_t *aug_state, double a) nogil
-    double legacy_standard_gamma(aug_bitgen_t *aug_state, double shape) nogil
-    double legacy_normal(aug_bitgen_t *aug_state, double loc, double scale) nogil
-    double legacy_standard_t(aug_bitgen_t *aug_state, double df) nogil
-
-    double legacy_standard_exponential(aug_bitgen_t *aug_state) nogil
-    double legacy_power(aug_bitgen_t *aug_state, double a) nogil
-    double legacy_gamma(aug_bitgen_t *aug_state, double shape, double scale) nogil
-    double legacy_power(aug_bitgen_t *aug_state, double a) nogil
-    double legacy_chisquare(aug_bitgen_t *aug_state, double df) nogil
-    double legacy_noncentral_chisquare(aug_bitgen_t *aug_state, double df,
-                                    double nonc) nogil
-    double legacy_noncentral_f(aug_bitgen_t *aug_state, double dfnum, double dfden,
-                            double nonc) nogil
-    double legacy_wald(aug_bitgen_t *aug_state, double mean, double scale) nogil
-    double legacy_lognormal(aug_bitgen_t *aug_state, double mean, double sigma) nogil
-    int64_t legacy_random_binomial(bitgen_t *bitgen_state, double p,
-                                   int64_t n, binomial_t *binomial) nogil
-    int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n, double p) nogil
-    int64_t legacy_random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad, int64_t sample) nogil
-    int64_t legacy_random_logseries(bitgen_t *bitgen_state, double p) nogil
-    int64_t legacy_random_poisson(bitgen_t *bitgen_state, double lam) nogil
-    int64_t legacy_random_zipf(bitgen_t *bitgen_state, double a) nogil
-    int64_t legacy_random_geometric(bitgen_t *bitgen_state, double p) nogil
-    void legacy_random_multinomial(bitgen_t *bitgen_state, long n, long *mnix, double *pix, np.npy_intp d, binomial_t *binomial) nogil
-    double legacy_standard_cauchy(aug_bitgen_t *state) nogil
-    double legacy_beta(aug_bitgen_t *aug_state, double a, double b) nogil
-    double legacy_f(aug_bitgen_t *aug_state, double dfnum, double dfden) nogil
-    double legacy_exponential(aug_bitgen_t *aug_state, double scale) nogil
-    double legacy_power(aug_bitgen_t *state, double a) nogil
diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx
index c469a4645..683a771cc 100644
--- a/numpy/random/mtrand.pyx
+++ b/numpy/random/mtrand.pyx
@@ -5,19 +5,100 @@ import warnings
 
 import numpy as np
 
-from .bounded_integers import _integers_types
-from .mt19937 import MT19937 as _MT19937
 from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
 from cpython cimport (Py_INCREF, PyFloat_AsDouble)
-from libc cimport string
-
 cimport cython
 cimport numpy as np
 
-from .bounded_integers cimport *
-from .common cimport *
-from .distributions cimport *
-from .legacy_distributions cimport *
+from libc cimport string
+from libc.stdint cimport int64_t, uint64_t
+from ._bounded_integers cimport (_rand_bool, _rand_int32, _rand_int64,
+         _rand_int16, _rand_int8, _rand_uint64, _rand_uint32, _rand_uint16,
+         _rand_uint8,)
+from ._bounded_integers import _integers_types
+from ._mt19937 import MT19937 as _MT19937
+from ._bit_generator cimport bitgen_t
+from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE,
+            CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1, CONS_GTE_1,
+            CONS_GT_1, LEGACY_CONS_POISSON,
+            double_fill, cont, kahan_sum, cont_broadcast_3,
+            check_array_constraint, check_constraint, disc, discrete_broadcast_iii,
+        )
+
+cdef extern from "include/distributions.h":
+    struct s_binomial_t:
+        int has_binomial
+        double psave
+        int64_t nsave
+        double r
+        double q
+        double fm
+        int64_t m
+        double p1
+        double xm
+        double xl
+        double xr
+        double c
+        double laml
+        double lamr
+        double p2
+        double p3
+        double p4
+
+    ctypedef s_binomial_t binomial_t
+
+    void random_standard_uniform_fill(bitgen_t* bitgen_state, np.npy_intp cnt, double *out) nogil
+    int64_t random_positive_int(bitgen_t *bitgen_state) nogil
+    double random_uniform(bitgen_t *bitgen_state, double lower, double range) nogil
+    double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) nogil
+    double random_laplace(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_gumbel(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_logistic(bitgen_t *bitgen_state, double loc, double scale) nogil
+    double random_rayleigh(bitgen_t *bitgen_state, double mode) nogil
+    double random_triangular(bitgen_t *bitgen_state, double left, double mode,
+                                 double right) nogil
+    uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max) nogil
+
+cdef extern from "include/legacy-distributions.h":
+    struct aug_bitgen:
+        bitgen_t *bit_generator
+        int has_gauss
+        double gauss
+
+    ctypedef aug_bitgen aug_bitgen_t
+
+    double legacy_gauss(aug_bitgen_t *aug_state) nogil
+    double legacy_pareto(aug_bitgen_t *aug_state, double a) nogil
+    double legacy_weibull(aug_bitgen_t *aug_state, double a) nogil
+    double legacy_standard_gamma(aug_bitgen_t *aug_state, double shape) nogil
+    double legacy_normal(aug_bitgen_t *aug_state, double loc, double scale) nogil
+    double legacy_standard_t(aug_bitgen_t *aug_state, double df) nogil
+
+    double legacy_standard_exponential(aug_bitgen_t *aug_state) nogil
+    double legacy_power(aug_bitgen_t *aug_state, double a) nogil
+    double legacy_gamma(aug_bitgen_t *aug_state, double shape, double scale) nogil
+    double legacy_power(aug_bitgen_t *aug_state, double a) nogil
+    double legacy_chisquare(aug_bitgen_t *aug_state, double df) nogil
+    double legacy_noncentral_chisquare(aug_bitgen_t *aug_state, double df,
+                                    double nonc) nogil
+    double legacy_noncentral_f(aug_bitgen_t *aug_state, double dfnum, double dfden,
+                            double nonc) nogil
+    double legacy_wald(aug_bitgen_t *aug_state, double mean, double scale) nogil
+    double legacy_lognormal(aug_bitgen_t *aug_state, double mean, double sigma) nogil
+    int64_t legacy_random_binomial(bitgen_t *bitgen_state, double p,
+                                   int64_t n, binomial_t *binomial) nogil
+    int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n, double p) nogil
+    int64_t legacy_random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad, int64_t sample) nogil
+    int64_t legacy_random_logseries(bitgen_t *bitgen_state, double p) nogil
+    int64_t legacy_random_poisson(bitgen_t *bitgen_state, double lam) nogil
+    int64_t legacy_random_zipf(bitgen_t *bitgen_state, double a) nogil
+    int64_t legacy_random_geometric(bitgen_t *bitgen_state, double p) nogil
+    void legacy_random_multinomial(bitgen_t *bitgen_state, long n, long *mnix, double *pix, np.npy_intp d, binomial_t *binomial) nogil
+    double legacy_standard_cauchy(aug_bitgen_t *state) nogil
+    double legacy_beta(aug_bitgen_t *aug_state, double a, double b) nogil
+    double legacy_f(aug_bitgen_t *aug_state, double dfnum, double dfden) nogil
+    double legacy_exponential(aug_bitgen_t *aug_state, double scale) nogil
+    double legacy_power(aug_bitgen_t *state, double a) nogil
 
 np.import_array()
 
@@ -84,7 +165,7 @@ cdef class RandomState:
     --------
     Generator
     MT19937
-    :ref:`bit_generator`
+    numpy.random.BitGenerator
 
     """
     cdef public object _bit_generator
@@ -329,7 +410,7 @@ cdef class RandomState:
 
         """
         cdef double temp
-        return double_fill(&random_double_fill, &self._bitgen, size, self.lock, None)
+        return double_fill(&random_standard_uniform_fill, &self._bitgen, size, self.lock, None)
 
     def random(self, size=None):
         """
@@ -567,7 +648,7 @@ cdef class RandomState:
 
         See Also
         --------
-        random.random_integers : similar to `randint`, only for the closed
+        random_integers : similar to `randint`, only for the closed
             interval [`low`, `high`], and 1 is the lowest value if `high` is
             omitted.
 
@@ -985,7 +1066,7 @@ cdef class RandomState:
 
         .. note::
             This is a convenience function for users porting code from Matlab,
-            and wraps `numpy.random.random_sample`. That function takes a
+            and wraps `random_sample`. That function takes a
             tuple to specify the size of the output, which is consistent with
             other NumPy functions like `numpy.zeros` and `numpy.ones`.
 
@@ -1029,7 +1110,7 @@ cdef class RandomState:
 
         .. note::
             This is a convenience function for users porting code from Matlab,
-            and wraps `numpy.random.standard_normal`. That function takes a
+            and wraps `standard_normal`. That function takes a
             tuple to specify the size of the output, which is consistent with
             other NumPy functions like `numpy.zeros` and `numpy.ones`.
 
@@ -1289,8 +1370,8 @@ cdef class RandomState:
         The function has its peak at the mean, and its "spread" increases with
         the standard deviation (the function reaches 0.607 times its maximum at
         :math:`x + \\sigma` and :math:`x - \\sigma` [2]_).  This implies that
-        `numpy.random.normal` is more likely to return samples lying close to
-        the mean, rather than those far away.
+        `normal` is more likely to return samples lying close to the mean,
+        rather than those far away.
 
         References
         ----------
diff --git a/numpy/random/setup.py b/numpy/random/setup.py
index ce7f0565f..ca01250f4 100644
--- a/numpy/random/setup.py
+++ b/numpy/random/setup.py
@@ -61,32 +61,32 @@ def configuration(parent_package='', top_path=None):
 
     for gen in ['mt19937']:
         # gen.pyx, src/gen/gen.c, src/gen/gen-jump.c
-        config.add_extension(gen,
-                             sources=['{0}.c'.format(gen),
+        config.add_extension('_{0}'.format(gen),
+                             sources=['_{0}.c'.format(gen),
                                       'src/{0}/{0}.c'.format(gen),
                                       'src/{0}/{0}-jump.c'.format(gen)],
                              include_dirs=['.', 'src', join('src', gen)],
                              libraries=EXTRA_LIBRARIES,
                              extra_compile_args=EXTRA_COMPILE_ARGS,
                              extra_link_args=EXTRA_LINK_ARGS,
-                             depends=['%s.pyx' % gen],
+                             depends=['_%s.pyx' % gen],
                              define_macros=defs,
                              )
     for gen in ['philox', 'pcg64', 'sfc64']:
         # gen.pyx, src/gen/gen.c
         _defs = defs + PCG64_DEFS if gen == 'pcg64' else defs
-        config.add_extension(gen,
-                             sources=['{0}.c'.format(gen),
+        config.add_extension('_{0}'.format(gen),
+                             sources=['_{0}.c'.format(gen),
                                       'src/{0}/{0}.c'.format(gen)],
                              include_dirs=['.', 'src', join('src', gen)],
                              libraries=EXTRA_LIBRARIES,
                              extra_compile_args=EXTRA_COMPILE_ARGS,
                              extra_link_args=EXTRA_LINK_ARGS,
-                             depends=['%s.pyx' % gen, 'bit_generator.pyx',
-                                      'bit_generator.pxd'],
+                             depends=['_%s.pyx' % gen, '_bit_generator.pyx',
+                                      '_bit_generator.pxd'],
                              define_macros=_defs,
                              )
-    for gen in ['common', 'bit_generator']:
+    for gen in ['_common', '_bit_generator']:
         # gen.pyx
         config.add_extension(gen,
                              sources=['{0}.c'.format(gen)],
@@ -100,9 +100,11 @@ def configuration(parent_package='', top_path=None):
     other_srcs = [
         'src/distributions/logfactorial.c',
         'src/distributions/distributions.c',
+        'src/distributions/random_mvhg_count.c',
+        'src/distributions/random_mvhg_marginals.c',
         'src/distributions/random_hypergeometric.c',
     ]
-    for gen in ['generator', 'bounded_integers']:
+    for gen in ['_generator', '_bounded_integers']:
         # gen.pyx, src/distributions/distributions.c
         config.add_extension(gen,
                              sources=['{0}.c'.format(gen)] + other_srcs,
@@ -114,7 +116,6 @@ def configuration(parent_package='', top_path=None):
                              define_macros=defs,
                              )
     config.add_extension('mtrand',
-                         # mtrand does not depend on random_hypergeometric.c.
                          sources=['mtrand.c',
                                   'src/legacy/legacy-distributions.c',
                                   'src/distributions/logfactorial.c',
diff --git a/numpy/random/src/aligned_malloc/aligned_malloc.c b/numpy/random/src/aligned_malloc/aligned_malloc.c
deleted file mode 100644
index 6e8192cfb..000000000
--- a/numpy/random/src/aligned_malloc/aligned_malloc.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "aligned_malloc.h"
-
-static NPY_INLINE void *PyArray_realloc_aligned(void *p, size_t n);
-
-static NPY_INLINE void *PyArray_malloc_aligned(size_t n);
-
-static NPY_INLINE void *PyArray_calloc_aligned(size_t n, size_t s);
-
-static NPY_INLINE void PyArray_free_aligned(void *p);
-\ No newline at end of file
diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c
index 1244ffe65..ab8de8bcb 100644
--- a/numpy/random/src/distributions/distributions.c
+++ b/numpy/random/src/distributions/distributions.c
@@ -1,4 +1,4 @@
-#include "distributions.h"
+#include "include/distributions.h"
 #include "ziggurat_constants.h"
 #include "logfactorial.h"
 
@@ -6,90 +6,52 @@
 #include <intrin.h>
 #endif
 
-/* Random generators for external use */
-float random_float(bitgen_t *bitgen_state) { return next_float(bitgen_state); }
-
-double random_double(bitgen_t *bitgen_state) {
-  return next_double(bitgen_state);
+/* Inline generators for internal use */
+static NPY_INLINE uint32_t next_uint32(bitgen_t *bitgen_state) {
+  return bitgen_state->next_uint32(bitgen_state->state);
 }
-
-static NPY_INLINE double next_standard_exponential(bitgen_t *bitgen_state) {
-  return -log(1.0 - next_double(bitgen_state));
+static NPY_INLINE uint64_t next_uint64(bitgen_t *bitgen_state) {
+  return bitgen_state->next_uint64(bitgen_state->state);
 }
 
-double random_standard_exponential(bitgen_t *bitgen_state) {
-  return next_standard_exponential(bitgen_state);
+static NPY_INLINE float next_float(bitgen_t *bitgen_state) {
+  return (next_uint32(bitgen_state) >> 9) * (1.0f / 8388608.0f);
 }
 
-void random_standard_exponential_fill(bitgen_t *bitgen_state, npy_intp cnt,
-                                      double *out) {
-  npy_intp i;
-  for (i = 0; i < cnt; i++) {
-    out[i] = next_standard_exponential(bitgen_state);
-  }
+/* Random generators for external use */
+float random_standard_uniform_f(bitgen_t *bitgen_state) {
+    return next_float(bitgen_state);
+}
 
-float random_standard_exponential_f(bitgen_t *bitgen_state) {
-  return -logf(1.0f - next_float(bitgen_state));
+double random_standard_uniform(bitgen_t *bitgen_state) {
+    return next_double(bitgen_state);
 }
 
-void random_double_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
+void random_standard_uniform_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
   npy_intp i;
   for (i = 0; i < cnt; i++) {
     out[i] = next_double(bitgen_state);
   }
 }
-#if 0
-double random_gauss(bitgen_t *bitgen_state) {
-  if (bitgen_state->has_gauss) {
-    const double temp = bitgen_state->gauss;
-    bitgen_state->has_gauss = false;
-    bitgen_state->gauss = 0.0;
-    return temp;
-  } else {
-    double f, x1, x2, r2;
 
-    do {
-      x1 = 2.0 * next_double(bitgen_state) - 1.0;
-      x2 = 2.0 * next_double(bitgen_state) - 1.0;
-      r2 = x1 * x1 + x2 * x2;
-    } while (r2 >= 1.0 || r2 == 0.0);
-
-    /* Polar method, a more efficient version of the Box-Muller approach. */
-    f = sqrt(-2.0 * log(r2) / r2);
-    /* Keep for next call */
-    bitgen_state->gauss = f * x1;
-    bitgen_state->has_gauss = true;
-    return f * x2;
+void random_standard_uniform_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out) {
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = next_float(bitgen_state);
   }
 }
 
-float random_gauss_f(bitgen_t *bitgen_state) {
-  if (bitgen_state->has_gauss_f) {
-    const float temp = bitgen_state->gauss_f;
-    bitgen_state->has_gauss_f = false;
-    bitgen_state->gauss_f = 0.0f;
-    return temp;
-  } else {
-    float f, x1, x2, r2;
-
-    do {
-      x1 = 2.0f * next_float(bitgen_state) - 1.0f;
-      x2 = 2.0f * next_float(bitgen_state) - 1.0f;
-      r2 = x1 * x1 + x2 * x2;
-    } while (r2 >= 1.0 || r2 == 0.0);
+double random_standard_exponential(bitgen_t *bitgen_state) {
+    return -log(1.0 - next_double(bitgen_state));
+}
 
-    /* Polar method, a more efficient version of the Box-Muller approach. */
-    f = sqrtf(-2.0f * logf(r2) / r2);
-    /* Keep for next call */
-    bitgen_state->gauss_f = f * x1;
-    bitgen_state->has_gauss_f = true;
-    return f * x2;
+void random_standard_exponential_fill(bitgen_t * bitgen_state, npy_intp cnt, double * out)
+{
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = random_standard_exponential(bitgen_state);
   }
 }
-#endif
-
-static NPY_INLINE double standard_exponential_zig(bitgen_t *bitgen_state);
 
 static double standard_exponential_zig_unlikely(bitgen_t *bitgen_state,
                                                 uint8_t idx, double x) {
@@ -101,11 +63,11 @@ static double standard_exponential_zig_unlikely(bitgen_t *bitgen_state,
              exp(-x)) {
     return x;
   } else {
-    return standard_exponential_zig(bitgen_state);
+    return random_standard_exponential_zig(bitgen_state);
   }
 }
 
-static NPY_INLINE double standard_exponential_zig(bitgen_t *bitgen_state) {
+double random_standard_exponential_zig(bitgen_t *bitgen_state) {
   uint64_t ri;
   uint8_t idx;
   double x;
@@ -120,20 +82,26 @@ static NPY_INLINE double standard_exponential_zig(bitgen_t *bitgen_state) {
   return standard_exponential_zig_unlikely(bitgen_state, idx, x);
 }
 
-double random_standard_exponential_zig(bitgen_t *bitgen_state) {
-  return standard_exponential_zig(bitgen_state);
+void random_standard_exponential_zig_fill(bitgen_t * bitgen_state, npy_intp cnt, double * out)
+{
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = random_standard_exponential_zig(bitgen_state);
+  }
+}
+
+float random_standard_exponential_f(bitgen_t *bitgen_state) {
+  return -logf(1.0f - next_float(bitgen_state));
 }
 
-void random_standard_exponential_zig_fill(bitgen_t *bitgen_state, npy_intp cnt,
-                                          double *out) {
+void random_standard_exponential_fill_f(bitgen_t * bitgen_state, npy_intp cnt, float * out)
+{
   npy_intp i;
   for (i = 0; i < cnt; i++) {
-    out[i] = standard_exponential_zig(bitgen_state);
+    out[i] = random_standard_exponential_f(bitgen_state);
   }
 }
 
-static NPY_INLINE float standard_exponential_zig_f(bitgen_t *bitgen_state);
-
 static float standard_exponential_zig_unlikely_f(bitgen_t *bitgen_state,
                                                  uint8_t idx, float x) {
   if (idx == 0) {
@@ -144,11 +112,11 @@ static float standard_exponential_zig_unlikely_f(bitgen_t *bitgen_state,
              expf(-x)) {
     return x;
   } else {
-    return standard_exponential_zig_f(bitgen_state);
+    return random_standard_exponential_zig_f(bitgen_state);
   }
 }
 
-static NPY_INLINE float standard_exponential_zig_f(bitgen_t *bitgen_state) {
+float random_standard_exponential_zig_f(bitgen_t *bitgen_state) {
   uint32_t ri;
   uint8_t idx;
   float x;
@@ -163,11 +131,15 @@ static NPY_INLINE float standard_exponential_zig_f(bitgen_t *bitgen_state) {
   return standard_exponential_zig_unlikely_f(bitgen_state, idx, x);
 }
 
-float random_standard_exponential_zig_f(bitgen_t *bitgen_state) {
-  return standard_exponential_zig_f(bitgen_state);
+void random_standard_exponential_zig_fill_f(bitgen_t * bitgen_state, npy_intp cnt, float * out)
+{
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = random_standard_exponential_zig_f(bitgen_state);
+  }
 }
 
-static NPY_INLINE double next_gauss_zig(bitgen_t *bitgen_state) {
+double random_standard_normal(bitgen_t *bitgen_state) {
   uint64_t r;
   int sign;
   uint64_t rabs;
@@ -202,18 +174,14 @@ static NPY_INLINE double next_gauss_zig(bitgen_t *bitgen_state) {
   }
 }
 
-double random_gauss_zig(bitgen_t *bitgen_state) {
-  return next_gauss_zig(bitgen_state);
-}
-
-void random_gauss_zig_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
+void random_standard_normal_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
   npy_intp i;
   for (i = 0; i < cnt; i++) {
-    out[i] = next_gauss_zig(bitgen_state);
+    out[i] = random_standard_normal(bitgen_state);
   }
 }
 
-float random_gauss_zig_f(bitgen_t *bitgen_state) {
+float random_standard_normal_f(bitgen_t *bitgen_state) {
   uint32_t r;
   int sign;
   uint32_t rabs;
@@ -247,101 +215,14 @@ float random_gauss_zig_f(bitgen_t *bitgen_state) {
   }
 }
 
-/*
-static NPY_INLINE double standard_gamma(bitgen_t *bitgen_state, double shape) {
-  double b, c;
-  double U, V, X, Y;
-
-  if (shape == 1.0) {
-    return random_standard_exponential(bitgen_state);
-  } else if (shape < 1.0) {
-    for (;;) {
-      U = next_double(bitgen_state);
-      V = random_standard_exponential(bitgen_state);
-      if (U <= 1.0 - shape) {
-        X = pow(U, 1. / shape);
-        if (X <= V) {
-          return X;
-        }
-      } else {
-        Y = -log((1 - U) / shape);
-        X = pow(1.0 - shape + shape * Y, 1. / shape);
-        if (X <= (V + Y)) {
-          return X;
-        }
-      }
-    }
-  } else {
-    b = shape - 1. / 3.;
-    c = 1. / sqrt(9 * b);
-    for (;;) {
-      do {
-        X = random_gauss(bitgen_state);
-        V = 1.0 + c * X;
-      } while (V <= 0.0);
-
-      V = V * V * V;
-      U = next_double(bitgen_state);
-      if (U < 1.0 - 0.0331 * (X * X) * (X * X))
-        return (b * V);
-      if (log(U) < 0.5 * X * X + b * (1. - V + log(V)))
-        return (b * V);
-    }
-  }
-}
-
-static NPY_INLINE float standard_gamma_float(bitgen_t *bitgen_state, float
-shape) { float b, c; float U, V, X, Y;
-
-  if (shape == 1.0f) {
-    return random_standard_exponential_f(bitgen_state);
-  } else if (shape < 1.0f) {
-    for (;;) {
-      U = next_float(bitgen_state);
-      V = random_standard_exponential_f(bitgen_state);
-      if (U <= 1.0f - shape) {
-        X = powf(U, 1.0f / shape);
-        if (X <= V) {
-          return X;
-        }
-      } else {
-        Y = -logf((1.0f - U) / shape);
-        X = powf(1.0f - shape + shape * Y, 1.0f / shape);
-        if (X <= (V + Y)) {
-          return X;
-        }
-      }
-    }
-  } else {
-    b = shape - 1.0f / 3.0f;
-    c = 1.0f / sqrtf(9.0f * b);
-    for (;;) {
-      do {
-        X = random_gauss_f(bitgen_state);
-        V = 1.0f + c * X;
-      } while (V <= 0.0f);
-
-      V = V * V * V;
-      U = next_float(bitgen_state);
-      if (U < 1.0f - 0.0331f * (X * X) * (X * X))
-        return (b * V);
-      if (logf(U) < 0.5f * X * X + b * (1.0f - V + logf(V)))
-        return (b * V);
-    }
+void random_standard_normal_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out) {
+  npy_intp i;
+  for (i = 0; i < cnt; i++) {
+    out[i] = random_standard_normal_f(bitgen_state);
   }
 }
 
-
-double random_standard_gamma(bitgen_t *bitgen_state, double shape) {
-  return standard_gamma(bitgen_state, shape);
-}
-
-float random_standard_gamma_f(bitgen_t *bitgen_state, float shape) {
-  return standard_gamma_float(bitgen_state, shape);
-}
-*/
-
-static NPY_INLINE double standard_gamma_zig(bitgen_t *bitgen_state,
+double random_standard_gamma(bitgen_t *bitgen_state,
                                             double shape) {
   double b, c;
   double U, V, X, Y;
@@ -372,7 +253,7 @@ static NPY_INLINE double standard_gamma_zig(bitgen_t *bitgen_state,
     c = 1. / sqrt(9 * b);
     for (;;) {
       do {
-        X = random_gauss_zig(bitgen_state);
+        X = random_standard_normal(bitgen_state);
         V = 1.0 + c * X;
       } while (V <= 0.0);
 
@@ -387,7 +268,7 @@ static NPY_INLINE double standard_gamma_zig(bitgen_t *bitgen_state,
   }
 }
 
-static NPY_INLINE float standard_gamma_zig_f(bitgen_t *bitgen_state,
+float random_standard_gamma_f(bitgen_t *bitgen_state,
                                              float shape) {
   float b, c;
   float U, V, X, Y;
@@ -418,7 +299,7 @@ static NPY_INLINE float standard_gamma_zig_f(bitgen_t *bitgen_state,
     c = 1.0f / sqrtf(9.0f * b);
     for (;;) {
       do {
-        X = random_gauss_zig_f(bitgen_state);
+        X = random_standard_normal_f(bitgen_state);
         V = 1.0f + c * X;
       } while (V <= 0.0f);
 
@@ -433,14 +314,6 @@ static NPY_INLINE float standard_gamma_zig_f(bitgen_t *bitgen_state,
   }
 }
 
-double random_standard_gamma_zig(bitgen_t *bitgen_state, double shape) {
-  return standard_gamma_zig(bitgen_state, shape);
-}
-
-float random_standard_gamma_zig_f(bitgen_t *bitgen_state, float shape) {
-  return standard_gamma_zig_f(bitgen_state, shape);
-}
-
 int64_t random_positive_int64(bitgen_t *bitgen_state) {
   return next_uint64(bitgen_state) >> 1;
 }
@@ -470,10 +343,10 @@ uint64_t random_uint(bitgen_t *bitgen_state) {
  * algorithm comes from SPECFUN by Shanjie Zhang and Jianming Jin and their
  * book "Computation of Special Functions", 1996, John Wiley & Sons, Inc.
  *
- * If loggam(k+1) is being used to compute log(k!) for an integer k, consider
+ * If random_loggam(k+1) is being used to compute log(k!) for an integer k, consider
  * using logfactorial(k) instead.
  */
-double loggam(double x) {
+double random_loggam(double x) {
   double x0, x2, xp, gl, gl0;
   RAND_INT_TYPE k, n;
 
@@ -513,12 +386,12 @@ double random_normal(bitgen_t *bitgen_state, double loc, double scale) {
 }
 */
 
-double random_normal_zig(bitgen_t *bitgen_state, double loc, double scale) {
-  return loc + scale * random_gauss_zig(bitgen_state);
+double random_normal(bitgen_t *bitgen_state, double loc, double scale) {
+  return loc + scale * random_standard_normal(bitgen_state);
 }
 
 double random_exponential(bitgen_t *bitgen_state, double scale) {
-  return scale * standard_exponential_zig(bitgen_state);
+  return scale * random_standard_exponential_zig(bitgen_state);
 }
 
 double random_uniform(bitgen_t *bitgen_state, double lower, double range) {
@@ -526,11 +399,11 @@ double random_uniform(bitgen_t *bitgen_state, double lower, double range) {
 }
 
 double random_gamma(bitgen_t *bitgen_state, double shape, double scale) {
-  return scale * random_standard_gamma_zig(bitgen_state, shape);
+  return scale * random_standard_gamma(bitgen_state, shape);
 }
 
-float random_gamma_float(bitgen_t *bitgen_state, float shape, float scale) {
-  return scale * random_standard_gamma_zig_f(bitgen_state, shape);
+float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale) {
+  return scale * random_standard_gamma_f(bitgen_state, shape);
 }
 
 double random_beta(bitgen_t *bitgen_state, double a, double b) {
@@ -562,14 +435,14 @@ double random_beta(bitgen_t *bitgen_state, double a, double b) {
       }
     }
   } else {
-    Ga = random_standard_gamma_zig(bitgen_state, a);
-    Gb = random_standard_gamma_zig(bitgen_state, b);
+    Ga = random_standard_gamma(bitgen_state, a);
+    Gb = random_standard_gamma(bitgen_state, b);
     return Ga / (Ga + Gb);
   }
 }
 
 double random_chisquare(bitgen_t *bitgen_state, double df) {
-  return 2.0 * random_standard_gamma_zig(bitgen_state, df / 2.0);
+  return 2.0 * random_standard_gamma(bitgen_state, df / 2.0);
 }
 
 double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) {
@@ -578,22 +451,22 @@ double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) {
 }
 
 double random_standard_cauchy(bitgen_t *bitgen_state) {
-  return random_gauss_zig(bitgen_state) / random_gauss_zig(bitgen_state);
+  return random_standard_normal(bitgen_state) / random_standard_normal(bitgen_state);
 }
 
 double random_pareto(bitgen_t *bitgen_state, double a) {
-  return exp(standard_exponential_zig(bitgen_state) / a) - 1;
+  return exp(random_standard_exponential_zig(bitgen_state) / a) - 1;
 }
 
 double random_weibull(bitgen_t *bitgen_state, double a) {
   if (a == 0.0) {
     return 0.0;
   }
-  return pow(standard_exponential_zig(bitgen_state), 1. / a);
+  return pow(random_standard_exponential_zig(bitgen_state), 1. / a);
 }
 
 double random_power(bitgen_t *bitgen_state, double a) {
-  return pow(1 - exp(-standard_exponential_zig(bitgen_state)), 1. / a);
+  return pow(1 - exp(-random_standard_exponential_zig(bitgen_state)), 1. / a);
 }
 
 double random_laplace(bitgen_t *bitgen_state, double loc, double scale) {
@@ -634,7 +507,7 @@ double random_logistic(bitgen_t *bitgen_state, double loc, double scale) {
 }
 
 double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma) {
-  return exp(random_normal_zig(bitgen_state, mean, sigma));
+  return exp(random_normal(bitgen_state, mean, sigma));
 }
 
 double random_rayleigh(bitgen_t *bitgen_state, double mode) {
@@ -644,8 +517,8 @@ double random_rayleigh(bitgen_t *bitgen_state, double mode) {
 double random_standard_t(bitgen_t *bitgen_state, double df) {
   double num, denom;
 
-  num = random_gauss_zig(bitgen_state);
-  denom = random_standard_gamma_zig(bitgen_state, df / 2);
+  num = random_standard_normal(bitgen_state);
+  denom = random_standard_gamma(bitgen_state, df / 2);
   return sqrt(df / 2) * num / sqrt(denom);
 }
 
@@ -699,7 +572,7 @@ static RAND_INT_TYPE random_poisson_ptrs(bitgen_t *bitgen_state, double lam) {
     /* log(V) == log(0.0) ok here */
     /* if U==0.0 so that us==0.0, log is ok since always returns */
     if ((log(V) + log(invalpha) - log(a / (us * us) + b)) <=
-        (-lam + k * loglam - loggam(k + 1))) {
+        (-lam + k * loglam - random_loggam(k + 1))) {
       return k;
     }
   }
@@ -934,7 +807,7 @@ double random_noncentral_chisquare(bitgen_t *bitgen_state, double df,
   }
   if (1 < df) {
     const double Chi2 = random_chisquare(bitgen_state, df - 1);
-    const double n = random_gauss_zig(bitgen_state) + sqrt(nonc);
+    const double n = random_standard_normal(bitgen_state) + sqrt(nonc);
     return Chi2 + n * n;
   } else {
     const RAND_INT_TYPE i = random_poisson(bitgen_state, nonc / 2.0);
@@ -953,7 +826,7 @@ double random_wald(bitgen_t *bitgen_state, double mean, double scale) {
   double mu_2l;
 
   mu_2l = mean / (2 * scale);
-  Y = random_gauss_zig(bitgen_state);
+  Y = random_standard_normal(bitgen_state);
   Y = mean * Y * Y;
   X = mean + mu_2l * (Y - sqrt(4 * scale * Y + Y * Y));
   U = next_double(bitgen_state);
@@ -1092,8 +965,8 @@ RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a) {
   while (1) {
     double T, U, V, X;
 
-    U = 1.0 - random_double(bitgen_state);
-    V = random_double(bitgen_state);
+    U = 1.0 - next_double(bitgen_state);
+    V = next_double(bitgen_state);
     X = floor(pow(U, -1.0 / am1));
     /*
      * The real result may be above what can be represented in a signed
@@ -1297,10 +1170,7 @@ static NPY_INLINE uint64_t bounded_lemire_uint64(bitgen_t *bitgen_state,
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-
-    const uint64_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) %
-     * rng_excl; */
+    const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((__uint128_t)next_uint64(bitgen_state)) * rng_excl;
@@ -1323,10 +1193,7 @@ static NPY_INLINE uint64_t bounded_lemire_uint64(bitgen_t *bitgen_state,
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-
-    const uint64_t threshold = -rng_excl % rng_excl;
-    /* Same as:threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) %
-     * rng_excl; */
+    const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       x = next_uint64(bitgen_state);
@@ -1387,8 +1254,7 @@ static NPY_INLINE uint32_t buffered_bounded_lemire_uint32(
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-    const uint32_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint64_t)(0x100000000ULL - rng_excl)) % rng_excl; */
+    const uint32_t threshold = (UINT32_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((uint64_t)next_uint32(bitgen_state)) * rng_excl;
@@ -1422,8 +1288,7 @@ static NPY_INLINE uint16_t buffered_bounded_lemire_uint16(
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-    const uint16_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint32_t)(0x10000ULL - rng_excl)) % rng_excl; */
+    const uint16_t threshold = (UINT16_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((uint32_t)buffered_uint16(bitgen_state, bcnt, buf)) * rng_excl;
@@ -1458,8 +1323,7 @@ static NPY_INLINE uint8_t buffered_bounded_lemire_uint8(bitgen_t *bitgen_state,
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-    const uint8_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint16_t)(0x100ULL - rng_excl)) % rng_excl; */
+    const uint8_t threshold = (UINT8_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((uint16_t)buffered_uint8(bitgen_state, bcnt, buf)) * rng_excl;
diff --git a/numpy/random/src/distributions/random_hypergeometric.c b/numpy/random/src/distributions/random_hypergeometric.c
index 94dc6380f..da5ea9c68 100644
--- a/numpy/random/src/distributions/random_hypergeometric.c
+++ b/numpy/random/src/distributions/random_hypergeometric.c
@@ -1,4 +1,4 @@
-#include "distributions.h"
+#include "include/distributions.h"
 #include "logfactorial.h"
 #include <stdint.h>
 
@@ -188,8 +188,8 @@ static int64_t hypergeometric_hrua(bitgen_t *bitgen_state,
     while (1) {
         double U, V, X, T;
         double gp;
-        U = random_double(bitgen_state);
-        V = random_double(bitgen_state);  // "U star" in Stadlober (1989)
+        U = next_double(bitgen_state);
+        V = next_double(bitgen_state);  // "U star" in Stadlober (1989)
         X = a + h*(V - 0.5) / U;
 
         // fast rejection:
diff --git a/numpy/random/src/distributions/random_mvhg_count.c b/numpy/random/src/distributions/random_mvhg_count.c
new file mode 100644
index 000000000..9c0cc045d
--- /dev/null
+++ b/numpy/random/src/distributions/random_mvhg_count.c
@@ -0,0 +1,131 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "include/distributions.h"
+
+/*
+ *  random_mvhg_count
+ *
+ *  Draw variates from the multivariate hypergeometric distribution--
+ *  the "count" algorithm.
+ *
+ *  Parameters
+ *  ----------
+ *  bitgen_t *bitgen_state
+ *      Pointer to a `bitgen_t` instance.
+ *  int64_t total
+ *      The sum of the values in the array `colors`.  (This is redundant
+ *      information, but we know the caller has already computed it, so
+ *      we might as well use it.)
+ *  size_t num_colors
+ *      The length of the `colors` array.
+ *  int64_t *colors
+ *      The array of colors (i.e. the number of each type in the collection
+ *      from which the random variate is drawn).
+ *  int64_t nsample
+ *      The number of objects drawn without replacement for each variate.
+ *      `nsample` must not exceed sum(colors).  This condition is not checked;
+ *      it is assumed that the caller has already validated the value.
+ *  size_t num_variates
+ *      The number of variates to be produced and put in the array
+ *      pointed to by `variates`.  One variate is a vector of length
+ *      `num_colors`, so the array pointed to by `variates` must have length
+ *      `num_variates * num_colors`.
+ *  int64_t *variates
+ *      The array that will hold the result.  It must have length
+ *      `num_variates * num_colors`.
+ *      The array is not initialized in the function; it is expected that the
+ *      array has been initialized with zeros when the function is called.
+ *
+ *  Notes
+ *  -----
+ *  The "count" algorithm for drawing one variate is roughly equivalent to the
+ *  following numpy code:
+ *
+ *      choices = np.repeat(np.arange(len(colors)), colors)
+ *      selection = np.random.choice(choices, nsample, replace=False)
+ *      variate = np.bincount(selection, minlength=len(colors))
+ *
+ *  This function uses a temporary array with length sum(colors).
+ *
+ *  Assumptions on the arguments (not checked in the function):
+ *    *  colors[k] >= 0  for k in range(num_colors)
+ *    *  total = sum(colors)
+ *    *  0 <= nsample <= total
+ *    *  the product total * sizeof(size_t) does not exceed SIZE_MAX
+ *    *  the product num_variates * num_colors does not overflow
+ */
+
+int random_mvhg_count(bitgen_t *bitgen_state,
+                      int64_t total,
+                      size_t num_colors, int64_t *colors,
+                      int64_t nsample,
+                      size_t num_variates, int64_t *variates)
+{
+    size_t *choices;
+    bool more_than_half;
+
+    if ((total == 0) || (nsample == 0) || (num_variates == 0)) {
+        // Nothing to do.
+        return 0;
+    }
+
+    choices = malloc(total * (sizeof *choices));
+    if (choices == NULL) {
+        return -1;
+    }
+
+    /*
+     *  If colors contains, for example, [3 2 5], then choices
+     *  will contain [0 0 0 1 1 2 2 2 2 2].
+     */
+    for (size_t i = 0, k = 0; i < num_colors; ++i) {
+        for (int64_t j = 0; j < colors[i]; ++j) {
+            choices[k] = i;
+            ++k;
+        }
+    }
+
+    more_than_half = nsample > (total / 2);
+    if (more_than_half) {
+        nsample = total - nsample;
+    }
+
+    for (size_t i = 0; i < num_variates * num_colors; i += num_colors) {
+        /*
+         *  Fisher-Yates shuffle, but only loop through the first
+         *  `nsample` entries of `choices`.  After the loop,
+         *  choices[:nsample] contains a random sample from the
+         *  full array.
+         */
+        for (size_t j = 0; j < (size_t) nsample; ++j) {
+            size_t tmp, k;
+            // Note: nsample is not greater than total, so there is no danger
+            // of integer underflow in `(size_t) total - j - 1`.
+            k = j + (size_t) random_interval(bitgen_state,
+                                             (size_t) total - j - 1);
+            tmp = choices[k];
+            choices[k] = choices[j];
+            choices[j] = tmp;
+        }
+        /*
+         *  Count the number of occurrences of each value in choices[:nsample].
+         *  The result, stored in variates[i:i+num_colors], is the sample from
+         *  the multivariate hypergeometric distribution.
+         */
+        for (size_t j = 0; j < (size_t) nsample; ++j) {
+            variates[i + choices[j]] += 1;
+        }
+
+        if (more_than_half) {
+            for (size_t k = 0; k < num_colors; ++k) {
+                variates[i + k] = colors[k] - variates[i + k];
+            }
+        }
+    }
+
+    free(choices);
+
+    return 0;
+}
diff --git a/numpy/random/src/distributions/random_mvhg_marginals.c b/numpy/random/src/distributions/random_mvhg_marginals.c
new file mode 100644
index 000000000..301a4acad
--- /dev/null
+++ b/numpy/random/src/distributions/random_mvhg_marginals.c
@@ -0,0 +1,138 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <math.h>
+
+#include "include/distributions.h"
+#include "logfactorial.h"
+
+
+/*
+ *  random_mvhg_marginals
+ *
+ *  Draw samples from the multivariate hypergeometric distribution--
+ *  the "marginals" algorithm.
+ *
+ *  This version generates the sample by iteratively calling
+ *  hypergeometric() (the univariate hypergeometric distribution).
+ *
+ *  Parameters
+ *  ----------
+ *  bitgen_t *bitgen_state
+ *      Pointer to a `bitgen_t` instance.
+ *  int64_t total
+ *      The sum of the values in the array `colors`.  (This is redundant
+ *      information, but we know the caller has already computed it, so
+ *      we might as well use it.)
+ *  size_t num_colors
+ *      The length of the `colors` array.  The function assumes
+ *      num_colors > 0.
+ *  int64_t *colors
+ *      The array of colors (i.e. the number of each type in the collection
+ *      from which the random variate is drawn).
+ *  int64_t nsample
+ *      The number of objects drawn without replacement for each variate.
+ *      `nsample` must not exceed sum(colors).  This condition is not checked;
+ *      it is assumed that the caller has already validated the value.
+ *  size_t num_variates
+ *      The number of variates to be produced and put in the array
+ *      pointed to by `variates`.  One variate is a vector of length
+ *      `num_colors`, so the array pointed to by `variates` must have length
+ *      `num_variates * num_colors`.
+ *  int64_t *variates
+ *      The array that will hold the result.  It must have length
+ *      `num_variates * num_colors`.
+ *      The array is not initialized in the function; it is expected that the
+ *      array has been initialized with zeros when the function is called.
+ *
+ *  Notes
+ *  -----
+ *  Here's an example that demonstrates the idea of this algorithm.
+ *
+ *  Suppose the urn contains red, green, blue and yellow marbles.
+ *  Let nred be the number of red marbles, and define the quantities for
+ *  the other colors similarly.  The total number of marbles is
+ *
+ *      total = nred + ngreen + nblue + nyellow.
+ *
+ *  To generate a sample using hypergeometric():
+ *
+ *     red_sample = hypergeometric(ngood=nred, nbad=total - nred,
+ *                                 nsample=nsample)
+ *
+ *  This gives us the number of red marbles in the sample.  The number of
+ *  marbles in the sample that are *not* red is nsample - red_sample.
+ *  To figure out the distribution of those marbles, we again use
+ *  hypergeometric():
+ *
+ *      green_sample = hypergeometric(ngood=ngreen,
+ *                                    nbad=total - nred - ngreen,
+ *                                    nsample=nsample - red_sample)
+ *
+ *  Similarly,
+ *
+ *      blue_sample = hypergeometric(
+ *                        ngood=nblue,
+ *                        nbad=total - nred - ngreen - nblue,
+ *                        nsample=nsample - red_sample - green_sample)
+ *
+ *  Finally,
+ *
+ *      yellow_sample = nsample - (red_sample + green_sample + blue_sample).
+ *
+ *  The above sequence of steps is implemented as a loop for an arbitrary
+ *  number of colors in the innermost loop in the code below.  `remaining`
+ *  is the value passed to `nbad`; it is `total - colors[0]` in the first
+ *  call to random_hypergeometric(), and then decreases by `colors[j]` in
+ *  each iteration.  `num_to_sample` is the `nsample` argument.  It
+ *  starts at this function's `nsample` input, and is decreased by the
+ *  result of the call to random_hypergeometric() in each iteration.
+ *
+ *  Assumptions on the arguments (not checked in the function):
+ *    *  colors[k] >= 0  for k in range(num_colors)
+ *    *  total = sum(colors)
+ *    *  0 <= nsample <= total
+ *    *  the product num_variates * num_colors does not overflow
+ */
+
+void random_mvhg_marginals(bitgen_t *bitgen_state,
+                           int64_t total,
+                           size_t num_colors, int64_t *colors,
+                           int64_t nsample,
+                           size_t num_variates, int64_t *variates)
+{
+    bool more_than_half;
+
+    if ((total == 0) || (nsample == 0) || (num_variates == 0)) {
+        // Nothing to do.
+        return;
+    }
+
+    more_than_half = nsample > (total / 2);
+    if (more_than_half) {
+        nsample = total - nsample;
+    }
+
+    for (size_t i = 0; i < num_variates * num_colors; i += num_colors) {
+        int64_t num_to_sample = nsample;
+        int64_t remaining = total;
+        for (size_t j = 0; (num_to_sample > 0) && (j + 1 < num_colors); ++j) {
+            int64_t r;
+            remaining -= colors[j];
+            r = random_hypergeometric(bitgen_state,
+                                      colors[j], remaining, num_to_sample);
+            variates[i + j] = r;
+            num_to_sample -= r;
+        }
+
+        if (num_to_sample > 0) {
+            variates[i + num_colors - 1] = num_to_sample;
+        }
+
+        if (more_than_half) {
+            for (size_t k = 0; k < num_colors; ++k) {
+                variates[i + k] = colors[k] - variates[i + k];
+            }
+        }
+    }
+}
diff --git a/numpy/random/src/legacy/legacy-distributions.c b/numpy/random/src/legacy/legacy-distributions.c
index 684b3d762..fd067fe8d 100644
--- a/numpy/random/src/legacy/legacy-distributions.c
+++ b/numpy/random/src/legacy/legacy-distributions.c
@@ -1,4 +1,4 @@
-#include "legacy-distributions.h"
+#include "include/legacy-distributions.h"
 
 
 static NPY_INLINE double legacy_double(aug_bitgen_t *aug_state) {
@@ -294,8 +294,8 @@ static RAND_INT_TYPE random_hypergeometric_hrua(bitgen_t *bitgen_state,
   d7 = sqrt((double)(popsize - m) * sample * d4 * d5 / (popsize - 1) + 0.5);
   d8 = D1 * d7 + D2;
   d9 = (RAND_INT_TYPE)floor((double)(m + 1) * (mingoodbad + 1) / (popsize + 2));
-  d10 = (loggam(d9 + 1) + loggam(mingoodbad - d9 + 1) + loggam(m - d9 + 1) +
-         loggam(maxgoodbad - m + d9 + 1));
+  d10 = (random_loggam(d9 + 1) + random_loggam(mingoodbad - d9 + 1) +
+         random_loggam(m - d9 + 1) + random_loggam(maxgoodbad - m + d9 + 1));
   d11 = MIN(MIN(m, mingoodbad) + 1.0, floor(d6 + 16 * d7));
   /* 16 for 16-decimal-digit precision in D1 and D2 */
 
@@ -309,8 +309,8 @@ static RAND_INT_TYPE random_hypergeometric_hrua(bitgen_t *bitgen_state,
       continue;
 
     Z = (RAND_INT_TYPE)floor(W);
-    T = d10 - (loggam(Z + 1) + loggam(mingoodbad - Z + 1) + loggam(m - Z + 1) +
-               loggam(maxgoodbad - m + Z + 1));
+    T = d10 - (random_loggam(Z + 1) + random_loggam(mingoodbad - Z + 1) +
+               random_loggam(m - Z + 1) + random_loggam(maxgoodbad - m + Z + 1));
 
     /* fast acceptance: */
     if ((X * (4.0 - X) - 3.0) <= T)
diff --git a/numpy/random/tests/test_direct.py b/numpy/random/tests/test_direct.py
index 0f57c4bd4..34d7bd278 100644
--- a/numpy/random/tests/test_direct.py
+++ b/numpy/random/tests/test_direct.py
@@ -10,7 +10,7 @@ from numpy.random import (
     Generator, MT19937, PCG64, Philox, RandomState, SeedSequence, SFC64,
     default_rng
 )
-from numpy.random.common import interface
+from numpy.random._common import interface
 
 try:
     import cffi  # noqa: F401
@@ -120,7 +120,7 @@ def gauss_from_uint(x, n, bits):
     return gauss[:n]
 
 def test_seedsequence():
-    from numpy.random.bit_generator import (ISeedSequence,
+    from numpy.random._bit_generator import (ISeedSequence,
                                             ISpawnableSeedSequence,
                                             SeedlessSeedSequence)
 
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
index 391c33c1a..d4502d276 100644
--- a/numpy/random/tests/test_generator_mt19937.py
+++ b/numpy/random/tests/test_generator_mt19937.py
@@ -4,7 +4,7 @@ import pytest
 
 import numpy as np
 from numpy.testing import (
-    assert_, assert_raises, assert_equal,
+    assert_, assert_raises, assert_equal, assert_allclose,
     assert_warns, assert_no_warnings, assert_array_equal,
     assert_array_almost_equal, suppress_warnings)
 
@@ -115,6 +115,140 @@ class TestMultinomial(object):
         assert_array_equal(non_contig, contig)
 
 
+class TestMultivariateHypergeometric(object):
+
+    def setup(self):
+        self.seed = 8675309
+
+    def test_argument_validation(self):
+        # Error cases...
+
+        # `colors` must be a 1-d sequence
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      10, 4)
+
+        # Negative nsample
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [2, 3, 4], -1)
+
+        # Negative color
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [-1, 2, 3], 2)
+
+        # nsample exceeds sum(colors)
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [2, 3, 4], 10)
+
+        # nsample exceeds sum(colors) (edge case of empty colors)
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [], 1)
+
+        # Validation errors associated with very large values in colors.
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [999999999, 101], 5, 1, 'marginals')
+
+        int64_info = np.iinfo(np.int64)
+        max_int64 = int64_info.max
+        max_int64_index = max_int64 // int64_info.dtype.itemsize
+        assert_raises(ValueError, random.multivariate_hypergeometric,
+                      [max_int64_index - 100, 101], 5, 1, 'count')
+
+    @pytest.mark.parametrize('method', ['count', 'marginals'])
+    def test_edge_cases(self, method):
+        # Set the seed, but in fact, all the results in this test are
+        # deterministic, so we don't really need this.
+        random = Generator(MT19937(self.seed))
+
+        x = random.multivariate_hypergeometric([0, 0, 0], 0, method=method)
+        assert_array_equal(x, [0, 0, 0])
+
+        x = random.multivariate_hypergeometric([], 0, method=method)
+        assert_array_equal(x, [])
+
+        x = random.multivariate_hypergeometric([], 0, size=1, method=method)
+        assert_array_equal(x, np.empty((1, 0), dtype=np.int64))
+
+        x = random.multivariate_hypergeometric([1, 2, 3], 0, method=method)
+        assert_array_equal(x, [0, 0, 0])
+
+        x = random.multivariate_hypergeometric([9, 0, 0], 3, method=method)
+        assert_array_equal(x, [3, 0, 0])
+
+        colors = [1, 1, 0, 1, 1]
+        x = random.multivariate_hypergeometric(colors, sum(colors),
+                                               method=method)
+        assert_array_equal(x, colors)
+
+        x = random.multivariate_hypergeometric([3, 4, 5], 12, size=3,
+                                               method=method)
+        assert_array_equal(x, [[3, 4, 5]]*3)
+
+    # Cases for nsample:
+    #     nsample < 10
+    #     10 <= nsample < colors.sum()/2
+    #     colors.sum()/2 < nsample < colors.sum() - 10
+    #     colors.sum() - 10 < nsample < colors.sum()
+    @pytest.mark.parametrize('nsample', [8, 25, 45, 55])
+    @pytest.mark.parametrize('method', ['count', 'marginals'])
+    @pytest.mark.parametrize('size', [5, (2, 3), 150000])
+    def test_typical_cases(self, nsample, method, size):
+        random = Generator(MT19937(self.seed))
+
+        colors = np.array([10, 5, 20, 25])
+        sample = random.multivariate_hypergeometric(colors, nsample, size,
+                                                    method=method)
+        if isinstance(size, int):
+            expected_shape = (size,) + colors.shape
+        else:
+            expected_shape = size + colors.shape
+        assert_equal(sample.shape, expected_shape)
+        assert_((sample >= 0).all())
+        assert_((sample <= colors).all())
+        assert_array_equal(sample.sum(axis=-1),
+                           np.full(size, fill_value=nsample, dtype=int))
+        if isinstance(size, int) and size >= 100000:
+            # This sample is large enough to compare its mean to
+            # the expected values.
+            assert_allclose(sample.mean(axis=0),
+                            nsample * colors / colors.sum(),
+                            rtol=1e-3, atol=0.005)
+
+    def test_repeatability1(self):
+        random = Generator(MT19937(self.seed))
+        sample = random.multivariate_hypergeometric([3, 4, 5], 5, size=5,
+                                                    method='count')
+        expected = np.array([[2, 1, 2],
+                             [2, 1, 2],
+                             [1, 1, 3],
+                             [2, 0, 3],
+                             [2, 1, 2]])
+        assert_array_equal(sample, expected)
+
+    def test_repeatability2(self):
+        random = Generator(MT19937(self.seed))
+        sample = random.multivariate_hypergeometric([20, 30, 50], 50,
+                                                    size=5,
+                                                    method='marginals')
+        expected = np.array([[ 9, 17, 24],
+                             [ 7, 13, 30],
+                             [ 9, 15, 26],
+                             [ 9, 17, 24],
+                             [12, 14, 24]])
+        assert_array_equal(sample, expected)
+
+    def test_repeatability3(self):
+        random = Generator(MT19937(self.seed))
+        sample = random.multivariate_hypergeometric([20, 30, 50], 12,
+                                                    size=5,
+                                                    method='marginals')
+        expected = np.array([[2, 3, 7],
+                             [5, 3, 4],
+                             [2, 5, 5],
+                             [5, 3, 4],
+                             [1, 5, 6]])
+        assert_array_equal(sample, expected)
+
+
 class TestSetState(object):
     def setup(self):
         self.seed = 1234567890
@@ -329,11 +463,11 @@ class TestIntegers(object):
                'int16':  '39624ead49ad67e37545744024d2648b',
                'int32':  '5c4810373f979336c6c0c999996e47a1',
                'int64':  'ab126c15edff26f55c50d2b7e37391ac',
-               'int8':   'd1746364b48a020dab9ef0568e6c0cd2',
+               'int8':   'ba71ccaffeeeb9eeb1860f8075020b9c',
                'uint16': '39624ead49ad67e37545744024d2648b',
                'uint32': '5c4810373f979336c6c0c999996e47a1',
                'uint64': 'ab126c15edff26f55c50d2b7e37391ac',
-               'uint8':  'd1746364b48a020dab9ef0568e6c0cd2'}
+               'uint8':  'ba71ccaffeeeb9eeb1860f8075020b9c'}
 
         for dt in self.itype[1:]:
             random = Generator(MT19937(1234))
@@ -484,6 +618,24 @@ class TestIntegers(object):
         with pytest.raises(ValueError):
             random.integers(0, 200, size=10, dtype=other_byteord_dt)
 
+    # chi2max is the maximum acceptable chi-squared value.
+    @pytest.mark.slow
+    @pytest.mark.parametrize('sample_size,high,dtype,chi2max',
+        [(5000000, 5, np.int8, 125.0),          # p-value ~4.6e-25
+         (5000000, 7, np.uint8, 150.0),         # p-value ~7.7e-30
+         (10000000, 2500, np.int16, 3300.0),    # p-value ~3.0e-25
+         (50000000, 5000, np.uint16, 6500.0),   # p-value ~3.5e-25
+        ])
+    def test_integers_small_dtype_chisquared(self, sample_size, high,
+                                             dtype, chi2max):
+        # Regression test for gh-14774.
+        samples = random.integers(high, size=sample_size, dtype=dtype)
+
+        values, counts = np.unique(samples, return_counts=True)
+        expected = sample_size / high
+        chi2 = ((counts - expected)**2 / expected).sum()
+        assert chi2 < chi2max
+
 
 class TestRandomDist(object):
     # Make sure the random distribution returns the correct value for a
diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py
index a0edc5c23..5131f1839 100644
--- a/numpy/random/tests/test_randomstate.py
+++ b/numpy/random/tests/test_randomstate.py
@@ -11,7 +11,8 @@ from numpy.testing import (
         suppress_warnings
         )
 
-from numpy.random import MT19937, PCG64, mtrand as random
+from numpy.random import MT19937, PCG64
+from numpy import random
 
 INT_FUNCS = {'binomial': (100.0, 0.6),
              'geometric': (.5,),
diff --git a/numpy/random/tests/test_randomstate_regression.py b/numpy/random/tests/test_randomstate_regression.py
index edf32ea97..bdc2214b6 100644
--- a/numpy/random/tests/test_randomstate_regression.py
+++ b/numpy/random/tests/test_randomstate_regression.py
@@ -8,7 +8,7 @@ from numpy.testing import (
 from numpy.compat import long
 import numpy as np
 
-from numpy.random import mtrand as random
+from numpy import random
 
 
 class TestRegression(object):
diff --git a/numpy/random/tests/test_seed_sequence.py b/numpy/random/tests/test_seed_sequence.py
index 8d6d604a2..fe23680ed 100644
--- a/numpy/random/tests/test_seed_sequence.py
+++ b/numpy/random/tests/test_seed_sequence.py
@@ -1,7 +1,7 @@
 import numpy as np
 from numpy.testing import assert_array_equal
 
-from numpy.random.bit_generator import SeedSequence
+from numpy.random import SeedSequence
 
 
 def test_reference_data():
diff --git a/numpy/tests/test_public_api.py b/numpy/tests/test_public_api.py
index e3621c0fd..c71d03432 100644
--- a/numpy/tests/test_public_api.py
+++ b/numpy/tests/test_public_api.py
@@ -298,15 +298,7 @@ PRIVATE_BUT_PRESENT_MODULES = ['numpy.' + s for s in [
     "ma.timer_comparison",
     "matrixlib",
     "matrixlib.defmatrix",
-    "random.bit_generator",
-    "random.bounded_integers",
-    "random.common",
-    "random.generator",
-    "random.mt19937",
     "random.mtrand",
-    "random.pcg64",
-    "random.philox",
-    "random.sfc64",
     "testing.print_coercion_tables",
     "testing.utils",
 ]]
diff --git a/setup.py b/setup.py
index 068f0f405..46c95d6e6 100755
--- a/setup.py
+++ b/setup.py
@@ -44,6 +44,7 @@ Programming Language :: Python :: 3
 Programming Language :: Python :: 3.5
 Programming Language :: Python :: 3.6
 Programming Language :: Python :: 3.7
+Programming Language :: Python :: 3.8
 Programming Language :: Python :: 3 :: Only
 Programming Language :: Python :: Implementation :: CPython
 Topic :: Software Development
diff --git a/tools/travis-test.sh b/tools/travis-test.sh
index 6094f0ee6..e04a33143 100755
--- a/tools/travis-test.sh
+++ b/tools/travis-test.sh
@@ -36,8 +36,6 @@ setup_base()
   sysflags="$($PYTHON -c "from distutils import sysconfig; \
     print (sysconfig.get_config_var('CFLAGS'))")"
   export CFLAGS="$sysflags $werrors -Wlogical-op -Wno-sign-compare"
-  # use c99
-  export CFLAGS=$CFLAGS" -std=c99"
   # We used to use 'setup.py install' here, but that has the terrible
   # behaviour that if a copy of the package is already installed in the
   # install location, then the new copy just gets dropped on top of it.
@@ -141,8 +139,6 @@ if [ -n "$USE_WHEEL" ] && [ $# -eq 0 ]; then
   $PIP install -U virtualenv
   # ensure some warnings are not issued
   export CFLAGS=$CFLAGS" -Wno-sign-compare -Wno-unused-result"
-  # use c99
-  export CFLAGS=$CFLAGS" -std=c99"
   # adjust gcc flags if C coverage requested
   if [ -n "$RUN_COVERAGE" ]; then
      export NPY_DISTUTILS_APPEND_FLAGS=1
@@ -169,8 +165,6 @@ elif [ -n "$USE_SDIST" ] && [ $# -eq 0 ]; then
   $PYTHON -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)"
   # ensure some warnings are not issued
   export CFLAGS=$CFLAGS" -Wno-sign-compare -Wno-unused-result"
-  # use c99
-  export CFLAGS=$CFLAGS" -std=c99"
   $PYTHON setup.py sdist
   # Make another virtualenv to install into
   virtualenv --python=`which $PYTHON` venv-for-wheel